Compare commits

..

176 Commits

Author SHA1 Message Date
3007e22a7d Add info to CHANGELOG
Update submodules
2024-02-24 21:33:28 +01:00
02e456befb Complete predict & predict_proba in ensemble 2024-02-24 18:36:09 +01:00
8477698d8d Complete predict & predict_proba with voting & probabilities 2024-02-23 23:11:14 +01:00
52abd2d670 Implement the proba branch and begin with the voting one 2024-02-23 20:36:11 +01:00
3116eaa763 Begin testing ensemble predict_proba 2024-02-22 18:44:40 +01:00
443e5cc882 Implement classifier.predict_proba & test 2024-02-22 11:45:40 +01:00
e1c4221c11 Add predict_voting and predict_prob to ensemble 2024-02-20 10:58:21 +01:00
a63a35df3f Fix epsilont early stopping in BoostAODE 2024-02-20 10:11:22 +01:00
c7555dac3f Add comments to BoostAODE algorithm 2024-02-19 22:58:15 +01:00
f3b8150e2c Add notes to Classifier & Changelog 2024-02-12 10:58:20 +01:00
03f8b8653b Add getNotes test 2024-02-09 12:06:19 +01:00
2163e95c4a add getNotes method 2024-02-09 10:57:19 +01:00
b33da34655 Add notes to Classifier & use them in BoostAODE 2024-02-08 18:01:09 +01:00
e17aee7bdb Remove argparse module 2024-01-09 18:02:17 +01:00
37c31ee4c2 Update libraries 2024-01-08 17:45:11 +01:00
80afdc06f7 Remove unneeded argparse module 2024-01-08 00:55:16 +01:00
Ricardo Montañana Gómez
666782217e Merge pull request #1 from rmontanana/library
Remove other projects' sources
2024-01-07 20:01:37 +01:00
55af0714cd Remove other projects' sources 2024-01-07 19:58:22 +01:00
6ef5ca541a Add app version to command line utils 2024-01-06 22:38:34 +01:00
4364317411 Merge pull request 'Refactor mpi grid search process using the producer consumer pattern' (#15) from producer_consumer into main
Reviewed-on: #15
2024-01-04 15:24:48 +00:00
65a96851ef Check min number of nested folds 2024-01-04 11:01:59 +01:00
722da7f781 Keep only mpi b_grid compute 2024-01-04 01:21:56 +01:00
b1833a5feb Add reset color to final progress bar 2024-01-03 22:45:16 +01:00
41a0bd4ddd fix dataset name mistakes 2024-01-03 17:15:57 +01:00
9ab4fc7d76 Fix some mistakes in methods 2024-01-03 11:53:46 +01:00
beadb7465f Complete first approach 2023-12-31 12:02:13 +01:00
652e5f623f Add todo comments 2023-12-28 23:32:24 +01:00
b7fef9a99d Remove kk file 2023-12-28 23:24:59 +01:00
343269d48c Fix syntax errors 2023-12-28 23:21:50 +01:00
21c4c6df51 Fix first mistakes in structure 2023-12-25 19:33:52 +01:00
702f086706 Update miniconda instructions 2023-12-23 19:54:00 +01:00
981bc8f98b Fix install message in readme 2023-12-23 01:00:55 +01:00
e0b7b2d316 Set structure & protocol of producer-consumer 2023-12-22 12:47:13 +01:00
9b9e91e856 Merge pull request 'mpi_grid' (#14) from mpi_grid into main
Reviewed-on: #14
2023-12-18 09:05:55 +00:00
18e8e84284 Add openmpi instructions for Oracle Linux 2023-12-17 12:19:50 +01:00
7de11b0e6d Fix format of duration 2023-12-17 01:45:04 +01:00
9b8db37a4b Fix duration of task not set 2023-12-16 19:31:45 +01:00
49b26bd04b fix duration output 2023-12-16 12:53:25 +01:00
b5b5b48864 Update grid progress bar output 2023-12-15 18:09:17 +01:00
19586a3a5a Fix pesky error allocating memory in workers 2023-12-15 01:54:13 +01:00
ffe6d37436 Add messages to control trace 2023-12-14 21:06:43 +01:00
b73f4be146 First try with complete algorithm 2023-12-14 15:55:08 +01:00
dbf2f35502 First compiling version 2023-12-12 18:57:57 +01:00
db9e80a70e Create build tasks 2023-12-12 12:15:22 +01:00
40ae4ad7f9 Include mpi in CMakeLists 2023-12-11 09:06:05 +01:00
234342f2de Add mpi parameter to b_grid 2023-12-10 22:33:17 +01:00
aa0936abd1 Add --exclude parameter to b_grid to exclude datasets 2023-12-08 12:09:08 +01:00
f0d6f0cc38 Fix sample building 2023-12-04 19:12:44 +01:00
cc316bb8d3 Add colors to results of gridsearch 2023-12-04 17:34:00 +01:00
0723564e66 Fix some output in gridsearch 2023-12-03 17:55:44 +01:00
2e95e8999d Complete nested gridsearch 2023-12-03 12:37:25 +01:00
fb9b395748 Begin output nested grid 2023-12-02 13:19:12 +01:00
03e4437fea refactor gridsearch to have only one go method 2023-12-02 10:59:05 +01:00
33cd32c639 Add header to grid output and report 2023-12-01 10:30:53 +01:00
c460ef46ed Refactor gridsearch method 2023-11-30 11:01:37 +01:00
dee9c674da Refactor grid input hyperparameter file 2023-11-29 18:24:34 +01:00
e3f6dc1e0b Fix tolerance hyperp error & gridsearch 2023-11-29 12:33:50 +01:00
460d20a402 Add reports to gridsearch 2023-11-29 00:26:48 +01:00
8dbbb65a2f Add only parameter to gridsearch 2023-11-28 10:08:40 +01:00
d06bf187b2 Implement Random Forest nodes/leaves/depth 2023-11-28 00:35:38 +01:00
4addaefb47 Implement sklearn version in PyWrap 2023-11-27 22:34:34 +01:00
82964190f6 Add nodes/leaves/depth to STree & ODTE 2023-11-27 10:57:57 +01:00
4fefe9a1d2 Add grid input info to grid output 2023-11-26 16:07:32 +01:00
7c12dd25e5 Fix upper case typo 2023-11-26 10:55:32 +01:00
c713c0b1df Add continue from parameter to gridsearch 2023-11-26 10:36:09 +01:00
64069a6cb7 Adapt b_main to the new hyperparam file format 2023-11-25 16:52:25 +01:00
ba2a3f9523 Merge pull request 'gridsearch' (#13) from gridsearch into main
Reviewed-on: #13
2023-11-25 11:16:13 +00:00
f94e2d6a27 Add quiet parameter 2023-11-24 21:16:20 +01:00
2121ba9b98 Refactor input grid parameters to json file 2023-11-24 09:57:29 +01:00
8b7b59d42b Complete first step 2023-11-23 12:59:21 +01:00
bbe5302ab1 Add info to output 2023-11-22 16:38:50 +01:00
c2eb727fc7 Complete output interface of gridsearch 2023-11-22 16:30:04 +01:00
fb347ed5b9 Begin gridsearch implementation 2023-11-22 12:22:30 +01:00
b657762c0c Generate combinations sample 2023-11-22 00:18:24 +01:00
495d8a8528 Begin implementing grid combinations 2023-11-21 13:11:14 +01:00
4628e48d3c Build gridsearch structure 2023-11-20 23:32:34 +01:00
5876be4b24 Add more install instructions of Boost to README 2023-11-20 20:39:22 +01:00
dc3400197f Add coment todo impelemt number of nodes 2023-11-20 01:14:13 +01:00
26d3a57782 Add info to invalid hyperparameter exception 2023-11-19 23:02:28 +01:00
4f3a04058f Refactor Hyperparameters management 2023-11-19 22:36:27 +01:00
89c4613591 Implement hyperparameters with json file 2023-11-18 11:56:10 +01:00
28f3d87e32 Add Python Classifiers
Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
2023-11-17 11:11:05 +01:00
e8d2c9fc0b Set intolerant convergence 2023-11-17 10:26:25 +01:00
d3cb580387 Remove n_jobs from STree 2023-11-17 10:10:31 +01:00
f088df14fd Restore the Creation model position in experiment 2023-11-17 01:10:46 +01:00
e2249eace7 Disable Warning messages in python clfs
Disable removing Python env
2023-11-16 22:38:46 +01:00
64f5a7f14a Fix header in example 2023-11-16 17:03:40 +01:00
408db2aad5 Mark override fit funtcion 2023-11-14 18:59:41 +01:00
e03efb5f63 set tolerance=0 if feature selection in BoostAODE 2023-11-14 10:12:02 +01:00
f617886133 Add new models to example 2023-11-14 09:12:25 +01:00
69ad660040 Refactor version method in PyClassifier 2023-11-13 13:59:06 +01:00
431b3a3aa5 Fit PyWrap into BayesNet 2023-11-13 11:13:32 +01:00
6a23e2cc26 Add CMakelist integration 2023-11-12 22:14:29 +01:00
f6e00530be Add Pywrap sources 2023-11-12 21:43:07 +01:00
f9258e43b9 Remove using namespace from Library 2023-11-08 18:45:35 +01:00
92820555da Simple fix 2023-10-28 10:56:47 +02:00
5a3af51826 Activate best score in odte 2023-10-25 10:23:42 +02:00
a8f9800631 Fix mistake when no results in manage 2023-10-24 19:44:23 +02:00
84cec0c1e0 Add results files affected in best results excel 2023-10-24 16:18:52 +02:00
130139f644 Update formulas to use letters in ranges in excel 2023-10-24 13:06:31 +02:00
651f84b562 Fix mistake in conditional format in bestresults 2023-10-24 11:18:19 +02:00
553ab0fa22 Add conditional format to BestResults Excel 2023-10-24 10:56:41 +02:00
4975feabff Fix mistake in node count 2023-10-23 22:46:10 +02:00
32293af69f Fix header in manage 2023-10-23 17:04:59 +02:00
858664be2d Add total number of results in manage 2023-10-23 16:22:15 +02:00
1f705f6018 Refactor BestScore and add experiment to .env 2023-10-23 16:12:52 +02:00
7bcd2eed06 Add variable width of dataset name in reports 2023-10-22 22:58:52 +02:00
833acefbb3 Fix index limits mistake in manage 2023-10-22 20:21:50 +02:00
26b649ebae Refactor ManageResults and CommandParser 2023-10-22 20:03:34 +02:00
080eddf9cd Fix hyperparameters output in b_best 2023-10-20 22:52:48 +02:00
04e754b2f5 Adjust filename and hyperparameters in reports 2023-10-20 11:12:46 +02:00
38423048bd Add excel to best report of model 2023-10-19 18:12:55 +02:00
64fc97b892 Rename utilities sources to match final names 2023-10-19 09:57:04 +02:00
2c2159f192 Add quiet mode to b_main
Reduce output when --quiet is set, not showing fold info
2023-10-17 21:51:53 +02:00
6765552a7c Update submodule versions 2023-10-16 19:21:57 +02:00
f72aa5b9a6 Merge pull request 'Create Boost_CFS' (#11) from Boost_CFS into main
Add hyper parameter to BoostAODE. This hyper parameter decides if we select features with cfs/fcbf/iwss before start building models and build a Spode with the selected features.
The hyperparameter is select_features
2023-10-15 09:22:14 +00:00
fa7fe081ad Fix xlsx library finding 2023-10-15 11:19:58 +02:00
660e783517 Update validation for feature selection 2023-10-14 13:32:09 +02:00
b35532dd9e Implement IWSS and FCBF too for BoostAODE 2023-10-14 13:12:04 +02:00
6ef49385ea Remove unneeded method declaration FeatureSelect 2023-10-14 11:30:32 +02:00
6d5a25cdc8 Refactor CFS class creating abstract base class 2023-10-14 11:27:46 +02:00
d00b08cbe8 Fix Header for Linux 2023-10-13 14:26:47 +02:00
977ff6fddb Update CMakeLists for Linux 2023-10-13 14:01:52 +02:00
54b8939f35 Prepare BoostAODE first try 2023-10-13 13:46:22 +02:00
5022a4dc90 Complete CFS tested with Python mufs 2023-10-13 12:29:25 +02:00
40d1dad5d8 Begin CFS implementation 2023-10-11 21:17:26 +02:00
47e2b138c5 Complete first working cfs 2023-10-11 11:33:29 +02:00
e7ded68267 First cfs working version 2023-10-10 23:00:38 +02:00
ca833a34f5 try openssl sha256 2023-10-10 18:16:43 +02:00
df9b4c48d2 Begin CFS initialization 2023-10-10 13:39:11 +02:00
f288bbd6fa Begin adding cfs to BoostAODE 2023-10-10 11:52:39 +02:00
7d8aca4f59 Add Locale shared config to reports 2023-10-09 19:41:29 +02:00
8fdad78a8c Continue Test Network 2023-10-09 11:25:30 +02:00
e3ae073333 Continue test Network 2023-10-08 15:54:58 +02:00
4b732e76c2 MST change unordered_set to list 2023-10-07 19:08:13 +02:00
fe5fead27e Begin Fix Test MST 2023-10-07 01:43:26 +02:00
8c3864f3c8 Complete Folding Test 2023-10-07 01:23:36 +02:00
1287160c47 Refactor makefile to use variables 2023-10-07 00:16:25 +02:00
2f58807322 Begin refactor CMakeLists debug/release paths 2023-10-06 19:32:29 +02:00
17e079edd5 Begin Test Folding 2023-10-06 17:08:54 +02:00
b9e0028e9d Refactor Makefile 2023-10-06 01:28:27 +02:00
e0d39fe631 Fix BayesMetrics Test 2023-10-06 01:14:55 +02:00
36b0277576 Add Maximum Spanning Tree test 2023-10-05 15:45:36 +02:00
da8d018ec4 Refactor Makefile 2023-10-05 11:45:00 +02:00
5f0676691c Add First BayesMetrics Tests 2023-10-05 01:14:16 +02:00
3448fb1299 Refactor Tests and add BayesMetrics test 2023-10-04 23:19:23 +02:00
5e938d5cca Add ranks sheet to excel best results 2023-10-04 16:26:57 +02:00
55e742438f Add constant references to Statistics 2023-10-04 13:40:45 +02:00
c4ae3fe429 Add Control model rank info to report 2023-10-04 12:42:35 +02:00
93e4ff94db Add significance level as parameter in best 2023-10-02 15:46:40 +02:00
57c27f739c Remove unused code in BestResults 2023-10-02 15:31:02 +02:00
a434d7f1ae Add a Linux config in launch.json 2023-09-30 18:44:21 +02:00
294666c516 Fix a Linux problem in Datasets 2023-09-30 18:43:47 +02:00
fd04e78ad9 Restore sample.cc 2023-09-29 18:50:25 +02:00
66ec1b343b Remove platformUtils and split Datasets & Dataset 2023-09-29 18:20:46 +02:00
bb423da42f Add csv and R_dat files to platform 2023-09-29 13:52:50 +02:00
db17c14042 Change names of executables to b_... 2023-09-29 09:17:50 +02:00
a4401cb78f Linux CMakeLists.txt adjustment 2023-09-29 00:30:47 +02:00
9d3d9cc6c6 Complete Excel output for bestResults with Friedman test 2023-09-28 18:52:37 +02:00
cfcf3c16df Add best results Excel 2023-09-28 17:12:04 +02:00
85202260f3 Separate specific Excel methods to ExcelFile 2023-09-28 13:07:11 +02:00
82acb3cab5 Enhance output of Best results reports 2023-09-28 12:08:56 +02:00
623ceed396 Merge pull request 'Add Friedman Test & post hoc tests to BestResults' (#10) from boost into main
Reviewed-on: #10
2023-09-28 07:44:55 +00:00
926de2bebd Add boost info to README 2023-09-28 09:44:33 +02:00
71704e3547 Enhance output info in Statistics 2023-09-28 01:27:18 +02:00
3b06534327 Remove duplicated code in BestResults 2023-09-28 00:59:34 +02:00
ac89a451e3 Duplicate statistics tests in class 2023-09-28 00:45:15 +02:00
00c6cf663b Fix order of output in posthoc 2023-09-27 19:11:47 +02:00
5043c12be8 Complete posthoc with Holm adjust 2023-09-27 18:34:16 +02:00
11320e2cc7 Complete friedman test as in exreport 2023-09-27 12:36:03 +02:00
ce66483b65 Update boost version requirement for Linux 2023-09-26 14:12:53 +02:00
cab8e14b2d Add friedman hyperparameter 2023-09-26 11:26:59 +02:00
f0d0abe891 Add boost library link to linux build 2023-09-26 01:07:50 +02:00
dcba146e12 Begin adding Friedman test to BestResults 2023-09-26 01:04:59 +02:00
3ea0285119 Fix ranks to match friedman test ranks 2023-09-25 18:38:12 +02:00
e3888e1503 Merge pull request 'bestResults' (#9) from bestResults into main
Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9

Add best results management, build, report, build all & report all
2023-09-25 12:02:17 +00:00
135 changed files with 2503 additions and 6891 deletions

View File

@@ -1,31 +0,0 @@
compilation_database_dir: build
output_directory: puml
diagrams:
BayesNet:
type: class
glob:
- src/BayesNet/*.cc
- src/Platform/*.cc
using_namespace: bayesnet
include:
namespaces:
- bayesnet
- platform
plantuml:
after:
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
sequence:
type: sequence
glob:
- src/Platform/main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std
- bayesnet
- platform
include:
paths:
- src/BayesNet
- src/Platform
start_from:
- function: main(int,const char **)

3
.gitignore vendored
View File

@@ -31,7 +31,8 @@
*.exe
*.out
*.app
build/
build/**
build_*/**
*.dSYM/**
cmake-build*/**
.idea

17
.gitmodules vendored
View File

@@ -1,15 +1,20 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
master = master
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding
main = main
update = merge

18
.vscode/c_cpp_properties.json vendored Normal file
View File

@@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Mac",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"macFrameworkPath": [
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
],
"cStandard": "c17",
"cppStandard": "c++17",
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
}
],
"version": 4
}

75
.vscode/launch.json vendored
View File

@@ -5,7 +5,7 @@
"type": "lldb",
"request": "launch",
"name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [
"-d",
"iris",
@@ -21,27 +21,58 @@
{
"type": "lldb",
"request": "launch",
"name": "experiment",
"program": "${workspaceFolder}/build/src/Platform/main",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"BoostAODE",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"STree",
"--stratified",
"-d",
"mfeat-morphological",
"--discretize"
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build/src/Platform/best",
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [
"-m",
"BoostAODE",
@@ -49,32 +80,44 @@
"accuracy",
"--build",
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build/src/Platform/manage",
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [
"-n",
"20"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build/src/Platform/list",
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
"cwd": "/Users/rmontanana/Code/discretizbench",
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet",
"args": [
//"-c=\"Metrics Test\"",
// "-s",
],
"cwd": "${workspaceFolder}/build_debug/tests",
},
{
"name": "Build & debug active file",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bayesnet",
"program": "${workspaceFolder}/build_debug/bayesnet",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",

41
CHANGELOG.md Normal file
View File

@@ -0,0 +1,41 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Voting / probability aggregation in Ensemble classes
- predict_proba method in Classifier
- predict_proba method in BoostAODE
- predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting)
- hyperparameter predict_voting to AODE, AODELd and BoostAODE (Ensemble child classes)
- tests to check predict & predict_proba coherence
## [1.0.2] - 2024-02-20
### Fixed
- Fix bug in BoostAODE: do not include the model if epsilon sub t is greater than 0.5
- Fix bug in BoostAODE: compare accuracy with previous accuracy instead of the first of the ensemble if convergence true
## [1.0.1] - 2024-02-12
### Added
- Notes in Classifier class
- BoostAODE: Add note with used features in initialization with feature selection
- BoostAODE: Add note with the number of models
- BoostAODE: Add note with the number of features used to create models if not all features are used
- Test version number in TestBayesModels
- Add tests with feature_select and notes on BoostAODE
### Fixed
- Network predict test
- Network predict_proba test
- Network score test

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 0.2.0
VERSION 1.0.3
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
@@ -24,18 +24,18 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
@@ -52,25 +52,18 @@ endif (ENABLE_CLANG_TIDY)
# ---------------------------------------------
# include(FetchContent)
add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse")
add_git_submodule("lib/json")
find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib)
# Subdirectories
# --------------
add_subdirectory(config)
add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform)
add_subdirectory(sample)
add_subdirectory(src)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.hpp)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/*.cc)
# Testing
# -------
if (ENABLE_TESTING)
MESSAGE("Testing enabled")
add_git_submodule("lib/catch2")

View File

@@ -1,6 +1,26 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage setup help build test
.PHONY: coverage setup help buildr buildd test clean debug release
f_release = build_release
f_debug = build_debug
app_targets = BayesNet
test_targets = unit_tests_bayesnet
n_procs = -j 16
define ClearTests
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
echo ">>> Cleaning $$t..." ; \
rm -f $(f_debug)/tests/$$t ; \
fi ; \
done
@nfiles="$(find . -name "*.gcda" -print0)" ; \
if test "${nfiles}" != "" ; then \
find . -name "*.gcda" -print0 | xargs -0 rm 2>/dev/null ;\
fi ;
endef
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
@@ -11,63 +31,55 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \
fi
dest ?= ../discretizbench
copy: ## Copy binary files to selected folder
@echo "Destination folder: $(dest)"
make build
@echo ">>> Copying files to $(dest)"
@cp build/src/Platform/main $(dest)
@cp build/src/Platform/list $(dest)
@cp build/src/Platform/manage $(dest)
@cp build/src/Platform/best $(dest)
@echo ">>> Done"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
@echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32
buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ...";
find . -name "*.gcda" -print0 | xargs -0 rm
buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs)
clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests...";
$(call ClearTests)
@echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32;
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32;
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
test: ## Run tests
@echo "* Running tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet & Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
cd $(f_debug)/tests ; \
./$$t $(opt) ; \
fi ; \
done
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo "*Building tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
gcovr ;
@echo ">>> Building tests with coverage..."
@$(MAKE) test
@gcovr $(f_debug)/tests
@echo ">>> Done";
help: ## Show help message
@IFS=$$'\n' ; \

View File

@@ -1,37 +1,22 @@
# BayesNet
Bayesian Network Classifier with libtorch from scratch
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
## 0. Setup
### libxlswriter
Before compiling BayesNet.
```bash
cd lib/libxlsxwriter
make
sudo make install
```
It has to be installed in /usr/local/lib otherwise CMakeLists.txt has to be modified accordingly
Environment variable has to be set:
```bash
export LD_LIBRARY_PATH=/usr/local/lib
```
Bayesian Network Classifiers using libtorch from scratch
### Release
```bash
make release
make buildr
```
### Debug & Tests
```bash
make debug
make test
make coverage
```
## 1. Introduction

View File

@@ -11,3 +11,4 @@ static constexpr std::string_view project_name = " @PROJECT_NAME@ ";
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view git_sha = "@GIT_SHA@";
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";

View File

@@ -1,25 +0,0 @@
Type Si
Type Fe
Type RI
Type Na
Type Ba
Type Ca
Type Al
Type K
Type Mg
Fe RI
Fe Ba
Fe Ca
RI Na
RI Ba
RI Ca
RI Al
RI K
RI Mg
Ba Ca
Ba Al
Ca Al
Ca K
Ca Mg
Al K
K Mg

View File

@@ -1,645 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att25 att131
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att131 att95
att131 att122
att131 att17
att131 att28
att131 att121
att131 att214
att131 att116
att131 att126
att131 att143
att95 att122
att95 att17
att95 att28
att95 att5
att95 att214
att95 att116
att95 att60
att95 att143
att95 att155
att95 att71
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att161
att28 att206
att28 att16
att28 att76
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att209
att121 att73
att214 att178
att214 att58
att214 att142
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att126
att116 att16
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att182 att27
att182 att14
att60 att168
att60 att156
att168 att156
att168 att96
att178 att20
att178 att58
att178 att142
att178 att130
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att209 att101
att209 att41
att73 att61
att73 att157
att126 att162
att126 att138
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att74 att14
att74 att62
att27 att171
att61 att85
att61 att169
att20 att211
att20 att210
att20 att164
att20 att176
att101 att41
att85 att13
att76 att40
att76 att160
att137 att149
att211 att210
att211 att162
att211 att171
att211 att163
att211 att175
att211 att79
att143 att155
att143 att23
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att52
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att23 att203
att162 att138
att162 att18
att162 att150
att162 att90
att162 att174
att203 att107
att203 att49
att203 att59
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att92
att164 att163
att164 att176
att164 att145
att164 att68
att164 att80
att164 att98
att164 att110
att164 att205
att164 att21
att164 att213
att164 att112
att164 att38
att164 att56
att164 att44
att107 att59
att107 att47
att107 att191
att71 att83
att71 att167
att71 att35
att128 att92
att138 att18
att83 att167
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att91
att161 att173
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att110
att176 att205
att176 att21
att176 att134
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att208
att58 att46
att68 att32
att32 att200
att87 att159
att87 att63
att87 att75
att87 att15
att87 att99
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att98 att86
att150 att174
att150 att66
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att100 att148
att63 att51
att63 att3
att63 att183
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att10
att142 att202
att142 att190
att142 att106
att46 att70
att46 att34
att46 att166
att134 att2
att102 att54
att130 att118
att130 att10
att130 att202
att149 att125
att96 att216
att96 att24
att75 att15
att75 att99
att118 att70
att78 att198
att213 att189
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att105
att70 att34
att59 att47
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att173 att125
att173 att113
att44 att152
att44 att188
att44 att200
att44 att212
att44 att1
att139 att103
att139 att43
att139 att31
att139 att199
att139 att7
att216 att204
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att140
att69 att153
att81 att45
att153 att141
att41 att53
att204 att12
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att193
att45 att37
att45 att97
att140 att8
att30 att6
att183 att147
att183 att123
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att147 att135
att8 att212
att166 att82
att187 att127
att187 att115
att127 att115
att105 att93
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att22 att94
att84 att48
att177 att165
att103 att195
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att201 att33
att201 att57
att36 att180
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att6 att186
att113 att185
att113 att53
att193 att97
att91 att31
att91 att19
att72 att132
att72 att192
att31 att199
att31 att67
att132 att144
att132 att120
att33 att57
att144 att120
att185 att65
att199 att7
att199 att67
att199 att55
att65 att29
att67 att55
att109 att181

View File

@@ -1,859 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att215 att17
att215 att214
att215 att143
att25 att131
att25 att95
att25 att122
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att25 att157
att131 att95
att131 att122
att131 att17
att131 att28
att131 att5
att131 att121
att131 att214
att131 att116
att131 att182
att131 att60
att131 att126
att131 att16
att131 att27
att131 att20
att131 att143
att131 att155
att95 att122
att95 att17
att95 att28
att95 att5
att95 att121
att95 att214
att95 att197
att95 att116
att95 att60
att95 att168
att95 att178
att95 att143
att95 att155
att95 att23
att95 att71
att95 att167
att122 att28
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att209
att17 att137
att17 att161
att17 att41
att28 att206
att28 att16
att28 att76
att28 att40
att28 att210
att28 att160
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att77
att5 att209
att5 att101
att121 att73
att121 att61
att214 att116
att214 att178
att214 att206
att214 att58
att214 att142
att214 att46
att197 att89
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att73
att116 att126
att116 att16
att116 att74
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att116 att92
att116 att176
att116 att68
att182 att27
att182 att14
att60 att168
att60 att156
att60 att96
att168 att126
att168 att156
att168 att96
att168 att216
att178 att20
att178 att211
att178 att58
att178 att142
att178 att130
att178 att166
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att77 att149
att209 att101
att209 att41
att73 att61
att73 att85
att73 att13
att73 att157
att126 att162
att126 att138
att126 att18
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att16 att196
att16 att136
att74 att14
att74 att62
att27 att171
att27 att63
att61 att85
att61 att169
att20 att76
att20 att211
att20 att210
att20 att170
att20 att164
att20 att128
att20 att176
att20 att80
att101 att41
att85 att169
att85 att13
att76 att14
att76 att40
att76 att160
att76 att4
att76 att52
att137 att161
att137 att149
att137 att173
att137 att125
att211 att210
att211 att162
att211 att164
att211 att62
att211 att42
att211 att171
att211 att163
att211 att175
att211 att79
att211 att151
att211 att43
att143 att155
att143 att23
att143 att203
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att88
att40 att52
att210 att162
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att160 att124
att23 att203
att23 att107
att23 att71
att23 att11
att162 att138
att162 att18
att162 att150
att162 att90
att162 att102
att162 att174
att162 att66
att203 att107
att203 att49
att203 att59
att203 att47
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att171
att164 att92
att164 att163
att164 att158
att164 att176
att164 att145
att164 att172
att164 att58
att164 att68
att164 att80
att164 att32
att164 att98
att164 att156
att164 att110
att164 att205
att164 att21
att164 att134
att164 att213
att164 att112
att164 att38
att164 att189
att164 att56
att164 att44
att164 att152
att164 att8
att107 att83
att107 att49
att107 att59
att107 att47
att107 att191
att42 att138
att42 att54
att42 att114
att71 att83
att71 att167
att71 att35
att71 att179
att128 att92
att128 att112
att138 att18
att138 att150
att83 att167
att83 att35
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att92 att163
att92 att145
att92 att56
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att127
att163 att103
att163 att91
att49 att37
att161 att173
att161 att113
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att175
att176 att98
att176 att110
att176 att205
att176 att21
att176 att134
att176 att213
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att148
att196 att208
att58 att142
att58 att46
att58 att34
att68 att32
att80 att38
att32 att110
att32 att21
att32 att44
att32 att200
att175 att87
att175 att159
att175 att79
att175 att187
att175 att115
att87 att159
att87 att63
att87 att51
att87 att75
att87 att15
att87 att99
att159 att75
att159 att15
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att52 att64
att98 att86
att136 att100
att136 att208
att150 att90
att150 att174
att150 att66
att156 att205
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att156 att36
att156 att12
att156 att108
att100 att148
att63 att51
att63 att39
att63 att3
att63 att183
att63 att147
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att51 att183
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att153
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att70
att142 att10
att142 att202
att142 att190
att142 att106
att46 att130
att46 att118
att46 att70
att46 att34
att46 att166
att46 att82
att134 att2
att39 att3
att102 att78
att102 att174
att102 att54
att102 att198
att130 att118
att130 att10
att130 att202
att130 att190
att130 att106
att149 att125
att96 att216
att96 att204
att96 att24
att75 att15
att75 att99
att118 att70
att118 att10
att118 att202
att78 att198
att213 att189
att213 att129
att213 att69
att213 att81
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att141
att189 att105
att70 att34
att70 att154
att179 att59
att59 att47
att59 att191
att59 att119
att79 att86
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att193
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att54 att6
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att200
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att151 att139
att66 att30
att173 att125
att173 att113
att173 att185
att44 att152
att44 att50
att44 att188
att44 att200
att44 att104
att44 att140
att44 att194
att44 att212
att44 att1
att139 att26
att139 att99
att139 att103
att139 att43
att139 att91
att139 att31
att139 att199
att139 att7
att216 att204
att216 att24
att216 att84
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att188
att152 att140
att69 att153
att69 att9
att69 att177
att81 att45
att81 att105
att153 att117
att153 att141
att41 att53
att204 att12
att204 att180
att188 att146
att188 att212
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att45
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att165
att45 att193
att45 att33
att45 att37
att45 att133
att45 att97
att140 att8
att30 att6
att30 att186
att183 att147
att183 att123
att183 att135
att146 att2
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att24 att132
att147 att123
att147 att135
att147 att111
att147 att207
att8 att212
att166 att82
att166 att22
att166 att94
att187 att127
att187 att115
att127 att115
att105 att184
att105 att93
att105 att201
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att99 att195
att22 att94
att84 att48
att177 att93
att177 att165
att177 att181
att103 att195
att103 att97
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att93 att57
att201 att33
att201 att57
att43 att31
att36 att180
att36 att48
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att48 att72
att6 att186
att113 att185
att113 att53
att113 att65
att193 att97
att91 att31
att91 att199
att91 att19
att72 att132
att72 att144
att72 att192
att72 att120
att31 att199
att31 att7
att31 att67
att31 att55
att31 att1
att132 att144
att132 att120
att33 att57
att144 att192
att144 att120
att185 att53
att185 att65
att185 att29
att199 att19
att199 att7
att199 att67
att199 att55
att199 att109
att65 att29
att7 att67
att67 att55
att109 att181

View File

@@ -1,859 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att215 att17
att215 att214
att215 att143
att25 att131
att25 att95
att25 att122
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att25 att157
att131 att95
att131 att122
att131 att17
att131 att28
att131 att5
att131 att121
att131 att214
att131 att116
att131 att182
att131 att60
att131 att126
att131 att16
att131 att27
att131 att20
att131 att143
att131 att155
att95 att122
att95 att17
att95 att28
att95 att5
att95 att121
att95 att214
att95 att197
att95 att116
att95 att60
att95 att168
att95 att178
att95 att143
att95 att155
att95 att23
att95 att71
att95 att167
att122 att28
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att209
att17 att137
att17 att161
att17 att41
att28 att206
att28 att16
att28 att76
att28 att40
att28 att210
att28 att160
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att77
att5 att209
att5 att101
att121 att73
att121 att61
att214 att116
att214 att178
att214 att206
att214 att58
att214 att142
att214 att46
att197 att89
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att73
att116 att126
att116 att16
att116 att74
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att116 att92
att116 att176
att116 att68
att182 att27
att182 att14
att60 att168
att60 att156
att60 att96
att168 att126
att168 att156
att168 att96
att168 att216
att178 att20
att178 att211
att178 att58
att178 att142
att178 att130
att178 att166
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att77 att149
att209 att101
att209 att41
att73 att61
att73 att85
att73 att13
att73 att157
att126 att162
att126 att138
att126 att18
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att16 att196
att16 att136
att74 att14
att74 att62
att27 att171
att27 att63
att61 att85
att61 att169
att20 att76
att20 att211
att20 att210
att20 att170
att20 att164
att20 att128
att20 att176
att20 att80
att101 att41
att85 att169
att85 att13
att76 att14
att76 att40
att76 att160
att76 att4
att76 att52
att137 att161
att137 att149
att137 att173
att137 att125
att211 att210
att211 att162
att211 att164
att211 att62
att211 att42
att211 att171
att211 att163
att211 att175
att211 att79
att211 att151
att211 att43
att143 att155
att143 att23
att143 att203
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att88
att40 att52
att210 att162
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att160 att124
att23 att203
att23 att107
att23 att71
att23 att11
att162 att138
att162 att18
att162 att150
att162 att90
att162 att102
att162 att174
att162 att66
att203 att107
att203 att49
att203 att59
att203 att47
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att171
att164 att92
att164 att163
att164 att158
att164 att176
att164 att145
att164 att172
att164 att58
att164 att68
att164 att80
att164 att32
att164 att98
att164 att156
att164 att110
att164 att205
att164 att21
att164 att134
att164 att213
att164 att112
att164 att38
att164 att189
att164 att56
att164 att44
att164 att152
att164 att8
att107 att83
att107 att49
att107 att59
att107 att47
att107 att191
att42 att138
att42 att54
att42 att114
att71 att83
att71 att167
att71 att35
att71 att179
att128 att92
att128 att112
att138 att18
att138 att150
att83 att167
att83 att35
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att92 att163
att92 att145
att92 att56
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att127
att163 att103
att163 att91
att49 att37
att161 att173
att161 att113
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att175
att176 att98
att176 att110
att176 att205
att176 att21
att176 att134
att176 att213
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att148
att196 att208
att58 att142
att58 att46
att58 att34
att68 att32
att80 att38
att32 att110
att32 att21
att32 att44
att32 att200
att175 att87
att175 att159
att175 att79
att175 att187
att175 att115
att87 att159
att87 att63
att87 att51
att87 att75
att87 att15
att87 att99
att159 att75
att159 att15
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att52 att64
att98 att86
att136 att100
att136 att208
att150 att90
att150 att174
att150 att66
att156 att205
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att156 att36
att156 att12
att156 att108
att100 att148
att63 att51
att63 att39
att63 att3
att63 att183
att63 att147
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att51 att183
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att153
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att70
att142 att10
att142 att202
att142 att190
att142 att106
att46 att130
att46 att118
att46 att70
att46 att34
att46 att166
att46 att82
att134 att2
att39 att3
att102 att78
att102 att174
att102 att54
att102 att198
att130 att118
att130 att10
att130 att202
att130 att190
att130 att106
att149 att125
att96 att216
att96 att204
att96 att24
att75 att15
att75 att99
att118 att70
att118 att10
att118 att202
att78 att198
att213 att189
att213 att129
att213 att69
att213 att81
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att141
att189 att105
att70 att34
att70 att154
att179 att59
att59 att47
att59 att191
att59 att119
att79 att86
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att193
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att54 att6
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att200
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att151 att139
att66 att30
att173 att125
att173 att113
att173 att185
att44 att152
att44 att50
att44 att188
att44 att200
att44 att104
att44 att140
att44 att194
att44 att212
att44 att1
att139 att26
att139 att99
att139 att103
att139 att43
att139 att91
att139 att31
att139 att199
att139 att7
att216 att204
att216 att24
att216 att84
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att188
att152 att140
att69 att153
att69 att9
att69 att177
att81 att45
att81 att105
att153 att117
att153 att141
att41 att53
att204 att12
att204 att180
att188 att146
att188 att212
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att45
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att165
att45 att193
att45 att33
att45 att37
att45 att133
att45 att97
att140 att8
att30 att6
att30 att186
att183 att147
att183 att123
att183 att135
att146 att2
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att24 att132
att147 att123
att147 att135
att147 att111
att147 att207
att8 att212
att166 att82
att166 att22
att166 att94
att187 att127
att187 att115
att127 att115
att105 att184
att105 att93
att105 att201
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att99 att195
att22 att94
att84 att48
att177 att93
att177 att165
att177 att181
att103 att195
att103 att97
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att93 att57
att201 att33
att201 att57
att43 att31
att36 att180
att36 att48
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att48 att72
att6 att186
att113 att185
att113 att53
att113 att65
att193 att97
att91 att31
att91 att199
att91 att19
att72 att132
att72 att144
att72 att192
att72 att120
att31 att199
att31 att7
att31 att67
att31 att55
att31 att1
att132 att144
att132 att120
att33 att57
att144 att192
att144 att120
att185 att53
att185 att65
att185 att29
att199 att19
att199 att7
att199 att67
att199 att55
att199 att109
att65 att29
att7 att67
att67 att55
att109 att181

BIN
docs/BoostAODE.docx Normal file

Binary file not shown.

View File

@@ -1,4 +1,4 @@
filter = src/
exclude-directories = build/lib/
exclude-directories = build_debug/lib/
print-summary = yes
sort-percentage = yes

View File

@@ -4,11 +4,9 @@
#include <map>
#include <iostream>
using namespace std;
ArffFiles::ArffFiles() = default;
vector<string> ArffFiles::getLines() const
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
@@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const
return lines.size();
}
vector<pair<string, string>> ArffFiles::getAttributes() const
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
string ArffFiles::getClassName() const
std::string ArffFiles::getClassName() const
{
return className;
}
string ArffFiles::getClassType() const
std::string ArffFiles::getClassType() const
{
return classType;
}
vector<vector<float>>& ArffFiles::getX()
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
vector<int>& ArffFiles::getY()
std::vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::loadCommon(string fileName)
void ArffFiles::loadCommon(std::string fileName)
{
ifstream file(fileName);
std::ifstream file(fileName);
if (!file.is_open()) {
throw invalid_argument("Unable to open file");
throw std::invalid_argument("Unable to open file");
}
string line;
string keyword;
string attribute;
string type;
string type_w;
std::string line;
std::string keyword;
std::string attribute;
std::string type;
std::string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
std::stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
@@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName)
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
throw std::invalid_argument("No attributes found");
}
void ArffFiles::load(const string& fileName, bool classLast)
void ArffFiles::load(const std::string& fileName, bool classLast)
{
int labelIndex;
loadCommon(fileName);
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
className = std::get<0>(attributes.back());
classType = std::get<1>(attributes.back());
attributes.pop_back();
labelIndex = static_cast<int>(attributes.size());
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
className = std::get<0>(attributes.front());
classType = std::get<1>(attributes.front());
attributes.erase(attributes.begin());
labelIndex = 0;
}
generateDataset(labelIndex);
}
void ArffFiles::load(const string& fileName, const string& name)
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
int labelIndex;
loadCommon(fileName);
bool found = false;
for (int i = 0; i < attributes.size(); ++i) {
if (attributes[i].first == name) {
className = get<0>(attributes[i]);
classType = get<1>(attributes[i]);
className = std::get<0>(attributes[i]);
classType = std::get<1>(attributes[i]);
attributes.erase(attributes.begin() + i);
labelIndex = i;
found = true;
@@ -110,19 +108,19 @@ void ArffFiles::load(const string& fileName, const string& name)
}
}
if (!found) {
throw invalid_argument("Class name not found");
throw std::invalid_argument("Class name not found");
}
generateDataset(labelIndex);
}
void ArffFiles::generateDataset(int labelIndex)
{
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
auto yy = vector<string>(lines.size(), "");
auto removeLines = vector<int>(); // Lines with missing values
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
auto yy = std::vector<std::string>(lines.size(), "");
auto removeLines = std::vector<int>(); // Lines with missing values
for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
std::stringstream ss(lines[i]);
std::string value;
int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) {
@@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex)
y = factorize(yy);
}
string ArffFiles::trim(const string& source)
std::string ArffFiles::trim(const std::string& source)
{
string s(source);
std::string s(source);
s.erase(0, s.find_first_not_of(" '\n\r\t"));
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
return s;
}
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
vector<int> yy;
std::vector<int> yy;
yy.reserve(labels_t.size());
map<string, int> labelMap;
std::map<std::string, int> labelMap;
int i = 0;
for (const string& label : labels_t) {
for (const std::string& label : labels_t) {
if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++;
}

View File

@@ -4,31 +4,29 @@
#include <string>
#include <vector>
// Reader for ARFF (Attribute-Relation File Format) data files.
// Loads attributes and samples, separates the class attribute, and exposes
// the feature matrix X (one std::vector<float> per attribute) and the
// factorized label vector y.
class ArffFiles {
private:
    std::vector<std::string> lines;                              // raw data lines of the file
    std::vector<std::pair<std::string, std::string>> attributes; // (name, type) per feature
    std::string className;                                       // name of the class attribute
    std::string classType;                                       // declared type of the class attribute
    std::vector<std::vector<float>> X;                           // feature matrix, attribute-major
    std::vector<int> y;                                          // factorized class labels
    void generateDataset(int);
    void loadCommon(std::string);
public:
    ArffFiles();
    void load(const std::string&, bool = true);
    void load(const std::string&, const std::string&);
    std::vector<std::string> getLines() const;
    unsigned long int getSize() const;
    std::string getClassName() const;
    std::string getClassType() const;
    static std::string trim(const std::string&);
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();
    std::vector<std::pair<std::string, std::string>> getAttributes() const;
    static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

Submodule lib/argparse deleted from b0930ab028

1
lib/folding Submodule

Submodule lib/folding added at 37316a54e0

View File

@@ -1,8 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,283 +0,0 @@
#include <iostream>
#include <torch/torch.h>
#include <string>
#include <map>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "ArffFiles.h"
#include "BayesMetrics.h"
#include "CPPFImdlp.h"
#include "Folding.h"
#include "Models.h"
#include "modelRegister.h"
#include <fstream>
using namespace std;
const string PATH = "../../data/";
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
{
vector<mdlp::labels_t>Xd;
map<string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
bool file_exists(const std::string& name)
{
if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file);
return true;
} else {
return false;
}
}
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y)
{
vector<vector<int>> Xr; // nxm
vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>());
}
for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) {
Xr[col].push_back(X[col][index]);
}
yr.push_back(y[index]);
}
return { Xr, yr };
}
int main(int argc, char** argv)
{
torch::Tensor weights_ = torch::full({ 10 }, 1.0 / 10, torch::kFloat64);
torch::Tensor y_ = torch::tensor({ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }, torch::kInt32);
torch::Tensor ypred = torch::tensor({ 1, 1, 1, 0, 0, 1, 1, 1, 1, 0 }, torch::kInt32);
cout << "Initial weights_: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << ", ";
}
cout << "end." << endl;
cout << "y_: " << endl;
for (int i = 0; i < 10; i++) {
cout << y_.index({ i }).item<int>() << ", ";
}
cout << "end." << endl;
cout << "ypred: " << endl;
for (int i = 0; i < 10; i++) {
cout << ypred.index({ i }).item<int>() << ", ";
}
cout << "end." << endl;
auto mask_wrong = ypred != y_;
auto mask_right = ypred == y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
cout << "epsilon_t: " << epsilon_t << endl;
double wt = (1 - epsilon_t) / epsilon_t;
cout << "wt: " << wt << endl;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
cout << "alpha_t: " << alpha_t << endl;
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
cout << "exp(alpha_t): " << exp(alpha_t) << endl;
cout << "exp(-alpha_t): " << exp(-alpha_t) << endl;
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
cout << "totalWeights: " << totalWeights << endl;
cout << "Before normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
weights_ = weights_ / totalWeights;
cout << "After normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
map<string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = vector<string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(string{ PATH }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
}
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<string>("dataset");
path = program.get<string>("path");
model_name = program.get<string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
/*
* Begin Processing
*/
auto handler = ArffFiles();
handler.load(complete_file_name, class_last);
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
vector<string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<string, string>& item) { return item.first; });
// Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<string, vector<int>> states;
for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]);
}
states[className] = vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
cout << line << endl;
}
cout << "--- Topological Order ---" << endl;
auto order = clf->topological_order();
for (auto name : order) {
cout << name << ", ";
}
cout << "end." << endl;
auto score = clf->score(Xd, y);
cout << "Score: " << score << endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
string stratified_string = stratified ? " Stratified" : "";
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
cout << "==========================================" << endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
platform::Fold* fold;
if (stratified)
fold = new platform::StratifiedKFold(nFolds, y, seed);
else
fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
cout << "Fold: " << i + 1 << endl;
if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
cout << "Score Train: " << score_train << endl;
cout << "Score Test : " << score_test << endl;
cout << "-------------------------------------------------------------------------------" << endl;
}
cout << "**********************************************************************************" << endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl;
cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
}

34
src/AODE.cc Normal file
View File

@@ -0,0 +1,34 @@
#include "AODE.h"
namespace bayesnet {
    // AODE ensemble: one SPODE model per feature, each with significance 1.0.
    // predict_voting selects vote-based vs probability-based prediction in Ensemble.
    AODE::AODE(bool predict_voting) : Ensemble(predict_voting)
    {
        validHyperparameters = { "predict_voting" };
    }
    // Accepts only the "predict_voting" hyperparameter; any leftover key is rejected.
    void AODE::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("predict_voting")) {
            predict_voting = hyperparameters["predict_voting"];
            hyperparameters.erase("predict_voting");
        }
        if (!hyperparameters.empty()) {
            throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
        }
    }
    // Builds one SPODE per feature; `weights` is unused by AODE.
    void AODE::buildModel(const torch::Tensor& weights)
    {
        models.clear();
        significanceModels.clear();
        // cast avoids the signed/unsigned comparison between int and size_t
        for (int i = 0; i < static_cast<int>(features.size()); ++i) {
            models.push_back(std::make_unique<SPODE>(i));
        }
        n_models = models.size();
        significanceModels = std::vector<double>(n_models, 1.0);
    }
    // Delegates graph generation to the Ensemble base class.
    std::vector<std::string> AODE::graph(const std::string& title) const
    {
        return Ensemble::graph(title);
    }
}

View File

@@ -4,12 +4,13 @@
#include "SPODE.h"
namespace bayesnet {
// AODE ensemble classifier (one SPODE per feature).
class AODE : public Ensemble {
public:
    AODE(bool predict_voting = true);
    virtual ~AODE() {};
    void setHyperparameters(const nlohmann::json& hyperparameters) override;
    std::vector<std::string> graph(const std::string& title = "AODE") const override;
protected:
    void buildModel(const torch::Tensor& weights) override;
};
}
#endif

View File

@@ -1,17 +1,30 @@
#include "AODELd.h"
#include "Models.h"
namespace bayesnet {
// Locally-discretized AODE: Proposal supplies the local discretization machinery.
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
{
    validHyperparameters = { "predict_voting" };
}
// Accepts only the "predict_voting" hyperparameter; any leftover key is rejected.
void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("predict_voting")) {
        predict_voting = hyperparameters["predict_voting"];
        hyperparameters.erase("predict_voting");
    }
    if (!hyperparameters.empty()) {
        throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
    }
}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
@@ -26,7 +39,7 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODELd>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
significanceModels = std::vector<double>(n_models, 1.0);
}
void AODELd::trainModel(const torch::Tensor& weights)
{
@@ -34,7 +47,7 @@ namespace bayesnet {
model->fit(Xf, y, features, className, states);
}
}
vector<string> AODELd::graph(const string& name) const
std::vector<std::string> AODELd::graph(const std::string& name) const
{
return Ensemble::graph(name);
}

View File

@@ -5,17 +5,16 @@
#include "SPODELd.h"
namespace bayesnet {
// AODE ensemble with local discretization of continuous features.
class AODELd : public Ensemble, public Proposal {
public:
    AODELd(bool predict_voting = true);
    virtual ~AODELd() = default;
    AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, std::map<std::string, std::vector<int>>& states_) override;
    void setHyperparameters(const nlohmann::json& hyperparameters) override;
    std::vector<std::string> graph(const std::string& name = "AODELd") const override;
protected:
    void trainModel(const torch::Tensor& weights) override;
    void buildModel(const torch::Tensor& weights) override;
};
}
#endif // !AODELD_H

41
src/BaseClassifier.h Normal file
View File

@@ -0,0 +1,41 @@
#ifndef BASE_H
#define BASE_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <vector>
namespace bayesnet {
    // Status a classifier can report after training (e.g. WARNING when not
    // every feature could be used).
    enum status_t { NORMAL, WARNING, ERROR };
    // Abstract interface implemented by every classifier and ensemble in the
    // library. Offers fit/predict/score over both std::vector and torch::Tensor
    // representations of the data.
    class BaseClassifier {
    public:
        // X is nxm std::vector, y is nx1 std::vector
        virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
        // X is nxm tensor, y is nx1 tensor
        virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
        virtual ~BaseClassifier() = default;
        virtual torch::Tensor predict(torch::Tensor& X) = 0;
        virtual std::vector<int> predict(std::vector<std::vector<int>>& X) = 0;
        virtual torch::Tensor predict_proba(torch::Tensor& X) = 0;
        virtual std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) = 0;
        virtual status_t getStatus() const = 0;
        virtual float score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
        virtual float score(torch::Tensor& X, torch::Tensor& y) = 0;
        virtual int getNumberOfNodes() const = 0;
        virtual int getNumberOfEdges() const = 0;
        virtual int getNumberOfStates() const = 0;
        virtual int getClassNumStates() const = 0;
        virtual std::vector<std::string> show() const = 0;
        virtual std::vector<std::string> graph(const std::string& title = "") const = 0;
        virtual std::string getVersion() = 0;
        virtual std::vector<std::string> topological_order() = 0;
        // Free-form notes generated during training (e.g. early-stop reasons).
        virtual std::vector<std::string> getNotes() const = 0;
        virtual void dump_cpt() const = 0;
        virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
        std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
    protected:
        virtual void trainModel(const torch::Tensor& weights) = 0;
        // Hyperparameter names each concrete classifier accepts.
        std::vector<std::string> validHyperparameters;
    };
}
#endif

View File

@@ -1,16 +1,16 @@
#include "BayesMetrics.h"
#include "Mst.h"
namespace bayesnet {
// samples is an (n+1)xm tensor used to fit the model; the last row is y.
Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
    : samples(samples)
    , features(features)
    , className(className)
    , classNumStates(classNumStates)
{
}
//samples is nxm vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
//samples is nxm std::vector used to fit the model
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features)
, className(className)
, classNumStates(classNumStates)
@@ -21,7 +21,7 @@ namespace bayesnet {
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
}
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{
// Return the K Best features
auto n = samples.size(0) - 1;
@@ -56,25 +56,15 @@ namespace bayesnet {
}
return featuresKBest;
}
vector<double> Metrics::getScoresKBest() const
std::vector<double> Metrics::getScoresKBest() const
{
return scoresKBest;
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{
vector<pair<string, string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{
auto result = vector<double>();
auto source = vector<string>(features);
auto result = std::vector<double>();
auto source = std::vector<std::string>(features);
source.push_back(className);
auto combinations = doCombinations(source);
// Compute class prior
@@ -110,7 +100,7 @@ namespace bayesnet {
return matrix;
}
// To use in Python
vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_)
std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
{
const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
@@ -131,7 +121,7 @@ namespace bayesnet {
{
int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts;
std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
@@ -165,7 +155,7 @@ namespace bayesnet {
and the indices of the weights as nodes of this square matrix using
Kruskal algorithm
*/
vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root)
std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
{
auto mst = MST(features, weights, root);
return mst.maximumSpanningTree();

49
src/BayesMetrics.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
    // Information-theoretic metrics over a discretized dataset: entropy,
    // mutual information, weighted conditional-edge matrix, K-best feature
    // selection, and a maximum-spanning-tree builder (used by TAN-like models).
    class Metrics {
    private:
        int classNumStates = 0;
        std::vector<double> scoresKBest;    // scores matching featuresKBest, filled by SelectKBestWeighted
        std::vector<int> featuresKBest; // sorted indices of the features
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
        std::string className;
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        std::vector<std::string> features;
        // Returns every unordered pair (source[i], source[j]) with i < j,
        // preserving the order of `source`.
        template <class T>
        std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
        {
            std::vector<std::pair<T, T>> result;
            for (int i = 0; i < source.size(); ++i) {
                T temp = source[i];
                for (int j = i + 1; j < source.size(); ++j) {
                    result.push_back({ temp, source[j] });
                }
            }
            return result;
        }
        // Removes and returns the first element of v (precondition: v is non-empty).
        template <class T>
        T pop_first(std::vector<T>& v)
        {
            T temp = v[0];
            v.erase(v.begin());
            return temp;
        }
    public:
        Metrics() = default;
        Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
        Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
        std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
        std::vector<double> getScoresKBest() const;
        double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
        std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
        torch::Tensor conditionalEdge(const torch::Tensor& weights);
        std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
    };
}
#endif

View File

@@ -1,18 +0,0 @@
#include "AODE.h"
namespace bayesnet {
AODE::AODE() : Ensemble() {}
void AODE::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
}
vector<string> AODE::graph(const string& title) const
{
return Ensemble::graph(title);
}
}

View File

@@ -1,36 +0,0 @@
#ifndef BASE_H
#define BASE_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <vector>
namespace bayesnet {
using namespace std;
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public:
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0;
status_t virtual getStatus() const = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.2.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
};
}
#endif

View File

@@ -1,32 +0,0 @@
#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class Metrics {
private:
Tensor samples; // nxm tensor used to fit the model
vector<string> features;
string className;
int classNumStates = 0;
vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features
double entropy(const Tensor& feature, const Tensor& weights);
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&);
public:
Metrics() = default;
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0);
vector<double> getScoresKBest() const;
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root);
};
}
#endif

View File

@@ -1,148 +0,0 @@
#include "BoostAODE.h"
#include <set>
#include "BayesMetrics.h"
#include "Colors.h"
#include "Folding.h"
#include <limits.h>
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
}
}
void BoostAODE::validationInit()
{
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
auto fold = platform::StratifiedKFold(5, y_, 271);
dataset_ = torch::clone(dataset);
// save input dataset
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
dataset = X_train;
m = X_train.size(1);
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
// Build dataset with train data
buildDataset(y_train);
} else {
// Use all data to train
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
}
// AdaBoost-style boosting loop: each round trains one SPODE on the weighted
// samples, computes its error (epsilon_t) and amount of say (alpha_t),
// re-weights the samples and stores the model. Stops on: feature exhaustion,
// n_models == maxModels (when repeatSparent), epsilon_t > 0.5, or the
// validation accuracy stalling for more than `tolerance` rounds.
void BoostAODE::trainModel(const torch::Tensor& weights)
{
    models.clear();
    n_models = 0;
    if (maxModels == 0)
        maxModels = .1 * n > 10 ? .1 * n : n; // default: 10% of features, or all when few
    validationInit();
    Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool exitCondition = false;
    unordered_set<int> featuresUsed;
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double delta = 1.0;
    double threshold = 1e-4;
    int tolerance = 5; // number of times the accuracy can be lower than the threshold
    int count = 0; // number of times the accuracy is lower than the threshold
    fitted = true; // to enable predict
    // Step 0: Set the finish condition
    // if not repeatSparent a finish condition is run out of features
    // n_models == maxModels
    // epsilon sub t > 0.5 => inverse the weights policy
    // validation error is not decreasing
    while (!exitCondition) {
        // Step 1: Build ranking with mutual information
        auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
        unique_ptr<Classifier> model;
        auto feature = featureSelection[0];
        if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
            // Pick the best-ranked feature that has not been used yet
            bool found = false;
            for (auto feat : featureSelection) {
                if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
                    continue;
                }
                found = true;
                feature = feat;
                break;
            }
            if (!found) {
                // Every feature already hosts a model: nothing left to add
                exitCondition = true;
                continue;
            }
        }
        featuresUsed.insert(feature);
        model = std::make_unique<SPODE>(feature);
        model->fit(dataset, features, className, states, weights_);
        auto ypred = model->predict(X_train);
        // Step 3.1: Compute the classifier amout of say
        auto mask_wrong = ypred != y_train;
        auto mask_right = ypred == y_train;
        auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
        double epsilon_t = masked_weights.sum().item<double>();
        double wt = (1 - epsilon_t) / epsilon_t;
        double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
        // Step 3.2: Update weights for next classifier
        // Step 3.2.1: Update weights of wrong samples
        weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
        // Step 3.2.2: Update weights of right samples
        weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
        // Step 3.3: Normalise the weights
        double totalWeights = torch::sum(weights_).item<double>();
        weights_ = weights_ / totalWeights;
        // Step 3.4: Store classifier and its accuracy to weigh its future vote
        models.push_back(std::move(model));
        significanceModels.push_back(alpha_t);
        n_models++;
        if (convergence) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                delta = accuracy - priorAccuracy;
            }
            if (delta < threshold) {
                count++;
            }
            // BUGFIX: keep the latest accuracy so delta measures round-to-round
            // improvement instead of distance to the very first round's accuracy.
            priorAccuracy = accuracy;
        }
        // Parentheses make the intended grouping explicit (&& binds tighter than ||).
        exitCondition = (n_models == maxModels && repeatSparent) || epsilon_t > 0.5 || count > tolerance;
    }
    if (featuresUsed.size() != features.size()) {
        status = WARNING;
    }
}
// Graphviz representation: delegates to the generic Ensemble implementation.
vector<string> BoostAODE::graph(const string& title) const
{
    return Ensemble::graph(title);
}
}

View File

@@ -1,25 +0,0 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
    // AdaBoost-style ensemble of SPODE classifiers (Boosted AODE).
    class BoostAODE : public Ensemble {
    public:
        BoostAODE();
        virtual ~BoostAODE() {};
        // Graphviz output, delegated to Ensemble::graph.
        vector<string> graph(const string& title = "BoostAODE") const override;
        // Accepted keys: repeatSparent, maxModels, ascending, convergence.
        void setHyperparameters(nlohmann::json& hyperparameters) override;
    protected:
        void buildModel(const torch::Tensor& weights) override;
        void trainModel(const torch::Tensor& weights) override;
    private:
        torch::Tensor dataset_; // clone of the full dataset taken before the train/validation split
        torch::Tensor X_train, y_train, X_test, y_test;
        // Splits dataset into train/validation folds when convergence is enabled.
        void validationInit();
        bool repeatSparent = false; // if true, a feature can be selected more than once
        int maxModels = 0; // 0 means "derive a default from the number of features"
        bool ascending = false; //Process KBest features ascending or descending order
        bool convergence = false; //if true, stop when the model does not improve
    };
}
#endif

View File

@@ -1,9 +0,0 @@
# Header search paths for the bundled third-party libraries and project sources.
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
# Core BayesNet library: network, nodes, metrics and every classifier/ensemble model.
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
    KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
    Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,56 +0,0 @@
#ifndef CLASSIFIER_H
#define CLASSIFIER_H
#include <torch/torch.h>
#include "BaseClassifier.h"
#include "Network.h"
#include "BayesMetrics.h"
using namespace std;
using namespace torch;
namespace bayesnet {
    // Base class for single-model Bayesian classifiers: owns the network,
    // the dataset tensor and the fit/predict/score plumbing shared by the
    // concrete models (KDB, TAN, SPODE, ...).
    class Classifier : public BaseClassifier {
    private:
        // Common tail of every fit() overload: stores metadata and trains the
        // network with the given sample weights.
        Classifier& build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights);
    protected:
        bool fitted;
        int m, n; // m: number of samples, n: number of features
        Network model;
        Metrics metrics;
        vector<string> features;
        string className;
        map<string, vector<int>> states; // discrete states per feature and for the class
        Tensor dataset; // (n+1)xm tensor
        status_t status = NORMAL;
        void checkFitParameters();
        // Defines the network structure; every concrete classifier implements it.
        virtual void buildModel(const torch::Tensor& weights) = 0;
        void trainModel(const torch::Tensor& weights) override;
        void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters);
        // Appends y as the last row of the dataset tensor.
        void buildDataset(torch::Tensor& y);
    public:
        Classifier(Network model);
        virtual ~Classifier() = default;
        Classifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
        void addNodes();
        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        Tensor predict(Tensor& X) override;
        status_t getStatus() const override { return status; }
        vector<int> predict(vector<vector<int>>& X) override;
        float score(Tensor& X, Tensor& y) override;
        float score(vector<vector<int>>& X, vector<int>& y) override;
        vector<string> show() const override;
        vector<string> topological_order() override;
        void dump_cpt() const override;
        void setHyperparameters(nlohmann::json& hyperparameters) override;
    };
}
#endif

View File

@@ -1,142 +0,0 @@
#include "Ensemble.h"
namespace bayesnet {
using namespace torch;
// An Ensemble owns no network of its own: it wraps a collection of sub-models.
Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
// Default training: fit every pre-built sub-model on the shared dataset.
// NOTE(review): the weights parameter is ignored here — weighted fitting is
// handled by subclasses such as BoostAODE.
void Ensemble::trainModel(const torch::Tensor& weights)
{
    n_models = models.size();
    for (auto i = 0; i < n_models; ++i) {
        // fit with vectors
        models[i]->fit(dataset, features, className, states);
    }
}
// Combines per-model predictions into one class per sample: each model adds
// its significance to the bucket of the class it predicted, and the heaviest
// bucket wins.
vector<int> Ensemble::voting(Tensor& y_pred)
{
    auto y_pred_ = y_pred.accessor<int, 2>();
    vector<int> y_pred_final;
    int numClasses = states.at(className).size();
    // y_pred is m x n_models with the prediction of every model for each sample
    for (int i = 0; i < y_pred.size(0); ++i) {
        // votes store in each index (value of class) the significance added by each model
        // i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
        vector<double> votes(numClasses, 0.0);
        for (int j = 0; j < n_models; ++j) {
            votes[y_pred_[i][j]] += significanceModels.at(j);
        }
        // argsort in descending order
        auto indices = argsort(votes);
        y_pred_final.push_back(indices[0]); // class with the largest accumulated significance
    }
    return y_pred_final;
}
// Parallel prediction: one thread per sub-model fills its own column of
// y_pred, then the per-model predictions are merged with voting().
Tensor Ensemble::predict(Tensor& X)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
    auto threads{ vector<thread>() };
    mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(thread([&, i]() {
            auto ypredict = models[i]->predict(X);
            // the mutex serialises writes into the shared y_pred tensor
            lock_guard<mutex> lock(mtx);
            y_pred.index_put_({ "...", i }, ypredict);
        }));
    }
    for (auto& thread : threads) {
        thread.join();
    }
    return torch::tensor(voting(y_pred));
}
// Predicts one class per sample using significance-weighted voting.
// X is n features x m samples, stored row-per-feature.
vector<int> Ensemble::predict(vector<vector<int>>& X)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    long m_ = X[0].size(); // number of samples (columns)
    // Single defensive copy for the sub-models; the previous version
    // zero-initialised an n x m matrix and then overwrote every row with a
    // copy of X, doing the same work twice.
    auto Xd = X;
    Tensor y_pred = torch::zeros({ m_, n_models }, kInt32);
    for (auto i = 0; i < n_models; ++i) {
        y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32));
    }
    return voting(y_pred);
}
// Accuracy on tensor data: fraction of samples whose predicted class
// matches the corresponding label in y.
float Ensemble::score(Tensor& X, Tensor& y)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    auto predictions = predict(X);
    const auto total = predictions.size(0);
    int hits = 0;
    for (int idx = 0; idx < total; ++idx) {
        hits += predictions[idx].item<int>() == y[idx].item<int>() ? 1 : 0;
    }
    return (double)hits / total;
}
// Accuracy on vector-encoded data: proportion of correct predictions.
float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
{
    if (!fitted) {
        throw logic_error("Ensemble has not been fitted");
    }
    auto predictions = predict(X);
    int hits = 0;
    for (size_t idx = 0; idx < predictions.size(); ++idx) {
        hits += predictions[idx] == y[idx] ? 1 : 0;
    }
    return (double)hits / predictions.size();
}
// Concatenates the textual description of every sub-model into one list.
vector<string> Ensemble::show() const
{
    vector<string> lines;
    for (auto idx = 0u; idx < n_models; ++idx) {
        const auto part = models[idx]->show();
        lines.insert(lines.end(), part.begin(), part.end());
    }
    return lines;
}
// Builds the graphviz description of every sub-model, suffixing the title
// with the model index so each subgraph gets a unique name.
vector<string> Ensemble::graph(const string& title) const
{
    vector<string> lines;
    for (auto idx = 0u; idx < n_models; ++idx) {
        const auto part = models[idx]->graph(title + "_" + to_string(idx));
        lines.insert(lines.end(), part.begin(), part.end());
    }
    return lines;
}
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
nodes += models[i]->getNumberOfNodes();
}
return nodes;
}
// Total edge count, summed over all sub-models.
int Ensemble::getNumberOfEdges() const
{
    int total = 0;
    for (auto idx = 0u; idx < n_models; ++idx) {
        total += models[idx]->getNumberOfEdges();
    }
    return total;
}
// Total state count, summed over all sub-models.
int Ensemble::getNumberOfStates() const
{
    int total = 0;
    for (auto idx = 0u; idx < n_models; ++idx) {
        total += models[idx]->getNumberOfStates();
    }
    return total;
}
}

View File

@@ -1,41 +0,0 @@
#ifndef ENSEMBLE_H
#define ENSEMBLE_H
#include <torch/torch.h>
#include "Classifier.h"
#include "BayesMetrics.h"
#include "bayesnetUtils.h"
using namespace std;
using namespace torch;
namespace bayesnet {
    // A classifier built from several sub-classifiers whose predictions are
    // combined by significance-weighted voting (see voting()).
    class Ensemble : public Classifier {
    private:
        Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
    protected:
        unsigned n_models; // number of fitted sub-models
        vector<unique_ptr<Classifier>> models;
        vector<double> significanceModels; // voting weight of each sub-model
        void trainModel(const torch::Tensor& weights) override;
        // Merges per-model predictions (m x n_models) into one class per sample.
        vector<int> voting(Tensor& y_pred);
    public:
        Ensemble();
        virtual ~Ensemble() = default;
        Tensor predict(Tensor& X) override;
        vector<int> predict(vector<vector<int>>& X) override;
        float score(Tensor& X, Tensor& y) override;
        float score(vector<vector<int>>& X, vector<int>& y) override;
        // Node/edge/state counts are aggregated over all sub-models.
        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        vector<string> show() const override;
        vector<string> graph(const string& title) const override;
        // A single topological order is not defined for an ensemble.
        vector<string> topological_order() override
        {
            return vector<string>();
        }
        void dump_cpt() const override
        {
        }
    };
}
#endif

View File

@@ -1,19 +0,0 @@
#ifndef KDBLD_H
#define KDBLD_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
    using namespace std;
    // KDB variant for numeric data: combines the KDB structure with the
    // Proposal local-discretization machinery.
    class KDBLd : public KDB, public Proposal {
    private:
    public:
        explicit KDBLd(int k); // k is forwarded to the KDB constructor
        virtual ~KDBLd() = default;
        KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        vector<string> graph(const string& name = "KDB") const override;
        Tensor predict(Tensor& X) override;
        static inline string version() { return "0.0.1"; };
    };
}
#endif // !KDBLD_H

View File

@@ -1,35 +0,0 @@
#ifndef MST_H
#define MST_H
#include <torch/torch.h>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    // Maximum spanning tree over a feature-weight matrix, rooted at `root`.
    class MST {
    private:
        Tensor weights; // pairwise feature weights used as edge weights
        vector<string> features;
        int root = 0;
    public:
        MST() = default;
        MST(const vector<string>& features, const Tensor& weights, const int root);
        // Returns the tree as a list of feature-index edge pairs.
        vector<pair<int, int>> maximumSpanningTree();
    };
    // Helper graph for Kruskal's algorithm (union-find based).
    class Graph {
    private:
        int V; // number of nodes in graph
        vector <pair<float, pair<int, int>>> G; // vector for graph
        vector <pair<float, pair<int, int>>> T; // vector for mst
        vector<int> parent; // union-find parent array
    public:
        explicit Graph(int V);
        void addEdge(int u, int v, float wt);
        int find_set(int i);
        void union_set(int u, int v);
        void kruskal_algorithm();
        void display_mst();
        vector <pair<float, pair<int, int>>> get_mst() { return T; }
    };
}
#endif

View File

@@ -1,59 +0,0 @@
#ifndef NETWORK_H
#define NETWORK_H
#include "Node.h"
#include <map>
#include <vector>
namespace bayesnet {
    // Discrete Bayesian network: holds the nodes, fits their CPTs from data
    // and answers classification queries by exact inference.
    class Network {
    private:
        map<string, unique_ptr<Node>> nodes;
        bool fitted;
        float maxThreads = 0.95; // presumably the fraction of threads to use when fitting — confirm in Network.cc
        int classNumStates;
        vector<string> features; // Including classname
        string className;
        double laplaceSmoothing;
        torch::Tensor samples; // nxm tensor used to fit the model
        // Cycle detection used to keep the graph acyclic when adding edges.
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
        vector<double> predict_sample(const vector<int>&);
        vector<double> predict_sample(const torch::Tensor&);
        vector<double> exactInference(map<string, int>&);
        double computeFactor(map<string, int>&);
        void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights);
        void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights);
        void setStates(const map<string, vector<int>>&);
    public:
        Network();
        explicit Network(float);
        explicit Network(Network&);
        ~Network() = default;
        torch::Tensor& getSamples();
        float getmaxThreads();
        void addNode(const string&);
        void addEdge(const string&, const string&);
        map<string, std::unique_ptr<Node>>& getNodes();
        vector<string> getFeatures() const;
        int getStates() const;
        vector<pair<string, string>> getEdges() const;
        int getNumEdges() const;
        int getClassNumStates() const;
        string getClassName() const;
        // fit() overloads accept vector-, tensor- or combined-dataset input.
        void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
        vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
        torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
        torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
        vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
        torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
        double score(const vector<vector<int>>&, const vector<int>&);
        vector<string> topological_sort();
        vector<string> show() const;
        vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
        void initialize();
        void dump_cpt() const;
        inline string version() { return "0.2.0"; }
    };
}
#endif

View File

@@ -1,37 +0,0 @@
#ifndef NODE_H
#define NODE_H
#include <torch/torch.h>
#include <unordered_set>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    // A single variable of the Bayesian network: keeps its parent/child links
    // and the conditional probability table (CPT) fitted from data.
    class Node {
    private:
        string name;
        vector<Node*> parents;
        vector<Node*> children;
        int numStates; // number of states of the variable
        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
        vector<int64_t> dimensions; // dimensions of the cpTable
    public:
        vector<pair<string, string>> combinations(const vector<string>&);
        explicit Node(const string&);
        void clear();
        void addParent(Node*);
        void addChild(Node*);
        void removeParent(Node*);
        void removeChild(Node*);
        string getName() const;
        vector<Node*>& getParents();
        vector<Node*>& getChildren();
        torch::Tensor& getCPT();
        // Fits cpTable from the dataset using Laplace smoothing and sample weights.
        void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
        int getNumStates() const;
        void setNumStates(int);
        unsigned minFill();
        vector<string> graph(const string& clasName); // Returns a vector of strings representing the graph in graphviz format
        float getFactorValue(map<string, int>&);
    };
}
#endif

View File

@@ -1,19 +0,0 @@
#ifndef SPODELD_H
#define SPODELD_H
#include "SPODE.h"
#include "Proposal.h"
namespace bayesnet {
    using namespace std;
    // SPODE variant for numeric data: combines the SPODE structure with the
    // Proposal local-discretization machinery.
    class SPODELd : public SPODE, public Proposal {
    public:
        explicit SPODELd(int root); // root is forwarded to the SPODE constructor
        virtual ~SPODELd() = default;
        SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        SPODELd& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        vector<string> graph(const string& name = "SPODE") const override;
        Tensor predict(Tensor& X) override;
        static inline string version() { return "0.0.1"; };
    };
}
#endif // !SPODELD_H

View File

@@ -1,19 +0,0 @@
#ifndef TANLD_H
#define TANLD_H
#include "TAN.h"
#include "Proposal.h"
namespace bayesnet {
    using namespace std;
    // TAN variant for numeric data: combines the TAN structure with the
    // Proposal local-discretization machinery.
    class TANLd : public TAN, public Proposal {
    private:
    public:
        TANLd();
        virtual ~TANLd() = default;
        TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
        vector<string> graph(const string& name = "TAN") const override;
        Tensor predict(Tensor& X) override;
        static inline string version() { return "0.0.1"; };
    };
}
#endif // !TANLD_H

View File

@@ -1,27 +0,0 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
// Returns the indices that would order nums from largest to smallest.
vector<int> argsort(vector<double>& nums)
{
    vector<int> order(nums.size());
    iota(order.begin(), order.end(), 0);
    auto byValueDescending = [&nums](int lhs, int rhs) { return nums[lhs] > nums[rhs]; };
    sort(order.begin(), order.end(), byValueDescending);
    return order;
}
// Transposes an m x n integer tensor into an n x m vector-of-vectors:
// each output row is one column of the input tensor.
vector<vector<int>> tensorToVector(Tensor& tensor)
{
    // convert mxn tensor to nxm vector
    vector<vector<int>> result;
    // Iterate over cols
    for (int i = 0; i < tensor.size(1); ++i) {
        auto col_tensor = tensor.index({ "...", i });
        // data_ptr<int> assumes contiguous int32 storage — TODO confirm at call sites
        auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
        result.push_back(col);
    }
    return result;
}
}

View File

@@ -1,11 +0,0 @@
#ifndef BAYESNET_UTILS_H
#define BAYESNET_UTILS_H
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
    using namespace std;
    using namespace torch;
    // Returns the indices that would sort nums in descending order.
    vector<int> argsort(vector<double>& nums);
    // Transposes an m x n integer tensor into an n x m vector of vectors.
    vector<vector<int>> tensorToVector(Tensor& tensor);
}
#endif //BAYESNET_UTILS_H

230
src/BoostAODE.cc Normal file
View File

@@ -0,0 +1,230 @@
#include <set>
#include <functional>
#include <limits.h>
#include "BoostAODE.h"
#include "CFS.h"
#include "FCBF.h"
#include "IWSS.h"
#include "folding.hpp"
namespace bayesnet {
// predict_voting is forwarded to Ensemble and selects how the ensemble
// combines sub-model outputs; validHyperparameters lists every key that
// setHyperparameters accepts.
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{
    validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance", "predict_voting" };
}
// Resets the ensemble and prepares the train/validation data.  With
// convergence enabled, fold 0 of a 5-fold stratified split (seed 271) is
// held out for validation and `dataset`/`m`/`metrics` are rebuilt from the
// remaining samples; otherwise all data is used for training.
void BoostAODE::buildModel(const torch::Tensor& weights)
{
    // Models shall be built in trainModel
    models.clear();
    significanceModels.clear();
    n_models = 0;
    // Prepare the validation dataset
    auto y_ = dataset.index({ -1, "..." }); // labels live in the last row of dataset
    if (convergence) {
        // Prepare train & validation sets from train data
        auto fold = folding::StratifiedKFold(5, y_, 271);
        dataset_ = torch::clone(dataset);
        // save input dataset
        auto [train, test] = fold.getFold(0);
        auto train_t = torch::tensor(train);
        auto test_t = torch::tensor(test);
        // Get train and validation sets
        X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
        y_train = dataset.index({ -1, train_t });
        X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
        y_test = dataset.index({ -1, test_t });
        dataset = X_train; // from here on, models are trained on the train split only
        m = X_train.size(1); // number of training samples after the split
        auto n_classes = states.at(className).size();
        metrics = Metrics(dataset, features, className, n_classes);
        // Build dataset with train data
        buildDataset(y_train);
    } else {
        // Use all data to train
        X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
        y_train = y_;
    }
}
// Consumes every recognised key from the JSON object one by one; any key
// left afterwards is unknown and raises invalid_argument.
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    auto hyperparameters = hyperparameters_; // local copy so consumed keys can be erased
    if (hyperparameters.contains("repeatSparent")) {
        repeatSparent = hyperparameters["repeatSparent"];
        hyperparameters.erase("repeatSparent");
    }
    if (hyperparameters.contains("maxModels")) {
        maxModels = hyperparameters["maxModels"];
        hyperparameters.erase("maxModels");
    }
    if (hyperparameters.contains("ascending")) {
        ascending = hyperparameters["ascending"];
        hyperparameters.erase("ascending");
    }
    if (hyperparameters.contains("convergence")) {
        convergence = hyperparameters["convergence"];
        hyperparameters.erase("convergence");
    }
    if (hyperparameters.contains("threshold")) {
        threshold = hyperparameters["threshold"];
        hyperparameters.erase("threshold");
    }
    if (hyperparameters.contains("tolerance")) {
        tolerance = hyperparameters["tolerance"];
        hyperparameters.erase("tolerance");
    }
    if (hyperparameters.contains("predict_voting")) {
        predict_voting = hyperparameters["predict_voting"];
        hyperparameters.erase("predict_voting");
    }
    if (hyperparameters.contains("select_features")) {
        // Validate the algorithm name here; initializeModels instantiates it.
        auto selectedAlgorithm = hyperparameters["select_features"];
        std::vector<std::string> algos = { "IWSS", "FCBF", "CFS" };
        selectFeatures = true;
        algorithm = selectedAlgorithm;
        if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
            throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
        }
        hyperparameters.erase("select_features");
    }
    if (!hyperparameters.empty()) {
        throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
    }
}
// Seeds the ensemble with one SPODE per feature chosen by the configured
// feature-selection algorithm (CFS, IWSS or FCBF), each with significance
// 1.0.  Returns the set of features already used so trainModel can skip them.
std::unordered_set<int> BoostAODE::initializeModels()
{
    std::unordered_set<int> featuresUsed;
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    int maxFeatures = 0; // 0 lets the selector decide how many features to keep
    if (algorithm == "CFS") {
        featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
    } else if (algorithm == "IWSS") {
        if (threshold < 0 || threshold > 0.5) {
            throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
        }
        featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else if (algorithm == "FCBF") {
        if (threshold < 1e-7 || threshold > 1) {
            throw std::invalid_argument("Invalid threshold value [1e-7, 1]");
        }
        featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else {
        // BUGFIX: previously an unrecognised algorithm left featureSelector as
        // nullptr and the fit() call below dereferenced it.
        throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
    }
    featureSelector->fit();
    auto selected = featureSelector->getFeatures();
    for (const int& feature : selected) {
        featuresUsed.insert(feature);
        std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
        model->fit(dataset, features, className, states, weights_);
        models.push_back(std::move(model));
        significanceModels.push_back(1.0);
        n_models++;
    }
    notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + algorithm);
    delete featureSelector;
    featureSelector = nullptr; // avoid a dangling member pointer after release
    return featuresUsed;
}
// AdaBoost-style boosting loop: each round trains one SPODE on the weighted
// samples, re-weights the samples by its error and stores the model with its
// amount of say (alpha_t).
void BoostAODE::trainModel(const torch::Tensor& weights)
{
    fitted = true;
    // Algorithm based on the adaboost algorithm for classification
    // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
    std::unordered_set<int> featuresUsed;
    if (selectFeatures) {
        featuresUsed = initializeModels();
    }
    bool resetMaxModels = false;
    if (maxModels == 0) {
        maxModels = .1 * n > 10 ? .1 * n : n; // default: 10% of the features, or all when few
        resetMaxModels = true; // Flag to unset maxModels
    }
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool exitCondition = false;
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double delta = 1.0;
    double convergence_threshold = 1e-4;
    int count = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // if not repeatSparent a finish condition is run out of features
    // n_models == maxModels
    // epsilon sub t > 0.5 => inverse the weights policy
    // validation error is not decreasing
    while (!exitCondition) {
        // Step 1: Build ranking with mutual information
        auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
        std::unique_ptr<Classifier> model;
        auto feature = featureSelection[0];
        if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
            // Pick the best-ranked feature that has not hosted a model yet
            bool used = true;
            for (const auto& feat : featureSelection) {
                if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
                    continue;
                }
                used = false;
                feature = feat;
                break;
            }
            if (used) {
                // Every feature already hosts a model: nothing left to add
                exitCondition = true;
                continue;
            }
        }
        model = std::make_unique<SPODE>(feature);
        model->fit(dataset, features, className, states, weights_);
        auto ypred = model->predict(X_train);
        // Step 3.1: Compute the classifier amout of say
        auto mask_wrong = ypred != y_train;
        auto mask_right = ypred == y_train;
        auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
        double epsilon_t = masked_weights.sum().item<double>();
        if (epsilon_t > 0.5) {
            // Inverse the weights policy (plot ln(wt))
            // "In each round of AdaBoost, there is a sanity check to ensure that the current base
            // learner is better than random guess" (Zhi-Hua Zhou, 2012)
            // The freshly trained model is discarded together with the round.
            break;
        }
        double wt = (1 - epsilon_t) / epsilon_t;
        double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
        // Step 3.2: Update weights for next classifier
        // Step 3.2.1: Update weights of wrong samples
        weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
        // Step 3.2.2: Update weights of right samples
        weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
        // Step 3.3: Normalise the weights
        double totalWeights = torch::sum(weights_).item<double>();
        weights_ = weights_ / totalWeights;
        // Step 3.4: Store classifier and its accuracy to weigh its future vote
        featuresUsed.insert(feature);
        models.push_back(std::move(model));
        significanceModels.push_back(alpha_t);
        n_models++;
        if (convergence) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                delta = accuracy - priorAccuracy;
            }
            if (delta < convergence_threshold) {
                count++;
            }
            priorAccuracy = accuracy;
        }
        // NOTE(review): && binds tighter than ||, so maxModels only bounds the
        // loop when repeatSparent is set — confirm this grouping is intended.
        exitCondition = n_models >= maxModels && repeatSparent || count > tolerance;
    }
    if (featuresUsed.size() != features.size()) {
        notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
        status = WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
    if (resetMaxModels) {
        maxModels = 0;
    }
}
// Graphviz representation: delegates to the generic Ensemble implementation.
std::vector<std::string> BoostAODE::graph(const std::string& title) const
{
    return Ensemble::graph(title);
}
}

33
src/BoostAODE.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include <map>
#include "SPODE.h"
#include "FeatureSelect.h"
namespace bayesnet {
    // AdaBoost-style ensemble of SPODE classifiers with optional initial
    // feature selection (CFS / IWSS / FCBF) and convergence-based early stop.
    class BoostAODE : public Ensemble {
    public:
        BoostAODE(bool predict_voting = true);
        virtual ~BoostAODE() = default;
        std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
        void setHyperparameters(const nlohmann::json& hyperparameters) override;
    protected:
        void buildModel(const torch::Tensor& weights) override;
        void trainModel(const torch::Tensor& weights) override;
    private:
        torch::Tensor dataset_; // clone of the full dataset taken before the train/validation split
        torch::Tensor X_train, y_train, X_test, y_test;
        // Builds the initial models with the configured feature-selection algorithm.
        std::unordered_set<int> initializeModels();
        // Hyperparameters
        bool repeatSparent = false; // if true, a feature can be selected more than once
        int maxModels = 0; // 0 means "derive a default from the number of features"
        int tolerance = 0; // rounds the validation accuracy may stall before stopping
        bool ascending = false; //Process KBest features ascending or descending order
        bool convergence = false; //if true, stop when the model does not improve
        bool selectFeatures = false; // if true, use feature selection
        std::string algorithm = ""; // Selected feature selection algorithm
        FeatureSelect* featureSelector = nullptr; // owned only for the duration of initializeModels()
        double threshold = -1; // algorithm-specific threshold (IWSS/FCBF)
    };
}
#endif

72
src/CFS.cc Normal file
View File

@@ -0,0 +1,72 @@
#include "CFS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
// Correlation-based Feature Selection (Mark A. Hall): greedy forward search
// that repeatedly adds the candidate feature maximising the subset merit.
void CFS::fit()
{
    initialize();
    computeSuLabels();
    auto featureOrder = argsort(suLabels); // sort descending order
    auto continueCondition = true;
    auto feature = featureOrder[0];
    selectedFeatures.push_back(feature);
    selectedScores.push_back(suLabels[feature]);
    // NOTE(review): the erase below removes the feature just pushed, leaving
    // selectedScores one entry longer than selectedFeatures and keeping the
    // feature available in featureOrder — confirm this seeding is intended.
    selectedFeatures.erase(selectedFeatures.begin());
    while (continueCondition) {
        double merit = std::numeric_limits<double>::lowest();
        int bestFeature = -1;
        // Try each remaining candidate and keep the one with the best merit
        for (auto feature : featureOrder) {
            selectedFeatures.push_back(feature);
            // Compute merit with selectedFeatures
            auto meritNew = computeMeritCFS();
            if (meritNew > merit) {
                merit = meritNew;
                bestFeature = feature;
            }
            selectedFeatures.pop_back();
        }
        if (bestFeature == -1) {
            // meritNew has to be nan due to constant features
            break;
        }
        selectedFeatures.push_back(bestFeature);
        selectedScores.push_back(merit);
        featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
        continueCondition = computeContinueCondition(featureOrder);
    }
    fitted = true;
}
// Decides whether the greedy search should keep going: stops when the
// selection reached maxFeatures, no candidates remain, or the last five
// merits show no improvement (Hall's stopping criterion).
bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
{
    // NOTE(review): maxFeatures (int) is compared against size_t; with
    // maxFeatures == 0 this clause can only trigger via featureOrder running
    // empty — confirm that 0 means "no limit".
    if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
        return false;
    }
    if (selectedScores.size() >= 5) {
        /*
        "To prevent the best first search from exploring the entire
        feature subset search space, a stopping criterion is imposed.
        The search will terminate if five consecutive fully expanded
        subsets show no improvement over the current best subset."
        as stated in Mark A.Hall Thesis
        */
        double item_ant = std::numeric_limits<double>::lowest();
        int num = 0;
        std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
        for (auto item : lastFive) {
            if (item_ant == std::numeric_limits<double>::lowest()) {
                item_ant = item; // first element initialises the running previous value
            }
            if (item > item_ant) {
                break; // improvement found inside the window: keep searching
            } else {
                num++;
                item_ant = item;
            }
        }
        if (num == 5) {
            return false; // five non-improving subsets in a row: stop
        }
    }
    return true;
}
}

20
src/CFS.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef CFS_H
#define CFS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class CFS : public FeatureSelect {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
CFS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights)
{
}
virtual ~CFS() {};
void fit() override;
private:
bool computeContinueCondition(const std::vector<int>& featureOrder);
};
}
#endif

13
src/CMakeLists.txt Normal file
View File

@@ -0,0 +1,13 @@
include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/src
${CMAKE_BINARY_DIR}/configured_files/include
)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc )
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -2,10 +2,9 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
this->features = features;
this->className = className;
@@ -21,7 +20,7 @@ namespace bayesnet {
fitted = true;
return *this;
}
void Classifier::buildDataset(Tensor& ytmp)
void Classifier::buildDataset(torch::Tensor& ytmp)
{
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
@@ -29,8 +28,8 @@ namespace bayesnet {
}
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n";
std::cout << "X dimensions: " << dataset.sizes() << "\n";
std::cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
@@ -39,7 +38,7 @@ namespace bayesnet {
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
dataset = X;
buildDataset(y);
@@ -47,24 +46,24 @@ namespace bayesnet {
return build(features, className, states, weights);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) {
dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32));
dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
}
auto ytmp = torch::tensor(y, kInt32);
auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
@@ -72,57 +71,76 @@ namespace bayesnet {
void Classifier::checkFitParameters()
{
if (torch::is_floating_point(dataset)) {
throw invalid_argument("dataset (X, y) must be of type Integer");
throw std::invalid_argument("dataset (X, y) must be of type Integer");
}
if (n != features.size()) {
throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
throw std::invalid_argument("className not found in states");
}
for (auto feature : features) {
if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states");
throw std::invalid_argument("feature [" + feature + "] not found in states");
}
}
}
Tensor Classifier::predict(Tensor& X)
torch::Tensor Classifier::predict(torch::Tensor& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
return model.predict(X);
}
vector<int> Classifier::predict(vector<vector<int>>& X)
std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
auto m_ = X[0].size();
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
auto yp = model.predict(Xd);
return yp;
}
float Classifier::score(Tensor& X, Tensor& y)
torch::Tensor Classifier::predict_proba(torch::Tensor& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
Tensor y_pred = predict(X);
return model.predict_proba(X);
}
std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
auto m_ = X[0].size();
auto n_ = X.size();
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
// Convert to nxm vector
for (auto i = 0; i < n_; i++) {
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
auto yp = model.predict_proba(Xd);
return yp;
}
float Classifier::score(torch::Tensor& X, torch::Tensor& y)
{
torch::Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
float Classifier::score(vector<vector<int>>& X, vector<int>& y)
float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
return model.score(X, y);
}
vector<string> Classifier::show() const
std::vector<std::string> Classifier::show() const
{
return model.show();
}
@@ -137,7 +155,7 @@ namespace bayesnet {
int Classifier::getNumberOfNodes() const
{
// Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0;
return fitted ? model.getFeatures().size() : 0;
}
int Classifier::getNumberOfEdges() const
{
@@ -147,7 +165,11 @@ namespace bayesnet {
{
return fitted ? model.getStates() : 0;
}
vector<string> Classifier::topological_order()
int Classifier::getClassNumStates() const
{
return fitted ? model.getClassNumStates() : 0;
}
std::vector<std::string> Classifier::topological_order()
{
return model.topological_sort();
}
@@ -155,18 +177,8 @@ namespace bayesnet {
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters)
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const vector<string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
//For classifiers that don't have hyperparameters
}
}

59
src/Classifier.h Normal file
View File

@@ -0,0 +1,59 @@
#ifndef CLASSIFIER_H
#define CLASSIFIER_H
#include <torch/torch.h>
#include "BaseClassifier.h"
#include "Network.h"
#include "BayesMetrics.h"
namespace bayesnet {
// Base implementation of BaseClassifier backed by a Bayesian Network.
// Concrete models implement buildModel() to create the network structure;
// fitting, prediction and scoring are handled here.
class Classifier : public BaseClassifier {
public:
    Classifier(Network model);
    virtual ~Classifier() = default;
    // fit overloads accept discrete data as std::vectors or tensors; internally
    // everything is stored in 'dataset', a (n+1)xm tensor with y in the last row.
    Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
    Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
    Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
    Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
    // presumably creates the network nodes for features and class — confirm in Classifier.cc
    void addNodes();
    int getNumberOfNodes() const override;
    int getNumberOfEdges() const override;
    int getNumberOfStates() const override;
    int getClassNumStates() const override;
    torch::Tensor predict(torch::Tensor& X) override;
    std::vector<int> predict(std::vector<std::vector<int>>& X) override;
    // Per-sample class probabilities (delegated to the underlying model).
    torch::Tensor predict_proba(torch::Tensor& X) override;
    std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
    status_t getStatus() const override { return status; }
    std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
    float score(torch::Tensor& X, torch::Tensor& y) override;
    float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
    std::vector<std::string> show() const override;
    std::vector<std::string> topological_order() override;
    std::vector<std::string> getNotes() const override { return notes; }
    void dump_cpt() const override;
    void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
protected:
    bool fitted;
    unsigned int m, n; // m: number of samples, n: number of features
    Network model;
    Metrics metrics;
    std::vector<std::string> features;
    std::string className;
    std::map<std::string, std::vector<int>> states;
    torch::Tensor dataset; // (n+1)xm tensor
    status_t status = NORMAL;
    std::vector<std::string> notes; // Used to store messages occurred during the fit process
    void checkFitParameters();
    // Subclasses build the network structure here before training.
    virtual void buildModel(const torch::Tensor& weights) = 0;
    void trainModel(const torch::Tensor& weights) override;
    // Appends y (after validation) as the last row of 'dataset'.
    void buildDataset(torch::Tensor& y);
private:
    Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
};
}
#endif

216
src/Ensemble.cc Normal file
View File

@@ -0,0 +1,216 @@
#include "Ensemble.h"
namespace bayesnet {
// Ensemble of classifiers; predictions are aggregated either by weighted
// voting or by averaging class probabilities (see predict_voting).
Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
{
}
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
// Fit every member model on the shared (n+1)xm dataset tensor.
void Ensemble::trainModel(const torch::Tensor& weights)
{
    n_models = models.size();
    for (auto& model : models) {
        model->fit(dataset, features, className, states);
    }
}
// Index of the highest probability per sample (std::vector input).
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
{
    std::vector<int> y_pred;
    y_pred.reserve(X.size());
    for (const auto& row : X) {
        auto best = std::max_element(row.begin(), row.end());
        y_pred.push_back(static_cast<int>(std::distance(row.begin(), best)));
    }
    return y_pred;
}
// Index of the highest probability per sample (tensor input, along dim 1).
torch::Tensor Ensemble::compute_arg_max(torch::Tensor& X)
{
    return torch::argmax(X, 1);
}
// Convert a m x n_models matrix of predicted class labels into a
// m x numClasses matrix of weighted vote shares: each model adds its
// significance to the class it voted for, rows are then normalized by the
// total significance of all models.
// Fix: removed the unused local 'y_pred_final' present in the original.
torch::Tensor Ensemble::voting(torch::Tensor& votes)
{
    auto y_pred_ = votes.accessor<int, 2>();
    int numClasses = states.at(className).size();
    // votes is m x n_models with the prediction of every model for each sample
    auto result = torch::zeros({ votes.size(0), numClasses }, torch::kFloat32);
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    for (int i = 0; i < votes.size(0); ++i) {
        // n_votes stores, per class value, the total significance contributed
        // by the models that predicted that class for sample i.
        std::vector<double> n_votes(numClasses, 0.0);
        for (int j = 0; j < n_models; ++j) {
            n_votes[y_pred_[i][j]] += significanceModels.at(j);
        }
        result[i] = torch::tensor(n_votes);
    }
    // To only do one division and gain precision
    result /= sum;
    return result;
}
// Class probabilities (std::vector input): voting shares or averaged probabilities.
std::vector<std::vector<double>> Ensemble::predict_proba(std::vector<std::vector<int>>& X)
{
    if (!fitted) {
        throw std::logic_error(ENSEMBLE_NOT_FITTED);
    }
    if (predict_voting) {
        return predict_average_voting(X);
    }
    return predict_average_proba(X);
}
// Class probabilities (tensor input): voting shares or averaged probabilities.
torch::Tensor Ensemble::predict_proba(torch::Tensor& X)
{
    if (!fitted) {
        throw std::logic_error(ENSEMBLE_NOT_FITTED);
    }
    if (predict_voting) {
        return predict_average_voting(X);
    }
    return predict_average_proba(X);
}
// Hard predictions = argmax of the aggregated probabilities.
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
    auto probabilities = predict_proba(X);
    return compute_arg_max(probabilities);
}
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
    auto probabilities = predict_proba(X);
    return compute_arg_max(probabilities);
}
// Weighted average of every model's class probabilities (tensor input).
// One thread per model; the mutex serializes updates to the shared accumulator.
torch::Tensor Ensemble::predict_average_proba(torch::Tensor& X)
{
    auto n_states = models[0]->getClassNumStates();
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict_proba(X);
            std::lock_guard<std::mutex> lock(mtx);
            y_pred += ypredict * significanceModels[i];
        }));
    }
    for (auto& thread : threads) {
        thread.join();
    }
    // Normalize by the total significance so each row sums to 1.
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    y_pred /= sum;
    return y_pred;
}
// Same weighted average for std::vector input; result is m x n_states.
std::vector<std::vector<double>> Ensemble::predict_average_proba(std::vector<std::vector<int>>& X)
{
    auto n_states = models[0]->getClassNumStates();
    std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict_proba(X);
            assert(ypredict.size() == y_pred.size());
            assert(ypredict[0].size() == y_pred[0].size());
            std::lock_guard<std::mutex> lock(mtx);
            // Multiply each prediction by the significance of the model and then add it to the final prediction
            for (auto j = 0; j < ypredict.size(); ++j) {
                std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
                    // init-capture: the lambda's 'significanceModels' deliberately shadows the member vector
                    [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
            }
        }));
    }
    for (auto& thread : threads) {
        thread.join();
    }
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    //Divide each element of the prediction by the sum of the significances
    for (auto j = 0; j < y_pred.size(); ++j) {
        std::transform(y_pred[j].begin(), y_pred[j].end(), y_pred[j].begin(), [sum](double x) { return x / sum; });
    }
    return y_pred;
}
// Voting aggregation (std::vector input): convert to tensor, delegate to the
// tensor overload, convert the vote shares back.
std::vector<std::vector<double>> Ensemble::predict_average_voting(std::vector<std::vector<int>>& X)
{
    torch::Tensor Xt = bayesnet::vectorToTensor(X, false);
    auto y_pred = predict_average_voting(Xt);
    std::vector<std::vector<double>> result = tensorToVectorDouble(y_pred);
    return result;
}
// Voting aggregation (tensor input): each model predicts hard labels in its
// own thread; the collected m x n_models votes are turned into weighted
// vote shares by voting().
torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X)
{
    // Build a m x n_models tensor with the predictions of each model
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict(X);
            std::lock_guard<std::mutex> lock(mtx);
            y_pred.index_put_({ "...", i }, ypredict);
        }));
    }
    for (auto& thread : threads) {
        thread.join();
    }
    return voting(y_pred);
}
// Accuracy on tensor data: fraction of samples whose prediction equals the label.
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
    auto y_pred = predict(X);
    int correct = 0;
    for (int i = 0; i < y_pred.size(0); ++i) {
        correct += y_pred[i].item<int>() == y[i].item<int>() ? 1 : 0;
    }
    return (double)correct / y_pred.size(0);
}
// Accuracy on std::vector data.
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
    auto y_pred = predict(X);
    int correct = 0;
    for (int i = 0; i < y_pred.size(); ++i) {
        correct += y_pred[i] == y[i] ? 1 : 0;
    }
    return (double)correct / y_pred.size();
}
// Concatenation of every member model's textual representation.
// Bounded by n_models (set in trainModel) so it stays empty before fitting.
std::vector<std::string> Ensemble::show() const
{
    std::vector<std::string> result;
    for (auto i = 0u; i < n_models; ++i) {
        const auto lines = models[i]->show();
        result.insert(result.end(), lines.begin(), lines.end());
    }
    return result;
}
// Concatenation of every member model's graph, suffixing the title with its index.
std::vector<std::string> Ensemble::graph(const std::string& title) const
{
    std::vector<std::string> result;
    for (auto i = 0u; i < n_models; ++i) {
        const auto lines = models[i]->graph(title + "_" + std::to_string(i));
        result.insert(result.end(), lines.begin(), lines.end());
    }
    return result;
}
// Total node count summed over the fitted member models.
int Ensemble::getNumberOfNodes() const
{
    int total = 0;
    for (auto i = 0u; i < n_models; ++i) {
        total += models[i]->getNumberOfNodes();
    }
    return total;
}
// Total edge count summed over the fitted member models.
int Ensemble::getNumberOfEdges() const
{
    int total = 0;
    for (auto i = 0u; i < n_models; ++i) {
        total += models[i]->getNumberOfEdges();
    }
    return total;
}
// Total state count summed over the fitted member models.
int Ensemble::getNumberOfStates() const
{
    int total = 0;
    for (auto i = 0u; i < n_models; ++i) {
        total += models[i]->getNumberOfStates();
    }
    return total;
}
}

46
src/Ensemble.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef ENSEMBLE_H
#define ENSEMBLE_H
#include <torch/torch.h>
#include "Classifier.h"
#include "BayesMetrics.h"
#include "bayesnetUtils.h"
namespace bayesnet {
// Ensemble of Classifiers. Aggregates member predictions either by weighted
// voting (predict_voting == true, the default) or by averaging each model's
// class probabilities, weighted by significanceModels.
class Ensemble : public Classifier {
public:
    Ensemble(bool predict_voting = true);
    virtual ~Ensemble() = default;
    torch::Tensor predict(torch::Tensor& X) override;
    std::vector<int> predict(std::vector<std::vector<int>>& X) override;
    torch::Tensor predict_proba(torch::Tensor& X) override;
    std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
    float score(torch::Tensor& X, torch::Tensor& y) override;
    float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
    // Totals aggregated over every member model.
    int getNumberOfNodes() const override;
    int getNumberOfEdges() const override;
    int getNumberOfStates() const override;
    std::vector<std::string> show() const override;
    std::vector<std::string> graph(const std::string& title) const override;
    // An ensemble has no single topological order; returns an empty vector.
    std::vector<std::string> topological_order() override
    {
        return std::vector<std::string>();
    }
    // No-op: there is no single CPT to dump for an ensemble.
    void dump_cpt() const override
    {
    }
protected:
    torch::Tensor predict_average_voting(torch::Tensor& X);
    std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X);
    torch::Tensor predict_average_proba(torch::Tensor& X);
    std::vector<std::vector<double>> predict_average_proba(std::vector<std::vector<int>>& X);
    torch::Tensor compute_arg_max(torch::Tensor& X);
    std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
    torch::Tensor voting(torch::Tensor& votes);
    unsigned n_models; // set by trainModel() from models.size()
    std::vector<std::unique_ptr<Classifier>> models;
    std::vector<double> significanceModels; // per-model weight used in aggregation
    void trainModel(const torch::Tensor& weights) override;
    bool predict_voting; // true: voting aggregation; false: probability averaging
};
}
#endif

44
src/FCBF.cc Normal file
View File

@@ -0,0 +1,44 @@
#include "bayesnetUtils.h"
#include "FCBF.h"
namespace bayesnet {
// threshold is the minimum SU(feature, class) a feature needs to be selected;
// values below 1e-7 are rejected as meaningless.
FCBF::FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
    FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
    if (threshold < 1e-7) {
        throw std::invalid_argument("Threshold cannot be less than 1e-7");
    }
}
// Fast Correlation-Based Filter: visit features in descending SU(feature, class)
// order; keep a feature if it passes the threshold, then mark as redundant
// (SU score zeroed) every remaining feature that correlates more with the
// kept feature than with the class.
void FCBF::fit()
{
    initialize();
    computeSuLabels();
    auto featureOrder = argsort(suLabels); // sort descending order
    auto featureOrderCopy = featureOrder;
    for (const auto& feature : featureOrder) {
        // Don't self compare
        featureOrderCopy.erase(featureOrderCopy.begin());
        if (suLabels.at(feature) == 0.0) {
            // The feature has been removed from the list
            continue;
        }
        if (suLabels.at(feature) < threshold) {
            // Order is descending, so no later feature can pass either.
            break;
        }
        // Remove redundant features
        for (const auto& featureCopy : featureOrderCopy) {
            double value = computeSuFeatures(feature, featureCopy);
            if (value >= suLabels.at(featureCopy)) {
                // Remove feature from list (mark by zeroing its SU score)
                suLabels[featureCopy] = 0.0;
            }
        }
        selectedFeatures.push_back(feature);
        selectedScores.push_back(suLabels[feature]);
        // NOTE(review): size_t vs int comparison — fine while maxFeatures >= 0
        if (selectedFeatures.size() == maxFeatures) {
            break;
        }
    }
    fitted = true;
}
}

17
src/FCBF.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef FCBF_H
#define FCBF_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
// FCBF (Fast Correlation-Based Filter) feature selection.
// Keeps features whose SU(feature, class) passes 'threshold' and discards
// features that are redundant with an already-selected one.
class FCBF : public FeatureSelect {
public:
    // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
    FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
    virtual ~FCBF() {};
    void fit() override;
private:
    // Minimum SU(feature, class); the constructor rejects values below 1e-7.
    double threshold = -1;
};
}
#endif

79
src/FeatureSelect.cc Normal file
View File

@@ -0,0 +1,79 @@
#include "FeatureSelect.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
// samples is a (n+1)xm integer tensor where the last row holds the labels.
// maxFeatures == 0 means "no limit": all n features (samples.size(0) - 1).
// NOTE(review): 'weights' is stored as a const reference — the caller's
// tensor must outlive this object.
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
    Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
{
}
// Reset the selection result before a fit().
// NOTE(review): suLabels/suFeatures caches are not cleared here — confirm
// that fit() is intended to run only once per instance.
void FeatureSelect::initialize()
{
    selectedFeatures.clear();
    selectedScores.clear();
}
double FeatureSelect::symmetricalUncertainty(int a, int b)
{
    /*
    Compute symmetrical uncertainty. Normalize* information gain (mutual
    information) with the entropies of the features in order to compensate
    the bias due to high cardinality features. *Range [0, 1]
    (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
    */
    // a and b are row indices into samples; -1 addresses the class row.
    auto x = samples.index({ a, "..." });
    auto y = samples.index({ b, "..." });
    auto mu = mutualInformation(x, y, weights);
    auto hx = entropy(x, weights);
    auto hy = entropy(y, weights);
    // NOTE(review): divides by hx + hy — if both entropies are 0 (constant
    // rows) this yields inf/nan; confirm inputs exclude constant features.
    return 2.0 * mu / (hx + hy);
}
// SU of every feature against the class row; fills suLabels (index == feature).
void FeatureSelect::computeSuLabels()
{
    // Compute Simmetrical Uncertainty between features and labels
    // https://en.wikipedia.org/wiki/Symmetric_uncertainty
    // NOTE(review): push_back without clearing — calling twice would append.
    for (int i = 0; i < features.size(); ++i) {
        suLabels.push_back(symmetricalUncertainty(i, -1));
    }
}
double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
{
// Compute Simmetrical Uncertainty between features
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
try {
return suFeatures.at({ firstFeature, secondFeature });
}
catch (const std::out_of_range& e) {
double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result;
return result;
}
}
// CFS merit of the currently selected subset:
//     merit = rcf / sqrt(n + (n^2 - n) * rff)
// where rcf sums SU(feature, class) over the subset and rff sums
// SU(feature, feature) over all pairs — presumably the summed (not averaged)
// form of the standard CFS merit; confirm against the callers' expectations.
// Fixes: removed the unused local 'result'; qualified std::sqrt.
double FeatureSelect::computeMeritCFS()
{
    double rcf = 0;
    for (auto feature : selectedFeatures) {
        rcf += suLabels[feature];
    }
    double rff = 0;
    int n = selectedFeatures.size();
    for (const auto& item : doCombinations(selectedFeatures)) {
        rff += computeSuFeatures(item.first, item.second);
    }
    return rcf / std::sqrt(n + (n * n - n) * rff);
}
// Indices of the selected features; only valid after a successful fit().
std::vector<int> FeatureSelect::getFeatures() const
{
    if (fitted) {
        return selectedFeatures;
    }
    throw std::runtime_error("FeatureSelect not fitted");
}
// Scores matching getFeatures(), in the same order; only valid after fit().
std::vector<double> FeatureSelect::getScores() const
{
    if (fitted) {
        return selectedScores;
    }
    throw std::runtime_error("FeatureSelect not fitted");
}
}

30
src/FeatureSelect.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef FEATURE_SELECT_H
#define FEATURE_SELECT_H
#include <torch/torch.h>
#include <vector>
#include "BayesMetrics.h"
namespace bayesnet {
// Abstract base for filter feature-selection algorithms (CFS, FCBF, IWSS).
// Operates on a (n+1)xm integer samples tensor whose last row is the class.
class FeatureSelect : public Metrics {
public:
    // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
    FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights);
    virtual ~FeatureSelect() {};
    // Runs the selection; implementations must set fitted = true on success.
    virtual void fit() = 0;
    // Selected feature indices / scores; throw if fit() has not run.
    std::vector<int> getFeatures() const;
    std::vector<double> getScores() const;
protected:
    void initialize();
    void computeSuLabels();
    double computeSuFeatures(const int a, const int b);
    double symmetricalUncertainty(int a, int b);
    double computeMeritCFS();
    const torch::Tensor& weights; // non-owning; the referenced tensor must outlive this object
    int maxFeatures; // 0 in the ctor argument means "use all features"
    std::vector<int> selectedFeatures;
    std::vector<double> selectedScores;
    std::vector<double> suLabels; // SU(feature, class), indexed by feature
    std::map<std::pair<int, int>, double> suFeatures; // memoized SU(feature, feature)
    bool fitted = false;
};
}
#endif

47
src/IWSS.cc Normal file
View File

@@ -0,0 +1,47 @@
#include "IWSS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
// threshold is the maximum tolerated relative merit decrease; must lie in [0, 0.5].
IWSS::IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
    FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
    if (threshold < 0 || threshold > .5) {
        throw std::invalid_argument("Threshold has to be in [0, 0.5]");
    }
}
// Incremental Wrapper-style Subset Selection: add features in descending
// SU(feature, class) order while the CFS merit improves or worsens by less
// than 'threshold' (relative); stop at the first feature that fails.
// Fixes: qualified std::abs (plain 'abs' can bind to the int overload from
// <cstdlib> and truncate the double difference to 0); cast size() to int for
// the maxFeatures comparison to avoid a signed/unsigned mismatch.
void IWSS::fit()
{
    initialize();
    computeSuLabels();
    auto featureOrder = argsort(suLabels); // sort descending order
    auto featureOrderCopy = featureOrder;
    // Add first and second features to result
    // First with its own score
    auto first_feature = pop_first(featureOrderCopy);
    selectedFeatures.push_back(first_feature);
    selectedScores.push_back(suLabels.at(first_feature));
    // Second with the score of the candidates
    selectedFeatures.push_back(pop_first(featureOrderCopy));
    auto merit = computeMeritCFS();
    selectedScores.push_back(merit);
    for (const auto feature : featureOrderCopy) {
        selectedFeatures.push_back(feature);
        // Compute merit with selectedFeatures
        auto meritNew = computeMeritCFS();
        double delta = merit != 0.0 ? std::abs(merit - meritNew) / merit : 0.0;
        if (meritNew > merit || delta < threshold) {
            if (meritNew > merit) {
                merit = meritNew;
            }
            selectedScores.push_back(meritNew);
        } else {
            // Candidate worsened the merit beyond tolerance: drop it and stop.
            selectedFeatures.pop_back();
            break;
        }
        if (static_cast<int>(selectedFeatures.size()) == maxFeatures) {
            break;
        }
    }
    fitted = true;
}
}

17
src/IWSS.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef IWSS_H
#define IWSS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
// IWSS (Incremental Wrapper-style Subset Selection) feature selection.
// Adds features in descending SU(feature, class) order while the CFS merit
// does not degrade by more than 'threshold' relatively.
class IWSS : public FeatureSelect {
public:
    // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
    IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
    virtual ~IWSS() {};
    void fit() override;
private:
    // Maximum tolerated relative merit decrease; ctor enforces [0, 0.5].
    double threshold = -1;
};
}
#endif

View File

@@ -1,14 +1,13 @@
#include "KDB.h"
namespace bayesnet {
using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta)
{
validHyperparameters = { "k", "theta" };
}
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
}
@@ -40,16 +39,16 @@ namespace bayesnet {
// 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class.
addNodes();
const Tensor& y = dataset.index({ -1, "..." });
vector<double> mi;
const torch::Tensor& y = dataset.index({ -1, "..." });
std::vector<double> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = dataset.index({ i, "..." });
torch::Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
}
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each
auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty.
vector<int> S;
std::vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
// 5. Repeat until S includes all domain features
@@ -67,9 +66,9 @@ namespace bayesnet {
S.push_back(idx);
}
}
void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights)
{
auto n_edges = min(k, static_cast<int>(S.size()));
auto n_edges = std::min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights);
bool exit_cond = k == 0;
int num = 0;
@@ -81,7 +80,7 @@ namespace bayesnet {
model.addEdge(features[max_minfo], features[idx]);
num++;
}
catch (const invalid_argument& e) {
catch (const std::invalid_argument& e) {
// Loops are not allowed
}
}
@@ -91,11 +90,11 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
vector<string> KDB::graph(const string& title) const
std::vector<std::string> KDB::graph(const std::string& title) const
{
string header{ title };
std::string header{ title };
if (title == "KDB") {
header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")";
}
return model.graph(header);
}

View File

@@ -4,20 +4,18 @@
#include "Classifier.h"
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class KDB : public Classifier {
private:
int k;
float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights);
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
protected:
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override;
vector<string> graph(const string& name = "KDB") const override;
virtual ~KDB() = default;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
};
}
#endif

View File

@@ -1,16 +1,15 @@
#include "KDBLd.h"
namespace bayesnet {
using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
@@ -18,12 +17,12 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor KDBLd::predict(Tensor& X)
torch::Tensor KDBLd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
vector<string> KDBLd::graph(const string& name) const
std::vector<std::string> KDBLd::graph(const std::string& name) const
{
return KDB::graph(name);
}

18
src/KDBLd.h Normal file
View File

@@ -0,0 +1,18 @@
#ifndef KDBLD_H
#define KDBLD_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
// KDB classifier with local discretization of continuous input (Proposal mixin).
// Fix: qualified 'map' as 'std::map' — every sibling header was migrated to
// explicit std:: qualification when 'using namespace std' was removed; the
// type is identical so the override signature is unchanged.
class KDBLd : public KDB, public Proposal {
private:
public:
    explicit KDBLd(int k);
    virtual ~KDBLd() = default;
    // X holds continuous features; they are discretized locally before KDB::fit.
    KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
    std::vector<std::string> graph(const std::string& name = "KDB") const override;
    torch::Tensor predict(torch::Tensor& X) override;
    static inline std::string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H

View File

@@ -1,13 +1,13 @@
#include "Mst.h"
#include <vector>
#include <list>
/*
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/
namespace bayesnet {
using namespace std;
Graph::Graph(int V) : V(V), parent(vector<int>(V))
Graph::Graph(int V) : V(V), parent(std::vector<int>(V))
{
for (int i = 0; i < V; i++)
parent[i] = i;
@@ -34,36 +34,45 @@ namespace bayesnet {
void Graph::kruskal_algorithm()
{
// sort the edges ordered on decreasing weight
sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;});
stable_sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;});
for (int i = 0; i < G.size(); i++) {
int uSt, vEd;
uSt = find_set(G[i].second.first);
vEd = find_set(G[i].second.second);
if (uSt != vEd) {
T.push_back(G[i]); // add to mst vector
T.push_back(G[i]); // add to mst std::vector
union_set(uSt, vEd);
}
}
}
void Graph::display_mst()
{
cout << "Edge :" << " Weight" << endl;
std::cout << "Edge :" << " Weight" << std::endl;
for (int i = 0; i < T.size(); i++) {
cout << T[i].second.first << " - " << T[i].second.second << " : "
std::cout << T[i].second.first << " - " << T[i].second.second << " : "
<< T[i].first;
cout << endl;
std::cout << std::endl;
}
}
vector<pair<int, int>> reorder(vector<pair<float, pair<int, int>>> T, int root_original)
void insertElement(std::list<int>& variables, int variable)
{
auto result = vector<pair<int, int>>();
auto visited = vector<int>();
auto nextVariables = unordered_set<int>();
nextVariables.emplace(root_original);
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
variables.push_front(variable);
}
}
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{
// Create the edges of a DAG from the MST
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
auto result = std::vector<std::pair<int, int>>();
auto visited = std::vector<int>();
auto nextVariables = std::list<int>();
nextVariables.push_front(root_original);
while (nextVariables.size() > 0) {
int root = *nextVariables.begin();
nextVariables.erase(nextVariables.begin());
int root = nextVariables.front();
nextVariables.pop_front();
for (int i = 0; i < T.size(); ++i) {
auto [weight, edge] = T[i];
auto [from, to] = edge;
@@ -71,10 +80,10 @@ namespace bayesnet {
visited.insert(visited.begin(), i);
if (from == root) {
result.push_back({ from, to });
nextVariables.emplace(to);
insertElement(nextVariables, to);
} else {
result.push_back({ to, from });
nextVariables.emplace(from);
insertElement(nextVariables, from);
}
}
}
@@ -94,12 +103,11 @@ namespace bayesnet {
return result;
}
MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree()
MST::MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
std::vector<std::pair<int, int>> MST::maximumSpanningTree()
{
auto num_features = features.size();
Graph g(num_features);
// Make a complete graph
for (int i = 0; i < num_features - 1; ++i) {
for (int j = i + 1; j < num_features; ++j) {

33
src/Mst.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef MST_H
#define MST_H
#include <torch/torch.h>
#include <string>
#include <utility>
#include <vector>
namespace bayesnet {
    // Builds a maximum spanning tree over the features from the pairwise
    // weights matrix, rooted at the given feature index.
    class MST {
    private:
        torch::Tensor weights;          // pairwise edge weights (mutual information)
        std::vector<std::string> features;
        int root = 0;                   // feature index used as the tree root
    public:
        MST() = default;
        MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
        // Returns the tree as (parent, child) pairs of feature indices.
        std::vector<std::pair<int, int>> maximumSpanningTree();
    };
    // Undirected weighted graph with union-find support, used by Kruskal's
    // algorithm to extract the spanning tree.
    class Graph {
    private:
        int V; // number of nodes in graph
        std::vector<std::pair<float, std::pair<int, int>>> G; // edge list: (weight, (u, v))
        std::vector<std::pair<float, std::pair<int, int>>> T; // edges selected for the mst
        std::vector<int> parent;                              // union-find parent table
    public:
        explicit Graph(int V);
        void addEdge(int u, int v, float wt);
        int find_set(int i);
        void union_set(int u, int v);
        void kruskal_algorithm();
        void display_mst();
        std::vector<std::pair<float, std::pair<int, int>>> get_mst() { return T; }
    };
}
#endif // MST_H

View File

@@ -3,18 +3,18 @@
#include "Network.h"
#include "bayesnetUtils.h"
namespace bayesnet {
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted)
{
for (const auto& pair : other.nodes) {
nodes[pair.first] = std::make_unique<Node>(*pair.second);
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
}
void Network::initialize()
{
features = vector<string>();
features = std::vector<std::string>();
className = "";
classNumStates = 0;
fitted = false;
@@ -29,10 +29,10 @@ namespace bayesnet {
{
return samples;
}
void Network::addNode(const string& name)
void Network::addNode(const std::string& name)
{
if (name == "") {
throw invalid_argument("Node name cannot be empty");
throw std::invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
@@ -42,7 +42,7 @@ namespace bayesnet {
}
nodes[name] = std::make_unique<Node>(name);
}
vector<string> Network::getFeatures() const
std::vector<std::string> Network::getFeatures() const
{
return features;
}
@@ -58,11 +58,11 @@ namespace bayesnet {
}
return result;
}
string Network::getClassName() const
std::string Network::getClassName() const
{
return className;
}
bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack)
bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
{
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{
@@ -78,78 +78,78 @@ namespace bayesnet {
recStack.erase(nodeId); // remove node from recursion stack before function ends
return false;
}
void Network::addEdge(const string& parent, const string& child)
void Network::addEdge(const std::string& parent, const std::string& child)
{
if (nodes.find(parent) == nodes.end()) {
throw invalid_argument("Parent node " + parent + " does not exist");
throw std::invalid_argument("Parent node " + parent + " does not exist");
}
if (nodes.find(child) == nodes.end()) {
throw invalid_argument("Child node " + child + " does not exist");
throw std::invalid_argument("Child node " + child + " does not exist");
}
// Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get());
unordered_set<string> visited;
unordered_set<string> recStack;
std::unordered_set<std::string> visited;
std::unordered_set<std::string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{
// remove problematic edge
nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph.");
throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
map<string, std::unique_ptr<Node>>& Network::getNodes()
std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
{
return nodes;
}
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights)
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
if (weights.size(0) != n_samples) {
throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit");
throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
}
if (n_features != featureNames.size()) {
throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")");
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
}
if (n_features != features.size() - 1) {
throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")");
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features");
throw std::invalid_argument("className not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features");
throw std::invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states");
throw std::invalid_argument("Feature " + feature + " not found in states");
}
}
}
void Network::setStates(const map<string, vector<int>>& states)
void Network::setStates(const std::map<std::string, std::vector<int>>& states)
{
// Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const string& feature) {
for_each(features.begin(), features.end(), [this, &states](const std::string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size());
});
classNumStates = nodes.at(className)->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className;
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0);
for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." });
}
completeFit(states, weights);
}
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
@@ -157,7 +157,7 @@ namespace bayesnet {
completeFit(states, weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@@ -170,11 +170,11 @@ namespace bayesnet {
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
}
void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights)
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
vector<thread> threads;
std::vector<std::thread> threads;
for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
@@ -188,12 +188,12 @@ namespace bayesnet {
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) {
const Tensor sample = samples.index({ "...", i });
const torch::Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
@@ -201,36 +201,35 @@ namespace bayesnet {
}
if (proba)
return result;
else
return result.argmax(1);
}
// Return mxn tensor of probabilities
Tensor Network::predict_proba(const Tensor& samples)
torch::Tensor Network::predict_proba(const torch::Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mx1 tensor of class predictions
Tensor Network::predict(const Tensor& samples)
torch::Tensor Network::predict(const torch::Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 vector of predictions
// tsamples is nxm vector of samples
vector<int> Network::predict(const vector<vector<int>>& tsamples)
// Return mx1 std::vector of predictions
// tsamples is nxm std::vector of samples
std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
vector<int> predictions;
vector<int> sample;
std::vector<int> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
vector<double> classProbabilities = predict_sample(sample);
std::vector<double> classProbabilities = predict_sample(sample);
// Find the class with the maximum posterior probability
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
@@ -238,14 +237,15 @@ namespace bayesnet {
}
return predictions;
}
// Return mxn vector of probabilities
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
// Return mxn std::vector of probabilities
// tsamples is nxm std::vector of samples
std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict_proba()");
throw std::logic_error("You must call fit() before calling predict_proba()");
}
vector<vector<double>> predictions;
vector<int> sample;
std::vector<std::vector<double>> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
@@ -255,9 +255,9 @@ namespace bayesnet {
}
return predictions;
}
double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
{
vector<int> y_pred = predict(tsamples);
std::vector<int> y_pred = predict(tsamples);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) {
@@ -266,35 +266,35 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const vector<int>& sample)
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const Tensor& sample)
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence)
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
@@ -302,17 +302,17 @@ namespace bayesnet {
}
return result;
}
vector<double> Network::exactInference(map<string, int>& evidence)
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{
vector<double> result(classNumStates, 0.0);
vector<thread> threads;
mutex mtx;
std::vector<double> result(classNumStates, 0.0);
std::vector<std::thread> threads;
std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = map<string, int>(evidence);
auto completeEvidence = std::map<std::string, int>(evidence);
completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence);
lock_guard<mutex> lock(mtx);
std::lock_guard<std::mutex> lock(mtx);
result[i] = factor;
});
}
@@ -324,12 +324,12 @@ namespace bayesnet {
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}
vector<string> Network::show() const
std::vector<std::string> Network::show() const
{
vector<string> result;
std::vector<std::string> result;
// Draw the network
for (auto& node : nodes) {
string line = node.first + " -> ";
std::string line = node.first + " -> ";
for (auto child : node.second->getChildren()) {
line += child->getName() + ", ";
}
@@ -337,12 +337,12 @@ namespace bayesnet {
}
return result;
}
vector<string> Network::graph(const string& title) const
std::vector<std::string> Network::graph(const std::string& title) const
{
auto output = vector<string>();
auto output = std::vector<std::string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
string header = prefix + title + suffix;
std::string header = prefix + title + suffix;
output.push_back(header);
for (auto& node : nodes) {
auto result = node.second->graph(className);
@@ -351,9 +351,9 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
vector<pair<string, string>> Network::getEdges() const
std::vector<std::pair<std::string, std::string>> Network::getEdges() const
{
auto edges = vector<pair<string, string>>();
auto edges = std::vector<std::pair<std::string, std::string>>();
for (const auto& node : nodes) {
auto head = node.first;
for (const auto& child : node.second->getChildren()) {
@@ -367,7 +367,7 @@ namespace bayesnet {
{
return getEdges().size();
}
vector<string> Network::topological_sort()
std::vector<std::string> Network::topological_sort()
{
/* Check if al the fathers of every node are before the node */
auto result = features;
@@ -394,10 +394,10 @@ namespace bayesnet {
ending = false;
}
} else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result");
throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
}
} else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
}
}
}
@@ -407,8 +407,8 @@ namespace bayesnet {
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
cout << node.second->getCPT() << endl;
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
std::cout << node.second->getCPT() << std::endl;
}
}
}

63
src/Network.h Normal file
View File

@@ -0,0 +1,63 @@
#ifndef NETWORK_H
#define NETWORK_H
#include <map>
#include <string>
#include <unordered_set>
#include <vector>
#include "Node.h"
#include "config.h"
namespace bayesnet {
    // Discrete Bayesian network: owns the nodes, learns their CPTs from data
    // and answers classification queries through exact inference.
    class Network {
    public:
        Network();
        explicit Network(float);
        explicit Network(Network&);
        ~Network() = default;
        torch::Tensor& getSamples();
        float getmaxThreads();
        void addNode(const std::string&);
        // Adds a directed edge; throws std::invalid_argument if either node is
        // missing or the edge would create a cycle.
        void addEdge(const std::string&, const std::string&);
        std::map<std::string, std::unique_ptr<Node>>& getNodes();
        std::vector<std::string> getFeatures() const;
        int getStates() const;
        std::vector<std::pair<std::string, std::string>> getEdges() const;
        int getNumEdges() const;
        int getClassNumStates() const;
        std::string getClassName() const;
        /*
        Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
        */
        void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
        std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
        torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
        torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
        std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
        torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
        double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
        std::vector<std::string> topological_sort();
        std::vector<std::string> show() const;
        std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
        void initialize();
        void dump_cpt() const;
        inline std::string version() { return { project_version.begin(), project_version.end() }; }
    private:
        std::map<std::string, std::unique_ptr<Node>> nodes;
        bool fitted;
        float maxThreads = 0.95;            // fraction of hardware threads to use
        int classNumStates;
        std::vector<std::string> features;  // Including classname
        std::string className;
        double laplaceSmoothing;
        torch::Tensor samples;              // nxm tensor used to fit the model
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
        std::vector<double> predict_sample(const std::vector<int>&);
        std::vector<double> predict_sample(const torch::Tensor&);
        std::vector<double> exactInference(std::map<std::string, int>&);
        double computeFactor(std::map<std::string, int>&);
        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
        // Parameter order matches the definition: samples first, then features.
        void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
        void setStates(const std::map<std::string, std::vector<int>>&);
    };
}
#endif // NETWORK_H

View File

@@ -3,7 +3,7 @@
namespace bayesnet {
Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
: name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
{
}
void Node::clear()
@@ -14,7 +14,7 @@ namespace bayesnet {
dimensions.clear();
numStates = 0;
}
string Node::getName() const
std::string Node::getName() const
{
return name;
}
@@ -34,11 +34,11 @@ namespace bayesnet {
{
children.push_back(child);
}
vector<Node*>& Node::getParents()
std::vector<Node*>& Node::getParents()
{
return parents;
}
vector<Node*>& Node::getChildren()
std::vector<Node*>& Node::getChildren()
{
return children;
}
@@ -63,28 +63,28 @@ namespace bayesnet {
*/
unsigned Node::minFill()
{
unordered_set<string> neighbors;
std::unordered_set<std::string> neighbors;
for (auto child : children) {
neighbors.emplace(child->getName());
}
for (auto parent : parents) {
neighbors.emplace(parent->getName());
}
auto source = vector<string>(neighbors.begin(), neighbors.end());
auto source = std::vector<std::string>(neighbors.begin(), neighbors.end());
return combinations(source).size();
}
vector<pair<string, string>> Node::combinations(const vector<string>& source)
std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
{
vector<pair<string, string>> result;
std::vector<std::pair<std::string, std::string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
std::string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{
dimensions.clear();
// Get dimensions of the CPT
@@ -96,7 +96,7 @@ namespace bayesnet {
// Fill table with counts
auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset");
throw std::logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
@@ -105,7 +105,7 @@ namespace bayesnet {
for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset");
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
@@ -116,17 +116,17 @@ namespace bayesnet {
// Normalize the counts
cpTable = cpTable / cpTable.sum(0);
}
float Node::getFactorValue(map<string, int>& evidence)
float Node::getFactorValue(std::map<std::string, int>& evidence)
{
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(const string& className)
std::vector<std::string> Node::graph(const std::string& className)
{
auto output = vector<string>();
auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });

36
src/Node.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef NODE_H
#define NODE_H
#include <torch/torch.h>
#include <map>
#include <string>
#include <unordered_set>
#include <vector>
namespace bayesnet {
    // A single variable in the Bayesian network: holds its parents, children
    // and the conditional probability table learned from data.
    class Node {
    private:
        std::string name;
        std::vector<Node*> parents;   // non-owning; the Network owns all nodes
        std::vector<Node*> children;  // non-owning
        int numStates;                // number of states of the variable
        torch::Tensor cpTable;        // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
        std::vector<int64_t> dimensions; // dimensions of the cpTable
        // All unordered pairs drawn from source; used by minFill().
        std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
    public:
        explicit Node(const std::string&);
        void clear();
        void addParent(Node*);
        void addChild(Node*);
        void removeParent(Node*);
        void removeChild(Node*);
        std::string getName() const;
        std::vector<Node*>& getParents();
        std::vector<Node*>& getChildren();
        torch::Tensor& getCPT();
        // Estimates the CPT from the dataset columns with Laplace smoothing,
        // weighting each sample by the given weights tensor.
        void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
        int getNumStates() const;
        void setNumStates(int);
        unsigned minFill();
        std::vector<std::string> graph(const std::string& className); // Returns a std::vector of std::strings representing the graph in graphviz format
        // Looks up P(name | parents) for the assignment given in evidence.
        float getFactorValue(std::map<std::string, int>&);
    };
}
#endif // NODE_H

View File

@@ -1,305 +0,0 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include <set>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
namespace fs = std::filesystem;
// function ftime_to_string, Code taken from
// https://stackoverflow.com/a/58237530/1389271
// Convert any clock's time_point to "YYYY-MM-DD HH:MM" (UTC).
// The (tp - TP::clock::now() + system_clock::now()) trick rebases the value
// onto system_clock so filesystem timestamps can be formatted portably.
// Code taken from https://stackoverflow.com/a/58237530/1389271
template <typename TP>
std::string ftime_to_string(TP tp)
{
    using namespace std::chrono;
    const auto as_system = time_point_cast<system_clock::duration>(tp - TP::clock::now() + system_clock::now());
    const std::time_t seconds = system_clock::to_time_t(as_system);
    std::ostringstream formatted;
    formatted << std::put_time(std::gmtime(&seconds), "%Y-%m-%d %H:%M");
    return formatted.str();
}
namespace platform {
// Scan every matching result file and keep, per dataset, the best score with
// its hyperparameters and source file; persist the table as
// best_results_<score>_<model>.json and return that file name.
// Aborts the process when no result files match.
string BestResults::build()
{
    auto files = loadResultFiles();
    if (files.size() == 0) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    json bests;
    for (const auto& file : files) {
        auto result = Result(path, file);
        auto data = result.load();
        for (auto const& item : data.at("results")) {
            bool update = false;
            // Replace the stored entry only when the dataset is new or this
            // score beats the best seen so far.
            if (bests.contains(item.at("dataset").get<string>())) {
                if (item.at("score").get<double>() > bests[item.at("dataset").get<string>()].at(0).get<double>()) {
                    update = true;
                }
            } else {
                update = true;
            }
            if (update) {
                // Stored triple per dataset: [score, hyperparameters, source file]
                bests[item.at("dataset").get<string>()] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
            }
        }
    }
    string bestFileName = path + bestResultFile();
    // fopen is used as a cheap existence check before overwriting.
    if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
        fclose(fileTest);
        cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
    }
    ofstream file(bestFileName);
    file << bests;
    file.close();
    return bestFileName;
}
// Name of the aggregated file for this score/model pair:
// best_results_<score>_<model>.json
string BestResults::bestResultFile()
{
    string fileName = "best_results_";
    fileName += score;
    fileName += "_";
    fileName += model;
    fileName += ".json";
    return fileName;
}
// Extract the score and model names encoded in a result file name, e.g.
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
// yields { "BoostAODE", "accuracy" }. Returns { model, score } — note the
// order is the reverse of how the tokens appear in the file name.
// (Removed an unused local counter present in the original.)
std::pair<std::string, std::string> getModelScore(std::string name)
{
    auto pos = name.find("_");            // end of the "results" prefix
    auto pos2 = name.find("_", pos + 1);  // end of the score token
    std::string score = name.substr(pos + 1, pos2 - pos - 1);
    pos = name.find("_", pos2 + 1);       // end of the model token
    std::string model = name.substr(pos2 + 1, pos - pos2 - 1);
    return { model, score };
}
// Collect the result file names under `path` matching this instance's score
// and model; model == "any" accepts every model.
vector<string> BestResults::loadResultFiles()
{
    vector<string> files;
    using std::filesystem::directory_iterator;
    string fileModel, fileScore;
    for (const auto& file : directory_iterator(path)) {
        auto fileName = file.path().filename().string();
        // Result files look like results_<score>_<model>_<host>_<date>_<time>_<n>.json
        if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) {
            tie(fileModel, fileScore) = getModelScore(fileName);
            if (score == fileScore && (model == fileModel || model == "any")) {
                files.push_back(fileName);
            }
        }
    }
    return files;
}
// Parse the given JSON results file; throws invalid_argument when the file
// cannot be opened.
json BestResults::loadFile(const string& fileName)
{
    ifstream input(fileName);
    if (!input.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(input);
}
// Distinct model names found among the matching result files.
// Aborts the process when no result files match.
set<string> BestResults::getModels()
{
    set<string> models;
    auto files = loadResultFiles();
    if (files.size() == 0) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    string fileModel, fileScore;
    for (const auto& file : files) {
        // extract the model from the file name
        tie(fileModel, fileScore) = getModelScore(file);
        // add the model to the set of models (duplicates collapse)
        models.insert(fileModel);
    }
    return models;
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
cout << "Building best results for model: " << model << endl;
this->model = model;
build();
}
model = "any";
}
// Print the best-results table stored for the current model/score pair.
// Aborts the process when the aggregated file does not exist.
void BestResults::reportSingle()
{
    string bestFileName = path + bestResultFile();
    // fopen is used as a cheap existence check.
    if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
        fclose(fileTest);
    } else {
        cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
        exit(1);
    }
    auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
    auto data = loadFile(bestFileName);
    cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
    cout << "--------------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
    cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
    auto i = 0;
    bool odd = true; // alternate row colors for readability
    for (auto const& item : data.items()) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        // Each entry is [score, hyperparameters, source file], keyed by dataset.
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << setw(25) << left << item.key() << " ";
        cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
        cout << setw(66) << item.value().at(2).get<string>() << " ";
        cout << item.value().at(1) << " ";
        cout << endl;
        odd = !odd;
    }
}
// Merge the per-model best-results files into one table keyed by model name.
// The table is stamped ("dateTable") with the OLDEST file modification time,
// so the report reveals how stale the data may be. Aborts when a file is
// missing or the models disagree on the number of datasets.
json BestResults::buildTableResults(set<string> models)
{
    int numberOfDatasets = 0;
    bool first = true;
    json origin;
    json table;
    auto maxDate = filesystem::file_time_type::max();
    for (const auto& model : models) {
        this->model = model;
        string bestFileName = path + bestResultFile();
        // fopen is used as a cheap existence check.
        if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
            fclose(fileTest);
        } else {
            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
            exit(1);
        }
        auto dateWrite = filesystem::last_write_time(bestFileName);
        if (dateWrite < maxDate) {
            maxDate = dateWrite; // keep the oldest write time
        }
        auto data = loadFile(bestFileName);
        if (first) {
            // Get the number of datasets of the first file and check that is the same for all the models
            first = false;
            numberOfDatasets = data.size();
            origin = data;
        } else {
            if (numberOfDatasets != data.size()) {
                cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
                exit(1);
            }
        }
        table[model] = data;
    }
    table["dateTable"] = ftime_to_string(maxDate);
    return table;
}
// Print a dataset x model matrix of best scores. Per row the best value is
// highlighted in red; the footer highlights the best total and the best
// averaged rank.
// Fixes vs. the original: ranks are now ACCUMULATED across datasets (the
// original assigned, so "Averaged ranks" only reflected the last dataset),
// and the minimum rank is computed instead of being hard-wired to 1.
void BestResults::printTableResults(set<string> models, json table)
{
    cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
    cout << "------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset ";
    for (const auto& model : models) {
        cout << setw(12) << left << model << " ";
    }
    cout << endl;
    cout << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    auto i = 0;
    bool odd = true; // alternate row colors for readability
    map<string, double> totals;
    map<string, int> ranks; // sum of per-dataset ranks (1 = best)
    for (const auto& model : models) {
        totals[model] = 0.0;
        ranks[model] = 0;
    }
    json origin = table.begin().value();
    for (auto const& item : origin.items()) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << setw(25) << left << item.key() << " ";
        double maxValue = 0;
        vector<pair<string, double>> ranksOrder;
        // Find out the max value for this dataset
        for (const auto& model : models) {
            double value = table[model].at(item.key()).at(0).get<double>();
            if (value > maxValue) {
                maxValue = value;
            }
            ranksOrder.push_back({ model, value });
        }
        // sort the ranksOrder vector by value (descending)
        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
            return a.second > b.second;
            });
        // Accumulate the ranks (1 = best) so the footer can average them
        for (size_t idx = 0; idx < ranksOrder.size(); idx++) {
            ranks[ranksOrder[idx].first] += static_cast<int>(idx) + 1;
        }
        // Print the row with red colors on max values
        for (const auto& model : models) {
            string efectiveColor = color;
            double value = table[model].at(item.key()).at(0).get<double>();
            if (value == maxValue) {
                efectiveColor = Colors::RED();
            }
            totals[model] += value;
            cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
        }
        cout << endl;
        odd = !odd;
    }
    cout << Colors::GREEN() << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    cout << Colors::GREEN() << setw(30) << " Totals...................";
    double max = 0.0;
    for (const auto& total : totals) {
        if (total.second > max) {
            max = total.second;
        }
    }
    for (const auto& model : models) {
        string efectiveColor = Colors::GREEN();
        if (totals[model] == max) {
            efectiveColor = Colors::RED();
        }
        cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
    }
    // Output the averaged ranks
    cout << endl;
    int minRank = -1; // smallest accumulated rank = best model
    for (const auto& rank : ranks) {
        if (minRank == -1 || rank.second < minRank) {
            minRank = rank.second;
        }
    }
    cout << Colors::GREEN() << setw(30) << " Averaged ranks...........";
    for (const auto& model : models) {
        string efectiveColor = Colors::GREEN();
        if (ranks[model] == minRank) {
            efectiveColor = Colors::RED();
        }
        cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
    }
    cout << endl;
}
void BestResults::reportAll()
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
// Print the table of results
printTableResults(models, table);
}
}

View File

@@ -1,28 +0,0 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <vector>
#include <nlohmann/json.hpp>
// NOTE(review): a using-directive in a header leaks into every includer;
// kept because the .cc files depend on unqualified std names.
using namespace std;
using json = nlohmann::json;
namespace platform {
    // Builds and reports "best result per dataset" tables from result files.
    class BestResults {
    public:
        explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
        string build();       // write best_results_<score>_<model>.json, return its name
        void reportSingle();  // print the best-results table for one model
        void reportAll();     // print the cross-model comparison table
        void buildAll();      // run build() for every model found
    private:
        set<string> getModels();
        vector<string> loadResultFiles();
        json buildTableResults(set<string> models);
        void printTableResults(set<string> models, json table);
        string bestResultFile();
        json loadFile(const string& fileName);
        string path;
        string score;
        string model;
    };
}
#endif //BESTRESULTS_H

View File

@@ -1,10 +0,0 @@
#ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string>
// Hard-coded reference result used as a comparison baseline in reports.
class BestScore {
public:
    static std::string title() { return "STree_default (linear-ovo)"; }
    // NOTE(review): 22.109799 is presumably an aggregated (summed) score over
    // all datasets, not a single accuracy fraction — confirm against the
    // report that consumes it.
    static double score() { return 22.109799; }
    static std::string scoreName() { return "accuracy"; }
};
#endif

View File

@@ -1,19 +0,0 @@
# Include paths shared by every Platform tool (library headers + vendored deps).
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
# Tools: main (run experiments), manage (browse results), list (list datasets),
# best (best-results reports).
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
# NOTE(review): "platformUtils" is listed without the .cc extension — confirm
# CMake resolves it; the other targets spell it out.
add_executable(list list.cc platformUtils Datasets.cc)
add_executable(best best.cc BestResults.cc Result.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
# Linux needs the shared xlsxwriter and stdc++fs for <filesystem>.
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
    target_link_libraries(best stdc++fs)
else()
    target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,14 +0,0 @@
#ifndef COLORS_H
#define COLORS_H
#include <string>
// ANSI terminal escape sequences used for colored console output.
// (Added the missing <string> include: the header used std::string without it.)
class Colors {
public:
    static std::string MAGENTA() { return "\033[1;35m"; }
    static std::string BLUE() { return "\033[1;34m"; }
    static std::string CYAN() { return "\033[1;36m"; }
    static std::string GREEN() { return "\033[1;32m"; }
    static std::string YELLOW() { return "\033[1;33m"; }
    static std::string RED() { return "\033[1;31m"; }
    static std::string WHITE() { return "\033[1;37m"; }
    static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -1,268 +0,0 @@
#include "Datasets.h"
#include "platformUtils.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
// Read the dataset catalog (<path>/all.txt); each line is "name,className".
// Datasets are registered lazily — the data itself is loaded on demand.
void Datasets::load()
{
    ifstream catalog(path + "/all.txt");
    if (catalog.is_open()) {
        string line;
        while (getline(catalog, line)) {
            vector<string> tokens = split(line, ',');
            string name = tokens[0];
            string className = tokens[1];
            datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
        }
        catalog.close();
    } else {
        throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
    }
}
vector<string> Datasets::getNames()
{
vector<string> result;
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result;
}
vector<string> Datasets::getFeatures(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Datasets::getStates(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
void Datasets::loadDataset(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return;
} else {
datasets.at(name)->load();
}
}
string Datasets::getClassName(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNClasses(const string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
vector<int> Datasets::getClassesCounts(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const string& name) const
{
return datasets.find(name) != datasets.end();
}
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
string Dataset::getName() const
{
return name;
}
string Dataset::getClassName() const
{
return className;
}
vector<string> Dataset::getFeatures() const
{
if (loaded) {
return features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures() const
{
if (loaded) {
return n_features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples() const
{
if (loaded) {
return n_samples;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Dataset::getStates() const
{
if (loaded) {
return states;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
string line;
getline(file, line);
vector<string> tokens = split(line, ',');
features = vector<string>(tokens.begin(), tokens.end() - 1);
className = tokens.back();
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw invalid_argument("Unable to open dataset file.");
}
}
// Build the list of possible state values (0..max) for every discretized
// feature and for the class variable.
void Dataset::computeStates()
{
    for (int i = 0; i < features.size(); ++i) {
        states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
        // Must be a reference: the original took a copy, so iota filled the
        // copy and the stored state vectors were left zero-filled.
        auto& item = states.at(features[i]);
        iota(begin(item), end(item), 0);
    }
    states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
    iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{
auto arff = ArffFiles();
arff.load(path + "/" + name + ".arff", className);
// Get Dataset X, y
Xv = arff.getX();
yv = arff.getY();
// Get className & Features
className = arff.getClassName();
auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
}
// Load the dataset from disk (CSV or ARFF) exactly once; later calls no-op.
void Dataset::load()
{
    if (loaded) {
        return;
    }
    if (fileType == CSV) {
        load_csv();
    } else if (fileType == ARFF) {
        load_arff();
    }
    if (discretize) {
        // Discretization also derives the per-feature state lists.
        Xd = discretizeDataset(Xv, yv);
        computeStates();
    }
    n_samples = Xv[0].size();
    n_features = Xv.size();
    loaded = true;
}
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
}
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
}
}
y = torch::tensor(yv, torch::kInt32);
}
}

View File

@@ -1,68 +0,0 @@
#ifndef DATASETS_H
#define DATASETS_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
namespace platform {
using namespace std;
enum fileType_t { CSV, ARFF };
class Dataset {
private:
string path;
string name;
fileType_t fileType;
string className;
int n_samples{ 0 }, n_features{ 0 };
vector<string> features;
map<string, vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
vector<vector<float>> Xv;
vector<vector<int>> Xd;
vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void computeStates();
public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
string getName() const;
string getClassName() const;
vector<string> getFeatures() const;
map<string, vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
class Datasets {
private:
string path;
fileType_t fileType;
map<string, unique_ptr<Dataset>> datasets;
bool discretize;
void load(); // Loads the list of datasets
public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames();
vector<string> getFeatures(const string& name) const;
int getNSamples(const string& name) const;
string getClassName(const string& name) const;
int getNClasses(const string& name);
vector<int> getClassesCounts(const string& name) const;
map<string, vector<int>> getStates(const string& name) const;
pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name);
bool isDataset(const string& name) const;
void loadDataset(const string& name) const;
};
};
#endif

View File

@@ -1,62 +0,0 @@
#ifndef DOTENV_H
#define DOTENV_H
#include <string>
#include <map>
#include <vector>
#include <fstream>
#include <sstream>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <cctype>
#include <cstdlib>
#include "platformUtils.h"
// Added the missing includes: the header used std::cerr, std::find_if,
// std::isspace, std::vector, transform/back_inserter and exit without them.
namespace platform {
    // Loads key=value pairs from a ".env" file in the working directory.
    class DotEnv {
    private:
        std::map<std::string, std::string> env;
        // Strip leading and trailing whitespace.
        std::string trim(const std::string& str)
        {
            std::string result = str;
            result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
                return !std::isspace(ch);
                }));
            result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
                return !std::isspace(ch);
                }).base(), result.end());
            return result;
        }
    public:
        // Parse ".env"; aborts the process when the file is missing.
        DotEnv()
        {
            std::ifstream file(".env");
            if (!file.is_open()) {
                std::cerr << "File .env not found" << std::endl;
                exit(1);
            }
            std::string line;
            while (std::getline(file, line)) {
                line = trim(line);
                if (line.empty() || line[0] == '#') {
                    continue; // skip blank lines and comments
                }
                std::istringstream iss(line);
                std::string key, value;
                if (std::getline(iss, key, '=') && std::getline(iss, value)) {
                    env[key] = value;
                }
            }
        }
        // Value for key; note operator[] inserts "" when the key is absent.
        std::string get(const std::string& key)
        {
            return env[key];
        }
        // Parse the "seeds" entry, e.g. "[271, 314, 171]", into integers.
        std::vector<int> getSeeds()
        {
            auto seeds = std::vector<int>();
            auto seeds_str = env["seeds"];
            seeds_str = trim(seeds_str);
            seeds_str = seeds_str.substr(1, seeds_str.size() - 2); // drop brackets
            auto seeds_str_split = split(seeds_str, ',');
            transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
                return stoi(str);
                });
            return seeds;
        }
    };
}
#endif

View File

@@ -1,219 +0,0 @@
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include <fstream>
namespace platform {
using json = nlohmann::json;
// Current local date formatted as YYYY-MM-DD.
std::string get_date()
{
    std::time_t now = std::time(nullptr);
    std::tm* local = std::localtime(&now);
    std::ostringstream formatted;
    formatted << std::put_time(local, "%Y-%m-%d");
    return formatted.str();
}
// Current local time formatted as HH:MM:SS.
std::string get_time()
{
    std::time_t now = std::time(nullptr);
    std::tm* local = std::localtime(&now);
    std::ostringstream formatted;
    formatted << std::put_time(local, "%H:%M:%S");
    return formatted.str();
}
// Start with an empty JSON object so hyperparameters are always valid JSON.
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
// Result file name:
// results_<score>_<model>_<platform>_<date>_<time>_<stratified-flag>.json
string Experiment::get_file_name()
{
    string name = "results_";
    name += score_name + "_" + model + "_" + platform + "_";
    name += get_date() + "_" + get_time() + "_";
    name += stratified ? "1" : "0";
    name += ".json";
    return name;
}
json Experiment::build_json()
{
json result;
result["title"] = title;
result["date"] = get_date();
result["time"] = get_time();
result["model"] = model;
result["version"] = model_version;
result["platform"] = platform;
result["score_name"] = score_name;
result["language"] = language;
result["language_version"] = language_version;
result["discretized"] = discretized;
result["stratified"] = stratified;
result["folds"] = nfolds;
result["seeds"] = randomSeeds;
result["duration"] = duration;
result["results"] = json::array();
for (const auto& r : results) {
json j;
j["dataset"] = r.getDataset();
j["hyperparameters"] = r.getHyperparameters();
j["samples"] = r.getSamples();
j["features"] = r.getFeatures();
j["classes"] = r.getClasses();
j["score_train"] = r.getScoreTrain();
j["score_test"] = r.getScoreTest();
j["score"] = r.getScoreTest();
j["score_std"] = r.getScoreTestStd();
j["score_train_std"] = r.getScoreTrainStd();
j["score_test_std"] = r.getScoreTestStd();
j["train_time"] = r.getTrainTime();
j["train_time_std"] = r.getTrainTimeStd();
j["test_time"] = r.getTestTime();
j["test_time_std"] = r.getTestTimeStd();
j["time"] = r.getTestTime() + r.getTrainTime();
j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd();
j["scores_train"] = r.getScoresTrain();
j["scores_test"] = r.getScoresTest();
j["times_train"] = r.getTimesTrain();
j["times_test"] = r.getTimesTest();
j["nodes"] = r.getNodes();
j["leaves"] = r.getLeaves();
j["depth"] = r.getDepth();
result["results"].push_back(j);
}
return result;
}
void Experiment::save(const string& path)
{
json data = build_json();
ofstream file(path + "/" + get_file_name());
file << data;
file.close();
}
void Experiment::report()
{
json data = build_json();
ReportConsole report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
cout << data.dump(4) << endl;
}
// Run a cross-validation experiment on every dataset file given.
void Experiment::go(vector<string> filesToProcess, const string& path)
{
    cout << "*** Starting experiment: " << title << " ***" << endl;
    for (const auto& fileName : filesToProcess) {
        cout << "- " << setw(20) << left << fileName << " " << right << flush;
        cross_validation(path, fileName);
        cout << endl;
    }
}
// Map a classifier status to the console color used to display progress.
string getColor(bayesnet::status_t status)
{
    if (status == bayesnet::NORMAL) {
        return Colors::GREEN();
    }
    if (status == bayesnet::WARNING) {
        return Colors::YELLOW();
    }
    if (status == bayesnet::ERROR) {
        return Colors::RED();
    }
    return Colors::RESET();
}
// Print the fold progress marker "N(phase)"; phases after the first ("a")
// first rewind the cursor over the previous marker with backspaces.
void showProgress(int fold, const string& color, const string& phase)
{
    const string rewind = phase == "a" ? "" : "\b\b\b\b";
    cout << rewind << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
// Run (stratified) k-fold cross validation of the configured model over one
// dataset, once per random seed, accumulating per-fold scores/times into a
// Result that is appended to the experiment.
void Experiment::cross_validation(const string& path, const string& fileName)
{
    auto datasets = platform::Datasets(path, discretized, platform::ARFF);
    // Get dataset
    auto [X, y] = datasets.getTensors(fileName);
    auto states = datasets.getStates(fileName);
    auto features = datasets.getFeatures(fileName);
    auto samples = datasets.getNSamples(fileName);
    auto className = datasets.getClassName(fileName);
    cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
    // Prepare Result
    auto result = Result();
    auto [values, counts] = at::_unique(y);
    result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
    result.setHyperparameters(hyperparameters);
    // Initialize results vectors (one slot per seed x fold)
    int nResults = nfolds * static_cast<int>(randomSeeds.size());
    auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
    auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
    auto train_time = torch::zeros({ nResults }, torch::kFloat64);
    auto test_time = torch::zeros({ nResults }, torch::kFloat64);
    auto nodes = torch::zeros({ nResults }, torch::kFloat64);
    auto edges = torch::zeros({ nResults }, torch::kFloat64);
    auto num_states = torch::zeros({ nResults }, torch::kFloat64);
    Timer train_timer, test_timer;
    int item = 0;
    for (auto seed : randomSeeds) {
        cout << "(" << seed << ") doing Fold: " << flush;
        // NOTE(review): raw new/delete — a unique_ptr<Fold> would be leak-safe
        // if anything below throws.
        Fold* fold;
        if (stratified)
            fold = new StratifiedKFold(nfolds, y, seed);
        else
            fold = new KFold(nfolds, y.size(0), seed);
        for (int nfold = 0; nfold < nfolds; nfold++) {
            auto clf = Models::instance()->create(model);
            setModelVersion(clf->getVersion());
            if (hyperparameters.size() != 0) {
                clf->setHyperparameters(hyperparameters);
            }
            // Split train - test dataset
            // (train_timer starts here, so the split is counted as train time)
            train_timer.start();
            auto [train, test] = fold->getFold(nfold);
            auto train_t = torch::tensor(train);
            auto test_t = torch::tensor(test);
            auto X_train = X.index({ "...", train_t });
            auto y_train = y.index({ train_t });
            auto X_test = X.index({ "...", test_t });
            auto y_test = y.index({ test_t });
            // Progress phases: a = setup, b = trained, c = scoring
            showProgress(nfold + 1, getColor(clf->getStatus()), "a");
            // Train model
            clf->fit(X_train, y_train, features, className, states);
            showProgress(nfold + 1, getColor(clf->getStatus()), "b");
            nodes[item] = clf->getNumberOfNodes();
            edges[item] = clf->getNumberOfEdges();
            num_states[item] = clf->getNumberOfStates();
            train_time[item] = train_timer.getDuration();
            // Score train
            auto accuracy_train_value = clf->score(X_train, y_train);
            // Test model
            showProgress(nfold + 1, getColor(clf->getStatus()), "c");
            test_timer.start();
            auto accuracy_test_value = clf->score(X_test, y_test);
            test_time[item] = test_timer.getDuration();
            accuracy_train[item] = accuracy_train_value;
            accuracy_test[item] = accuracy_test_value;
            cout << "\b\b\b, " << flush;
            // Store results and times in vector
            result.addScoreTrain(accuracy_train_value);
            result.addScoreTest(accuracy_test_value);
            result.addTimeTrain(train_time[item].item<double>());
            result.addTimeTest(test_time[item].item<double>());
            item++;
            clf.reset();
        }
        cout << "end. " << flush;
        delete fold;
    }
    // Aggregate means/stds across all seed x fold runs
    result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
    result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
    result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
    result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
    // NOTE(review): leaves/depth are filled with edge/state means — presumably
    // repurposed fields for network models; confirm before relying on them.
    result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
    result.setDataset(fileName);
    addResult(result);
}

View File

@@ -1,117 +0,0 @@
#ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <chrono>
#include "Folding.h"
#include "BaseClassifier.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
using namespace std;
namespace platform {
using json = nlohmann::json;
// Simple wall-clock stopwatch: start() records an instant, getDuration()
// returns the seconds elapsed since then.
class Timer {
private:
    std::chrono::high_resolution_clock::time_point begin;
public:
    Timer() = default;
    ~Timer() = default;
    // Record the starting instant.
    void start() { begin = std::chrono::high_resolution_clock::now(); }
    // Seconds elapsed since the last start() call.
    double getDuration()
    {
        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration<double>(end - begin).count();
    }
};
class Result {
private:
string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
vector<double> scores_train, scores_test, times_train, times_test;
public:
Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; }
Result& setScoreTrain(double score) { this->score_train = score; return *this; }
Result& setScoreTest(double score) { this->score_test = score; return *this; }
Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; }
Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; }
Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; }
Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; }
Result& setTestTime(double test_time) { this->test_time = test_time; return *this; }
Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; }
Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
Result& setDepth(float depth) { this->depth = depth; return *this; }
Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; }
Result& addScoreTest(double score) { scores_test.push_back(score); return *this; }
Result& addTimeTrain(double time) { times_train.push_back(time); return *this; }
Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
const int getClasses() const { return classes; }
const double getScoreTrain() const { return score_train; }
const double getScoreTest() const { return score_test; }
const double getScoreTrainStd() const { return score_train_std; }
const double getScoreTestStd() const { return score_test_std; }
const double getTrainTime() const { return train_time; }
const double getTrainTimeStd() const { return train_time_std; }
const double getTestTime() const { return test_time; }
const double getTestTimeStd() const { return test_time_std; }
const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; }
const vector<double>& getScoresTrain() const { return scores_train; }
const vector<double>& getScoresTest() const { return scores_test; }
const vector<double>& getTimesTrain() const { return times_train; }
const vector<double>& getTimesTest() const { return times_test; }
};
// Aggregates the configuration and per-dataset Results of one experiment run
// and serialises / reports them. All setters are fluent (return *this).
class Experiment {
private:
    string title, model, platform, score_name, model_version, language_version, language;
    bool discretized{ false }, stratified{ false };
    vector<Result> results;
    vector<int> randomSeeds;
    // Default to an empty JSON *object*. The previous initializer ("{}")
    // created a JSON *string* containing the two characters "{}", not an
    // empty object, so the serialised output carried a quoted string.
    json hyperparameters = json::object();
    int nfolds{ 0 };
    float duration{ 0 };
    json build_json();
public:
    Experiment();
    Experiment& setTitle(const string& title) { this->title = title; return *this; }
    Experiment& setModel(const string& model) { this->model = model; return *this; }
    Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
    Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; }
    Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; }
    Experiment& setLanguage(const string& language) { this->language = language; return *this; }
    Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; }
    Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
    Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
    Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
    // Sink parameter: move into the vector instead of copying the
    // (vector-heavy) Result a second time.
    Experiment& addResult(Result result) { results.push_back(std::move(result)); return *this; }
    Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
    Experiment& setDuration(float duration) { this->duration = duration; return *this; }
    Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
    string get_file_name();
    void save(const string& path);
    void cross_validation(const string& path, const string& fileName);
    void go(vector<string> filesToProcess, const string& path);
    void show();
    void report();
};
}
#endif

View File

@@ -1,97 +0,0 @@
#include "Folding.h"
#include <algorithm>
#include <map>
namespace platform {
// Base-class constructor: records fold count k, sample count n and the seed,
// then seeds the RNGs used by the folding code. seed == -1 means
// "non-deterministic" (random_device / time(0)).
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
random_device rd;
random_seed = default_random_engine(seed == -1 ? rd() : seed);
// NOTE(review): srand() seeds the *global* C RNG (used by
// StratifiedKFold::build); this is process-wide state, not per-Fold.
srand(seed == -1 ? time(0) : seed);
}
// Plain K-fold splitter over n samples: indices 0..n-1 are shuffled once
// here; getFold() then slices this permutation into k contiguous chunks.
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
// Returns {train indices, test indices} for fold nFold in [0, k).
// The shuffled index vector is cut into k chunks of n/k elements; chunk
// nFold is the test set, everything else (including the n % k remainder)
// goes to the training set.
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
{
    if (nFold >= k || nFold < 0) {
        throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
    }
    const int chunk = n / k;
    const int testBegin = chunk * nFold;
    const int testEnd = chunk * (nFold + 1);
    auto train = vector<int>();
    auto test = vector<int>();
    for (int i = 0; i < n; i++) {
        auto& target = (i >= testBegin && i < testEnd) ? test : train;
        target.push_back(indices[i]);
    }
    return { train, test };
}
// Torch-tensor overload: copies the labels out of the tensor and delegates
// to build(). Assumes y holds 32-bit ints (data_ptr<int>) — TODO confirm callers.
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
// STL overload: stores the label vector directly and precomputes the folds.
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
// Precomputes stratified_indices: one index list per fold, with each class
// label spread as evenly as possible across the k folds.
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
// NOTE(review): labels with fewer than k samples are skipped entirely,
// so their samples never reach any fold — confirm this is intended.
if (num_samples_to_take == 0)
continue;
auto remainder_samples_to_take = class_counts[label] % k;
// Give every fold the same base share of this label.
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
// Spread the per-label remainder over random folds (uses the C rand()
// seeded in Fold's constructor), capping folds at fold_size + 1 entries.
// NOTE(review): if every fold already holds fold_size + 1 entries this
// loop spins forever — verify against expected input sizes.
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
continue;
}
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
}
}
}
// Returns {train indices, test indices} for fold nFold in [0, k).
// Test set = the precomputed stratum for this fold; train set = the
// concatenation of every other stratum.
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
{
    if (nFold >= k || nFold < 0) {
        throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
    }
    vector<int> test_indices = stratified_indices[nFold];
    vector<int> train_indices;
    for (int i = 0; i < k; ++i) {
        if (i != nFold) {
            train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
        }
    }
    return { train_indices, test_indices };
}
}

View File

@@ -1,38 +0,0 @@
#ifndef FOLDING_H
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
#include <random>
using namespace std;
namespace platform {
// Abstract base for train/test splitters: holds the fold count (k), the
// number of samples (n), the seed and the shuffling engine shared by the
// concrete splitters.
class Fold {
protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
// Returns {train indices, test indices} for fold nFold in [0, k).
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
// Uniform K-fold splitter: one shuffled permutation cut into k chunks.
class KFold : public Fold {
private:
vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
// K-fold splitter that preserves the class distribution of `y` per fold.
class StratifiedKFold : public Fold {
private:
vector<int> y;
// Precomputed per-fold index lists built once in build().
vector<vector<int>> stratified_indices;
void build();
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
}
#endif

View File

@@ -1,54 +0,0 @@
#include "Models.h"
namespace platform {
using namespace std;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
// Singleton instance storage (see Models::instance()); stray second semicolon removed.
Models* Models::factory = nullptr;
// Returns the process-wide singleton, creating it on first use.
// NOTE(review): not thread-safe (unguarded check-then-create) and the
// instance is never deleted — confirm both are acceptable here.
Models* Models::instance()
{
//manages singleton
if (factory == nullptr)
factory = new Models();
return factory;
}
// Registers (or replaces) the factory function used to build the classifier
// known as `name`.
void Models::registerFactoryFunction(const string& name,
    function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
    functionRegistry.insert_or_assign(name, classFactoryFunction);
}
// Looks `name` up in the registry and builds a new classifier instance, or
// returns nullptr when the name is unknown. The returned shared_ptr owns
// the object.
shared_ptr<bayesnet::BaseClassifier> Models::create(const string& name)
{
    const auto entry = functionRegistry.find(name);
    if (entry == functionRegistry.end()) {
        return nullptr;
    }
    return shared_ptr<bayesnet::BaseClassifier>(entry->second());
}
// Lists the registered classifier names (registry order, i.e. sorted by key).
vector<string> Models::getNames()
{
    vector<string> names;
    names.reserve(functionRegistry.size());
    for (const auto& [name, factoryFn] : functionRegistry) {
        names.push_back(name);
    }
    return names;
}
// Renders the registered names as "{A, B, C}" ("{}" when the registry is
// empty). Built with a separator instead of the old trailing-", " trim,
// which relied on substr with a wrapped-around size_t for the empty case.
string Models::toString()
{
    string result;
    string sep;
    for (const auto& pair : functionRegistry) {
        result += sep + pair.first;
        sep = ", ";
    }
    return "{" + result + "}";
}
// Helper whose construction side effect registers a classifier factory with
// the Models singleton; intended to be instantiated as a static per model.
Registrar::Registrar(const string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction);
}
}

View File

@@ -1,37 +0,0 @@
#ifndef MODELS_H
#define MODELS_H
#include <map>
#include "BaseClassifier.h"
#include "AODE.h"
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
namespace platform {
// Singleton registry mapping classifier names to factory functions
// (populated through Registrar objects).
class Models {
private:
map<string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
Models(Models&) = delete;
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance();
// Builds a new classifier by name; nullptr when the name is unknown.
shared_ptr<bayesnet::BaseClassifier> create(const string& name);
void registerFactoryFunction(const string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
vector<string> getNames();
string toString();
};
// Instantiate one (static) Registrar per model to register its factory
// with the Models singleton at startup.
class Registrar {
public:
Registrar(const string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

View File

@@ -1,12 +0,0 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
namespace platform {
// Central definition of the relative folders the platform reads and writes.
class Paths {
public:
static std::string datasets() { return "datasets/"; }
static std::string results() { return "results/"; }
static std::string excel() { return "excel/"; }
};
#endif

View File

@@ -1,114 +0,0 @@
#include <sstream>
#include <locale>
#include "Datasets.h"
#include "ReportBase.h"
#include "BestScore.h"
namespace platform {
// Stores the run data, whether to compare against the stored best results,
// and the ZeroR margin (10%) used by compareResult().
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
stringstream oss;
oss << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%";
// Legend shown in report footers: status symbol -> human readable meaning.
meaning = {
{Symbols::equal_best, "Equal to best"},
{Symbols::better_best, "Better than best"},
{Symbols::cross, "Less than or equal to ZeroR"},
{Symbols::upward_arrow, oss.str()}
};
}
// Formats the JSON array stored under `key` as "[a, b, ...]".
string ReportBase::fromVector(const string& key)
{
    stringstream oss;
    bool first = true;
    oss << "[";
    for (auto& item : data[key]) {
        if (!first) {
            oss << ", ";
        }
        first = false;
        oss << item.get<double>();
    }
    oss << "]";
    return oss.str();
}
// Formats a JSON array of doubles as "title[ x, y, ...]", printing every
// element with the given fixed width and precision.
string ReportBase::fVector(const string& title, const json& data, const int width, const int precision)
{
    stringstream oss;
    string sep;
    oss << title << "[";
    for (const auto& item : data) {
        oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>();
        sep = ", ";
    }
    oss << "]";
    return oss.str();
}
// Template method: renders the header and then the body (both subclass-defined).
void ReportBase::show()
{
header();
body();
}
// Returns the one-character status symbol for a dataset's score and tallies
// it into `summary`. Two modes:
//  - compare == true : compare against the stored best result for the model.
//  - compare == false: for binary accuracy runs, compare against a ZeroR
//    (majority class) baseline inflated by `margin`.
string ReportBase::compareResult(const string& dataset, double result)
{
string status = " ";
if (compare) {
double best = bestResult(dataset, data["model"].get<string>());
// NOTE(review): exact double equality — assumes scores round-trip exactly
// through the JSON files; confirm.
if (result == best) {
status = Symbols::equal_best;
} else if (result > best) {
status = Symbols::better_best;
}
} else {
if (data["score_name"].get<string>() == "accuracy") {
// ZeroR baseline is only computed for two-class datasets.
auto dt = Datasets(Paths::datasets(), false);
dt.loadDataset(dataset);
auto numClasses = dt.getNClasses(dataset);
if (numClasses == 2) {
vector<int> distribution = dt.getClassesCounts(dataset);
double nSamples = dt.getNSamples(dataset);
vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
// Threshold = majority-class frequency inflated by margin, clamped below 1.
double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) {
mark = 0.9995;
}
status = result < mark ? Symbols::cross : result > mark ? Symbols::upward_arrow : "=";
}
}
}
// Count every non-blank symbol for the footer summary.
if (status != " ") {
auto item = summary.find(status);
if (item != summary.end()) {
summary[status]++;
} else {
summary[status] = 1;
}
}
return status;
}
double ReportBase::bestResult(const string& dataset, const string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
// False when bestResult() failed to open the best-results file.
bool ReportBase::getExistBestFile()
{
return existBestFile;
}
}

View File

@@ -1,46 +0,0 @@
#ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string>
#include <iostream>
#include "Paths.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
using namespace std;
// UTF-8 symbols used to mark each result's status in the reports.
class Symbols {
public:
inline static const string check_mark{ "\u2714" };
inline static const string exclamation{ "\u2757" };
inline static const string black_star{ "\u2605" };
inline static const string cross{ "\u2717" };
inline static const string upward_arrow{ "\u27B6" };
inline static const string down_arrow{ "\u27B4" };
// Aliases that give the comparison semantics of the glyphs above.
inline static const string equal_best{ check_mark };
inline static const string better_best{ black_star };
};
// Abstract report over one experiment's JSON data. Subclasses render the
// header/body/summary to a concrete target (console, Excel, ...).
class ReportBase {
public:
// compare == true annotates each result against the stored best results.
explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default;
void show();
protected:
json data;
string fromVector(const string& key);
string fVector(const string& title, const json& data, const int width, const int precision);
bool getExistBestFile();
virtual void header() = 0;
virtual void body() = 0;
virtual void showSummary() = 0;
string compareResult(const string& dataset, double result);
// status symbol -> number of datasets with that status
map<string, int> summary;
// ZeroR comparison margin (0.1, set in the constructor)
double margin;
// status symbol -> human readable explanation
map<string, string> meaning;
bool compare;
private:
double bestResult(const string& dataset, const string& model);
json bestResults;
bool existBestFile = true;
};
};
#endif

View File

@@ -1,112 +0,0 @@
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestScore.h"
namespace platform {
// numpunct facet giving Spanish-style numbers: ',' as decimal separator,
// '.' as thousands separator, digits grouped in threes. Installed into
// cout by header(). `override` added and std:: spelled out so the struct
// no longer depends on a `using namespace std` being in scope.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
// Wraps `text` in "* ... *" padded to the report width (MAXL). `utf` adds
// extra padding to compensate for multi-byte UTF-8 symbols whose on-screen
// width is smaller than their byte count.
string ReportConsole::headerLine(const string& text, int utf = 0)
{
    // Do the arithmetic in int: the original `MAXL - text.length() - 3`
    // mixed int with size_t, so an over-long text wrapped to a huge unsigned
    // value before the implementation-defined conversion back to int.
    int n = MAXL - static_cast<int>(text.length()) - 3;
    n = n < 0 ? 0 : n;
    return "* " + text + string(n + utf, ' ') + "*\n";
}
// Prints the magenta report banner: model/version, folds, seeds, duration,
// platform and score name.
void ReportConsole::header()
{
// Install the ','/'.' number formatting.
// NOTE(review): locale::global() mutates process-wide state on every call.
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
cout << headerLine(oss.str());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
}
// Prints one coloured row per dataset result. When the report holds a single
// result (or one was selected via selectedIndex) the per-fold score/time
// vectors are printed instead of the footer totals.
void ReportConsole::body()
{
cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "=== ========================= ====== ===== === ========= ========= ========= =============== =================== ====================" << endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
int index = 0;
for (const auto& r : data["results"]) {
// selectedIndex == -1 means "show every dataset".
if (selectedIndex != -1 && index != selectedIndex) {
index++;
continue;
}
// Alternate row colours for readability.
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color;
cout << setw(3) << index++ << " ";
cout << setw(25) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>();
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
// hyperparameters may be stored either as a string or as a JSON object.
try {
cout << r["hyperparameters"].get<string>();
}
catch (const exception& err) {
cout << r["hyperparameters"];
}
cout << endl;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1 || selectedIndex != -1) {
// Single-dataset view: dump the per-fold vectors.
cout << string(MAXL, '*') << endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl;
} else {
footer(totalScore);
}
}
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
stringstream oss;
oss << setw(3) << left << item.first;
oss << setw(3) << right << item.second << " ";
oss << left << meaning.at(item.first);
cout << headerLine(oss.str(), 2);
}
}
// Prints the summary block: status counts, the aggregate score relative to
// BestScore (when the score name matches) and a warning when the
// best-results file was missing.
void ReportConsole::footer(double totalScore)
{
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
showSummary();
auto score = data["score_name"].get<string>();
if (score == BestScore::scoreName()) {
stringstream oss;
oss << score << " compared to " << BestScore::title() << " .: " << totalScore / BestScore::score();
cout << headerLine(oss.str());
}
if (!getExistBestFile() && compare) {
cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
cout << string(MAXL, '*') << endl << Colors::RESET();
}
}

View File

@@ -1,24 +0,0 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include <iostream>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXL = 133;
// Renders an experiment report to stdout with ANSI colours.
class ReportConsole : public ReportBase {
public:
// index selects a single dataset to display (-1 = all).
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default;
private:
int selectedIndex;
string headerLine(const string& text, int utf);
void header() override;
void body() override;
void footer(double totalScore);
void showSummary() override;
};
};
#endif

View File

@@ -1,333 +0,0 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestScore.h"
namespace platform {
// numpunct facet giving Spanish-style numbers: ',' as decimal separator,
// '.' as thousands separator, digits grouped in threes. Installed into
// cout by ReportExcel::header(). `override` added and std:: spelled out so
// the struct no longer depends on a `using namespace std` being in scope.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
// Sets the palette/font defaults and creates the worksheet for this report.
// Pass workbook == nullptr to create a new file at Paths::excel() + fileName.
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook) : ReportBase(data_, compare), row(0), workbook(workbook)
{
normalSize = 14; //font size for report body
colorTitle = 0xB1A0C7;
colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9;
createFile();
}
// Exposes the workbook so several reports can share one xlsx file.
lxw_workbook* ReportExcel::getWorkbook()
{
return workbook;
}
// Resolves a logical style name to the concrete format for the current row:
// the "_odd"/"_even" variant alternates the background per row. An empty
// name means "no formatting" (nullptr). The old local variable shadowing
// the function name and the NULL macro are gone.
lxw_format* ReportExcel::efectiveStyle(const string& style)
{
    if (style.empty()) {
        return nullptr;
    }
    const string suffix = row % 2 ? "_odd" : "_even";
    return styles.at(style + suffix);
}
// Writes text at (row, col) using the named style (see efectiveStyle).
void ReportExcel::writeString(int row, int col, const string& text, const string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
// Writes an integer at (row, col) using the named style.
void ReportExcel::writeInt(int row, int col, const int number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
// Writes a double at (row, col) using the named style.
void ReportExcel::writeDouble(int row, int col, const double number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
// Freezes the five header rows plus the first column and applies a fixed
// width to each of the 13 report columns.
void ReportExcel::formatColumns()
{
    worksheet_freeze_panes(worksheet, 6, 1);
    vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
    // size_t index avoids the signed/unsigned comparison of the original loop.
    for (size_t i = 0; i < columns_sizes.size(); ++i) {
        worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
    }
}
// Applies the alternating row background colour to `style`.
// NOTE(review): `odd == true` selects colorEven and vice versa — the naming
// looks inverted; confirm which parity each colour is meant for.
void ReportExcel::addColor(lxw_format* style, bool odd)
{
uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor));
}
// Configures `style` according to its logical name and applies the odd/even
// row background (see addColor). The num_format strings follow the
// libxlsxwriter number-format syntax.
void ReportExcel::createStyle(const string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
// Creates every named cell format used by the report and stores it in
// `styles`: an "_odd"/"_even" variant per body style plus the header and
// summary styles. The workbook owns the lxw_format objects.
// Fixed: the loop used to call workbook_add_format() twice in a row,
// abandoning the first format; the unused outer `style` variable is gone.
void ReportExcel::createFormats()
{
    auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
    for (string name : styleNames) {
        // One format per parity so rows can alternate background colours.
        lxw_format* style = workbook_add_format(workbook);
        createStyle(name, style, true);
        styles[name + "_odd"] = style;
        style = workbook_add_format(workbook);
        createStyle(name, style, false);
        styles[name + "_even"] = style;
    }
    // Header 1st line
    lxw_format* headerFirst = workbook_add_format(workbook);
    format_set_bold(headerFirst);
    format_set_font_size(headerFirst, 18);
    format_set_align(headerFirst, LXW_ALIGN_CENTER);
    format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
    format_set_border(headerFirst, LXW_BORDER_THIN);
    format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
    // Header rest
    lxw_format* headerRest = workbook_add_format(workbook);
    format_set_bold(headerRest);
    format_set_align(headerRest, LXW_ALIGN_CENTER);
    format_set_font_size(headerRest, 16);
    format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
    format_set_border(headerRest, LXW_BORDER_THIN);
    format_set_bg_color(headerRest, lxw_color_t(colorOdd));
    // Header small
    lxw_format* headerSmall = workbook_add_format(workbook);
    format_set_bold(headerSmall);
    format_set_align(headerSmall, LXW_ALIGN_LEFT);
    format_set_font_size(headerSmall, 12);
    format_set_border(headerSmall, LXW_BORDER_THIN);
    format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
    format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
    // Summary style
    lxw_format* summaryStyle = workbook_add_format(workbook);
    format_set_bold(summaryStyle);
    format_set_font_size(summaryStyle, 16);
    format_set_border(summaryStyle, LXW_BORDER_THIN);
    format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
    styles["headerFirst"] = headerFirst;
    styles["headerRest"] = headerRest;
    styles["headerSmall"] = headerSmall;
    styles["summaryStyle"] = summaryStyle;
}
void ReportExcel::setProperties()
{
char line[data["title"].get<string>().size() + 1];
strcpy(line, data["title"].get<string>().c_str());
lxw_doc_properties properties = {
.title = line,
.subject = (char*)"Machine learning results",
.author = (char*)"Ricardo Montañana Gómez",
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
.company = (char*)"UCLM",
.comments = (char*)"Created with libxlsxwriter and c++",
};
workbook_set_properties(workbook, &properties);
}
// Creates (or reuses) the workbook and adds a worksheet named after the
// model; when a sheet with that name already exists a numeric suffix is
// appended until a free name is found.
void ReportExcel::createFile()
{
if (workbook == NULL) {
workbook = workbook_new((Paths::excel() + fileName).c_str());
}
const string name = data["model"].get<string>();
string suffix = "";
string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
// Safety valve against an endless rename loop.
if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName);
}
}
cout << "Adding sheet " << efectiveName << " to " << Paths::excel() + fileName << endl;
setProperties();
createFormats();
formatColumns();
}
// Finalises and writes the xlsx file; the workbook must not be used after.
void ReportExcel::closeFile()
{
workbook_close(workbook);
}
// Writes the sheet banner: title rows, duration (seconds and hours),
// platform, random seeds and the stratified/discretized flags.
// NOTE(review): locale::global() mutates process-wide state on every report.
void ReportExcel::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " +
data["language"].get<string>() + " ver. " + data["language_version"].get<string>() +
" with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
oss << setprecision(2) << fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
// str("") + clear() resets both the buffer and the stream state for reuse.
oss.str("");
oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}
// Fills the result table (headers at row 5, data from row 6). Afterwards it
// either dumps the per-fold vectors (single-dataset report) or delegates
// the totals to footer().
void ReportExcel::body()
{
auto head = vector<string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" });
int col = 0;
for (const auto& item : head) {
writeString(5, col++, item, "bodyHeader");
}
row = 6;
col = 0;
// Widest hyperparameter string seen so far (starts at a 22-char minimum).
int hypSize = 22;
json lastResult;
double totalScore = 0.0;
string hyperparameters;
for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
// NOTE(review): depth is read as double while nodes/leaves use float —
// confirm whether this asymmetry is intended.
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
// hyperparameters may be stored as a string or as a JSON object.
try {
hyperparameters = r["hyperparameters"].get<string>();
}
catch (const exception& err) {
stringstream oss;
oss << r["hyperparameters"];
hyperparameters = oss.str();
}
// NOTE(review): size() (size_t) compared against int hypSize — works for
// realistic sizes, but the types should match.
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
writeString(row, col, group, "text");
for (double item : lastResult[group]) {
string style = group.find("scores") != string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
}
}
// Set with of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else {
footer(totalScore, row);
}
}
// Writes one summary row per status symbol (symbol, count, meaning) below
// the result table. Increments the member `row` as a side effect.
void ReportExcel::showSummary()
{
for (const auto& item : summary) {
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
row += 1;
}
}
// Writes the footer below `row`: the summary counts, the optional BestScore
// comparison and the missing-best-file warning.
// NOTE(review): the parameter shadows the member `row` that showSummary()
// mutates — confirm the offsets line up as intended.
void ReportExcel::footer(double totalScore, int row)
{
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<string>();
if (score == BestScore::scoreName()) {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text"));
writeDouble(row, 6, totalScore / BestScore::score(), "result");
}
if (!getExistBestFile() && compare) {
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
}
}
}

View File

@@ -1,42 +0,0 @@
#ifndef REPORTEXCEL_H
#define REPORTEXCEL_H
#include<map>
#include "xlsxwriter.h"
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXLL = 128;
// Renders an experiment report into a worksheet of an xlsx workbook
// (libxlsxwriter). One sheet per model; callers can share a workbook across
// reports via getWorkbook().
class ReportExcel : public ReportBase {
public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook);
lxw_workbook* getWorkbook();
private:
void writeString(int row, int col, const string& text, const string& style = "");
void writeInt(int row, int col, const int number, const string& style = "");
void writeDouble(int row, int col, const double number, const string& style = "");
void formatColumns();
void createFormats();
void setProperties();
void createFile();
// NOTE(review): closeFile() has no visible caller here — confirm who
// finally closes the workbook.
void closeFile();
lxw_workbook* workbook;
lxw_worksheet* worksheet;
// style name (+ "_odd"/"_even" suffix) -> format owned by the workbook
map<string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
const string fileName = "some_results.xlsx";
void header() override;
void body() override;
void showSummary() override;
void footer(double totalScore, int row);
void createStyle(const string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name);
};
};
#endif // !REPORTEXCEL_H

View File

@@ -1,51 +0,0 @@
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Result.h"
#include "Colors.h"
#include "BestScore.h"
namespace platform {
// Loads "<path>/<filename>" and caches its headline fields.
// The score is the sum of the partial results' scores; when the score name
// matches the registered best score it is normalized by that best score.
Result::Result(const string& path, const string& filename)
    : path(path)
    , filename(filename)
{
    auto data = load();
    date = data["date"];
    score = 0;
    for (const auto& result : data["results"]) {
        score += result["score"].get<double>();
    }
    scoreName = data["score_name"];
    if (scoreName == BestScore::scoreName()) {
        // Express the accumulated score relative to the best known score.
        score /= BestScore::score();
    }
    title = data["title"];
    duration = data["duration"];
    model = data["model"];
    // NOTE(review): "complete" is taken to mean more than one partial result;
    // confirm this criterion (vs. an expected number of datasets/folds).
    complete = data["results"].size() > 1;
}
// Parses the json result file and returns its full contents.
// Throws invalid_argument when the file cannot be opened.
json Result::load() const
{
    const auto file = path + "/" + filename;
    ifstream resultData(file);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + file + "]");
    }
    return json::parse(resultData);
}
// Renders the result as one fixed-width line: date, model, score name,
// score, completion flag (C=complete, P=partial), duration and title.
string Result::to_string() const
{
    stringstream line;
    line << date << " ";
    line << setw(12) << left << model << " ";
    line << setw(11) << left << scoreName << " ";
    line << right << setw(11) << setprecision(7) << fixed << score << " ";
    line << setw(1) << " " << (isComplete() ? "C" : "P") << " ";
    line << setw(9) << setprecision(3) << fixed << duration << " ";
    line << setw(50) << left << title << " ";
    return line.str();
}
}

View File

@@ -1,37 +0,0 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
    using namespace std;
    using json = nlohmann::json;
    // In-memory view of a single experiment result file: loads the json
    // on construction and exposes its headline fields through getters.
    class Result {
    public:
        // path: results directory; filename: json file inside that directory.
        Result(const string& path, const string& filename);
        // Parses and returns the whole json file; throws invalid_argument
        // when the file cannot be opened.
        json load() const;
        // One fixed-width line suitable for a console listing.
        string to_string() const;
        string getFilename() const { return filename; };
        string getDate() const { return date; };
        double getScore() const { return score; };
        string getTitle() const { return title; };
        double getDuration() const { return duration; };
        string getModel() const { return model; };
        string getScoreName() const { return scoreName; };
        bool isComplete() const { return complete; };
    private:
        string path;
        string filename;
        string date;
        double score;    // sum of partial scores, normalized when best score applies
        string title;
        double duration;
        string model;
        string scoreName;
        bool complete;   // true when the file holds more than one partial result
    };
};
#endif

View File

@@ -1,268 +0,0 @@
#include <filesystem>
#include "platformUtils.h"
#include "Results.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
#include "BestScore.h"
#include "Colors.h"
namespace platform {
// Loads every "results_*.json" file found in the results directory,
// keeping only those that match the model/score filters and the
// complete/partial flags. When no maximum was given (max == 0), it
// defaults to the number of files loaded.
void Results::load()
{
    using std::filesystem::directory_iterator;
    for (const auto& file : directory_iterator(path)) {
        auto filename = file.path().filename().string();
        if (filename.find(".json") != string::npos && filename.find("results_") == 0) {
            auto result = Result(path, filename);
            // Explicit grouping: && binds tighter than ||, but the original
            // unparenthesized mix is error-prone and triggers -Wparentheses.
            bool discard = (model != "any" && result.getModel() != model)
                || (scoreName != "any" && scoreName != result.getScoreName())
                || (complete && !result.isComplete())
                || (partial && result.isComplete());
            if (!discard)
                files.push_back(result);
        }
    }
    if (max == 0) {
        max = files.size();
    }
}
// Lists on console up to max results, one line per result with
// alternating colors, under a header matching Result::to_string's columns.
void Results::show() const
{
    cout << Colors::GREEN() << "Results found: " << files.size() << endl;
    cout << "-------------------" << endl;
    if (complete) {
        cout << Colors::MAGENTA() << "Only listing complete results" << endl;
    }
    if (partial) {
        cout << Colors::MAGENTA() << "Only listing partial results" << endl;
    }
    auto i = 0;
    cout << Colors::GREEN() << " # Date Model Score Name Score C/P Duration Title" << endl;
    cout << "=== ========== ============ =========== =========== === ========= =============================================================" << endl;
    bool odd = true;
    for (const auto& result : files) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << result.to_string() << endl;
        // Honor the listing limit (i was already incremented, so exactly
        // max entries have been printed at this point).
        if (i == max && max != 0) {
            break;
        }
        odd = !odd;
    }
}
int Results::getIndex(const string& intent) const
{
string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
cout << color << "Choose result to " << intent << " (cancel=-1): ";
string line;
getline(cin, line);
int index = stoi(line);
if (index >= -1 && index < static_cast<int>(files.size())) {
return index;
}
cout << "Invalid index" << endl;
return -1;
}
// Shows the report for the result at the given index, either as an Excel
// sheet appended to the shared workbook or directly on console.
void Results::report(const int index, const bool excelReport)
{
    cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
    auto data = files.at(index).load();
    if (excelReport) {
        ReportExcel reporter(data, compare, workbook);
        reporter.show();
        // Remember to close the workbook in manage() and keep the handle
        // returned by the reporter for subsequent Excel reports.
        openExcel = true;
        workbook = reporter.getWorkbook();
    } else {
        ReportConsole reporter(data, compare);
        reporter.show();
    }
}
// Prints on console the partial result number idx contained in the
// experiment file at position index of the files list.
void Results::showIndex(const int index, const int idx) const
{
    auto resultData = files.at(index).load();
    const int numResults = static_cast<int>(resultData["results"].size());
    if (idx < 0 || idx >= numResults) {
        cout << "Invalid index" << endl;
        return;
    }
    cout << Colors::YELLOW() << "Showing " << files.at(index).getFilename() << endl;
    ReportConsole reporter(resultData, compare, idx);
    reporter.show();
}
// Interactive loop: reads one-letter commands (or a numeric index) from
// stdin and performs list/delete/hide/sort/report/excel actions until 'q'.
void Results::menu()
{
    char option;
    int index;
    bool finished = false;
    string color, context;
    string filename, line, options = "qldhsre";
    while (!finished) {
        // The accepted commands depend on what is on screen: the files
        // list takes the full set, a single report only quit/list.
        if (indexList) {
            color = Colors::GREEN();
            context = " (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): ";
            options = "qldhsre";
        } else {
            color = Colors::MAGENTA();
            context = " (quit='q', list='l'): ";
            options = "ql";
        }
        cout << Colors::RESET() << color;
        cout << "Choose option " << context;
        getline(cin, line);
        if (line.size() == 0)
            continue;
        if (options.find(line[0]) != string::npos) {
            if (line.size() > 1) {
                cout << "Invalid option" << endl;
                continue;
            }
            option = line[0];
        } else {
            // Not a command letter: a bare number selects an entry directly.
            if (all_of(line.begin(), line.end(), ::isdigit)) {
                int idx = stoi(line);
                if (indexList) {
                    // The value is about the files list
                    index = idx;
                    if (index >= 0 && index < max) {
                        report(index, false);
                        indexList = false;
                        continue;
                    }
                } else {
                    // The value is about the result showed on screen
                    showIndex(index, idx);
                    continue;
                }
            }
            cout << "Invalid option" << endl;
            continue;
        }
        switch (option) {
            case 'q':
                finished = true;
                break;
            case 'l':
                show();
                indexList = true;
                break;
            case 'd':
                // Delete the chosen result file from disk and from the list.
                index = getIndex("delete");
                if (index == -1)
                    break;
                filename = files[index].getFilename();
                cout << "Deleting " << filename << endl;
                remove((path + "/" + filename).c_str());
                files.erase(files.begin() + index);
                cout << "File: " + filename + " deleted!" << endl;
                show();
                indexList = true;
                break;
            case 'h':
                // Hide: rename with a leading dot so load() skips the file.
                index = getIndex("hide");
                if (index == -1)
                    break;
                filename = files[index].getFilename();
                cout << "Hiding " << filename << endl;
                rename((path + "/" + filename).c_str(), (path + "/." + filename).c_str());
                files.erase(files.begin() + index);
                show();
                // NOTE(review): this recursive menu() call looks unintended
                // (every other case just refreshes the list); confirm whether
                // it should be removed.
                menu();
                indexList = true;
                break;
            case 's':
                sortList();
                indexList = true;
                show();
                break;
            case 'r':
                index = getIndex("report");
                if (index == -1)
                    break;
                indexList = false;
                report(index, false);
                break;
            case 'e':
                index = getIndex("excel");
                if (index == -1)
                    break;
                indexList = true;
                report(index, true);
                break;
            default:
                cout << "Invalid option" << endl;
        }
    }
}
// Asks the user for a sorting criterion and dispatches to the matching
// sort method. Empty input cancels; anything else prints an error.
void Results::sortList()
{
    cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
    string line;
    getline(cin, line);
    if (line.empty())
        return;
    if (line.size() != 1) {
        cout << "Invalid option" << endl;
        return;
    }
    switch (line[0]) {
        case 'd':
            sortDate();
            break;
        case 's':
            sortScore();
            break;
        case 'u':
            sortDuration();
            break;
        case 'm':
            sortModel();
            break;
        default:
            cout << "Invalid option" << endl;
    }
}
// Sorts the files list by date, newest first.
void Results::sortDate()
{
    std::sort(files.begin(), files.end(), [](const Result& lhs, const Result& rhs) {
        return lhs.getDate() > rhs.getDate();
    });
}
// Sorts the files list by model name, descending.
void Results::sortModel()
{
    std::sort(files.begin(), files.end(), [](const Result& lhs, const Result& rhs) {
        return lhs.getModel() > rhs.getModel();
    });
}
// Sorts the files list by duration, longest first.
void Results::sortDuration()
{
    std::sort(files.begin(), files.end(), [](const Result& lhs, const Result& rhs) {
        return lhs.getDuration() > rhs.getDuration();
    });
}
// Sorts the files list by score, highest first.
void Results::sortScore()
{
    std::sort(files.begin(), files.end(), [](const Result& lhs, const Result& rhs) {
        return lhs.getScore() > rhs.getScore();
    });
}
// Entry point: sorts the results by date, lists them and runs the
// interactive menu; closes the Excel workbook if any report was created.
void Results::manage()
{
    if (files.empty()) {
        cout << "No results found!" << endl;
        exit(0);
    }
    sortDate();
    show();
    menu();
    if (openExcel) {
        workbook_close(workbook);
    }
    cout << Colors::RESET() << "Done!" << endl;
}
}

View File

@@ -1,47 +0,0 @@
#ifndef RESULTS_H
#define RESULTS_H
#include "xlsxwriter.h"
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "Result.h"
namespace platform {
    using namespace std;
    using json = nlohmann::json;
    // Interactive manager for the result files of a directory: loads them
    // (filtered), lists them and serves a console menu to report, delete,
    // hide or sort them.
    class Results {
    public:
        // path: results directory; max: maximum entries to list (0 = all);
        // model/score: filters ("any" disables each); complete/partial:
        // restrict to complete or partial results; compare: compare scores
        // against the best known results in the reports.
        Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
            path(path), max(max), model(model), scoreName(score), complete(complete), partial(partial), compare(compare)
        {
            load();
        };
        void manage();
    private:
        string path;
        int max;
        string model;
        string scoreName;
        bool complete;
        bool partial;
        bool indexList = true;  // true while the files list is on screen
        bool openExcel = false; // true once an Excel report has been generated
        bool compare;
        lxw_workbook* workbook = NULL;
        vector<Result> files;
        void load(); // Loads the list of results
        void show() const;
        void report(const int index, const bool excelReport);
        void showIndex(const int index, const int idx) const;
        int getIndex(const string& intent) const;
        void menu();
        void sortList();
        void sortDate();
        void sortScore();
        void sortModel();
        void sortDuration();
    };
};
#endif

View File

@@ -1,63 +0,0 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
using namespace std;
// Defines and parses the command line arguments of the "best" utility.
// Both --model and --score must be supplied; on any parse/validation
// error it prints the message plus usage and exits with status 1.
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
    argparse::ArgumentParser program("best");
    // Fixed stray ')' typo in the original help text.
    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model (any for all models)");
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        // Only the mandatory string arguments are validated here; the
        // --build/--report flags are checked by the caller. (The original
        // also fetched them here into unused locals.)
        auto model = program.get<string>("model");
        auto score = program.get<string>("score");
        if (model == "" || score == "") {
            throw runtime_error("Model and score name must be supplied");
        }
    }
    catch (const exception& err) {
        cerr << err.what() << endl;
        cerr << program;
        exit(1);
    }
    return program;
}
// Entry point of the "best" utility: builds and/or reports the best
// score results files, for one model or for all of them ("any").
int main(int argc, char** argv)
{
    auto program = manageArguments(argc, argv);
    auto model = program.get<string>("model");
    auto score = program.get<string>("score");
    auto build = program.get<bool>("build");
    auto report = program.get<bool>("report");
    // At least one action is required; manageArguments only validated
    // the model/score strings.
    if (!report && !build) {
        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
        cerr << program;
        exit(1);
    }
    auto results = platform::BestResults(platform::Paths::results(), score, model);
    if (build) {
        if (model == "any") {
            // Build one best-results file per model found.
            results.buildAll();
        } else {
            string fileName = results.build();
            cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
        }
    }
    if (report) {
        if (model == "any") {
            results.reportAll();
        } else {
            results.reportSingle();
        }
    }
    return 0;
}

Some files were not shown because too many files have changed in this diff Show More