Compare commits

...

69 Commits

Author SHA1 Message Date
97ca8ac084 Move check valid hyperparameters to Classifier 2023-08-22 22:12:20 +02:00
1c1385b768 Fix maxModels mistake in BoostAODE if !repeatSp
Throw exception if wrong hyperparmeter is supplied
2023-08-22 21:55:17 +02:00
35432b6294 Fix time std was not saved in experiment 2023-08-22 12:30:27 +02:00
c59dd30e53 Complete Excel Report with data 2023-08-22 11:55:15 +02:00
d2da0ddb88 Create ReportExcel eq to ReportConsole 2023-08-21 17:51:49 +02:00
8066701c3c Refactor Report class into ReportBase & ReportCons 2023-08-21 17:16:29 +02:00
0f66ac73d0 Revert "Refactor Report into ReportBase & ReportConsole"
This reverts commit 4370bf51d7.
2023-08-21 17:15:14 +02:00
4370bf51d7 Refactor Report into ReportBase & ReportConsole 2023-08-21 17:14:23 +02:00
2b7353b9e0 Add default sorting by date in manage 2023-08-21 16:30:10 +02:00
b686b3c9c3 Enhance copy in Makefile 2023-08-21 12:18:23 +02:00
2dd04a6c44 enhance saving results and add Makefile copy 2023-08-21 11:57:45 +02:00
1da83662d0 Always save results 2023-08-21 10:55:20 +02:00
3ac9593c65 Fix mistake in sample 2023-08-20 20:36:46 +02:00
6b317accf1 Add hyperparameters and processing order to Boost 2023-08-20 20:31:23 +02:00
4964aab722 Add hyperparameters management in experiments 2023-08-20 17:57:38 +02:00
7a6ec73d63 Merge pull request 'boostAode' (#5) from boostAode into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/5
Implement boostAODE
add list datasets
add manage results
2023-08-20 09:02:07 +00:00
1a534888d6 Fix report format 2023-08-19 23:30:44 +02:00
59ffd179f4 Fix report format 2023-08-19 21:26:48 +02:00
9972738deb Add list datasets and add locale format 2023-08-19 19:05:16 +02:00
bafcb26bb6 Add manage to build target 2023-08-18 13:43:53 +02:00
2d7999d5f2 Add manage to release targets 2023-08-18 13:43:13 +02:00
a6bb22dfb5 Complete first BoostAODE 2023-08-18 11:50:34 +02:00
704dc937be Remove FeatureSel, add SelectKBest to BayesMetrics 2023-08-16 19:05:18 +02:00
a3e665eed6 make weights double 2023-08-16 12:46:09 +02:00
918a7b4180 Remove unneeded output 2023-08-16 12:36:38 +02:00
80b20f35b4 Fix weights mistakes in computation 2023-08-16 12:32:51 +02:00
4d4780c1d5 Add BoostAODE model based on AODE 2023-08-15 16:16:04 +02:00
fa612c531e Complete Adding weights to Models 2023-08-15 15:59:56 +02:00
24b68f9ae2 Add weigths as parameter 2023-08-15 15:04:56 +02:00
a062ebf445 Merge pull request 'reports' (#4) from reports into boostAode
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/4
2023-08-14 16:58:48 +00:00
2a3fc9aa45 Add colors and enhace input control 2023-08-14 17:03:06 +02:00
55d21294d5 Add class Paths and enhance input 2023-08-14 00:40:31 +02:00
3691cb4a61 Add totals and filter by scoreName and model 2023-08-13 18:13:00 +02:00
054567c65a Add sorting capacity 2023-08-13 17:10:18 +02:00
2729b92f06 Summary list 2023-08-13 16:19:17 +02:00
f26ea1f0ac Add weights to BayesMetrics 2023-08-13 12:56:06 +02:00
af0419c9da First approx with const 1 weights 2023-08-13 00:59:02 +02:00
90c92e5c56 Merge pull request 'Add states as result in Proposal methods' (#3) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/3
2023-08-12 14:16:55 +00:00
182b52a887 Add states as result in Proposal methods 2023-08-12 16:16:17 +02:00
6679b90a82 Merge pull request 'optimize_memory' (#2) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/2
2023-08-12 14:15:03 +00:00
405887f833 Solved Ld poor results 2023-08-12 11:49:18 +02:00
3a85481a5a Redo pass states to Network Fit needed in crossval
fix mistake in headerline (report)
2023-08-12 11:10:53 +02:00
0ad5505c16 Spodeld working with poor accuracy 2023-08-10 02:06:18 +02:00
323444b74a const functions 2023-08-08 01:53:41 +02:00
ef1bffcac3 Fixed normal classifiers 2023-08-07 13:50:11 +02:00
06db8f51ce Refactor library and models to lighten data stored
Refactro Ensemble to inherit from Classifier insted of BaseClassifier
2023-08-07 12:49:37 +02:00
e74565ba01 update clang-tidy 2023-08-07 00:44:12 +02:00
2da0fb5d8f Merge branch 'main' into TANNew 2023-08-06 11:40:10 +02:00
14ea51648a Complete AODELd 2023-08-06 11:31:44 +02:00
9e94f4e140 Rename suffix of proposal classifier to Ld 2023-08-05 23:23:31 +02:00
1d0fd629c9 Add SPODENew to models 2023-08-05 23:11:36 +02:00
506ef34c6f Add report output to main 2023-08-05 20:29:05 +02:00
7f45495837 Refactor New classifiers to extract predict 2023-08-05 18:39:48 +02:00
1a09ccca4c Add KDBNew fix computeCPT error 2023-08-05 14:40:42 +02:00
a1c6ab18f3 TANNew restructured with poor results 2023-08-04 20:11:22 +02:00
64ac8fb4f2 TANNew as a TAN variant working 2023-08-04 19:42:18 +02:00
c568ba111d Add Proposal class 2023-08-04 13:05:12 +02:00
45c1d052ac Compile TANNew with poor accuracy 2023-08-04 01:35:45 +02:00
eb1cec58a3 Complete nxm 2023-08-03 20:22:33 +02:00
f520b40016 Almost complete proposal in TANNew 2023-08-02 02:21:55 +02:00
cdfb45d2cb Add topological order to Network 2023-08-02 00:56:52 +02:00
f63a9a64f9 Update Makefile to add Release & Debug build 2023-08-01 19:02:37 +02:00
285f0938a6 Update mdlp library 2023-08-01 17:33:01 +02:00
8f8f9773ce Make TANNew same as TAN with local discretization 2023-08-01 13:17:12 +02:00
a9ba21560d Add environment platform to experiment result 2023-08-01 10:55:53 +02:00
a18fbe5594 Begin implementation 2023-07-31 19:53:55 +02:00
adf650d257 Max threading 2023-07-31 18:49:18 +02:00
43bb017d5d Fix problem with tensors way 2023-07-30 19:00:02 +02:00
53697648e7 Merge branch 'aftermath' into main 2023-07-30 01:05:31 +02:00
71 changed files with 2060 additions and 431 deletions

View File

@@ -13,5 +13,4 @@ HeaderFilterRegex: 'src/*'
AnalyzeTemporaryDtors: false AnalyzeTemporaryDtors: false
WarningsAsErrors: '' WarningsAsErrors: ''
FormatStyle: file FormatStyle: file
FormatStyleOptions: ''
... ...

38
.vscode/launch.json vendored
View File

@@ -10,12 +10,13 @@
"-d", "-d",
"iris", "iris",
"-m", "-m",
"TAN", "KDB",
"-s",
"271",
"-p", "-p",
"../../data/", "/Users/rmontanana/Code/discretizbench/datasets/",
"--tensors"
], ],
"cwd": "${workspaceFolder}/build/sample/", //"cwd": "${workspaceFolder}/build/sample/",
}, },
{ {
"type": "lldb", "type": "lldb",
@@ -24,17 +25,36 @@
"program": "${workspaceFolder}/build/src/Platform/main", "program": "${workspaceFolder}/build/src/Platform/main",
"args": [ "args": [
"-m", "-m",
"TAN", "BoostAODE",
"-p", "-p",
"/Users/rmontanana/Code/discretizbench/datasets", "/Users/rmontanana/Code/discretizbench/datasets",
"--discretize", "--discretize",
"--stratified", "--stratified",
"--title",
"Debug test",
"-d", "-d",
"ionosphere" "glass",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
], ],
"cwd": "${workspaceFolder}/build/src/Platform", "cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build/src/Platform/manage",
"args": [
"-n",
"20"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build/src/Platform/list",
"args": [],
"cwd": "/Users/rmontanana/Code/discretizbench",
}, },
{ {
"name": "Build & debug active file", "name": "Build & debug active file",

23
.vscode/tasks.json vendored
View File

@@ -32,6 +32,29 @@
], ],
"group": "build", "group": "build",
"detail": "Task generated by Debugger." "detail": "Task generated by Debugger."
},
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
} }
] ]
} }

View File

@@ -7,10 +7,14 @@ project(BayesNet
LANGUAGES CXX LANGUAGES CXX
) )
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
find_package(Torch REQUIRED) find_package(Torch REQUIRED)
if (POLICY CMP0135) if (POLICY CMP0135)
cmake_policy(SET CMP0135 NEW) cmake_policy(SET CMP0135 NEW)
endif () endif ()
# Global CMake variables # Global CMake variables
@@ -24,26 +28,34 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
# Options # Options
# ------- # -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" ON) option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" ON) option(CODE_COVERAGE "Collect coverage from test library" OFF)
set(CMAKE_BUILD_TYPE "Debug")
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule) include(AddGitSubmodule)
include(StaticAnalyzers) # clang-tidy if (CODE_COVERAGE)
include(CodeCoverage) enable_testing()
include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
if (ENABLE_CLANG_TIDY)
include(StaticAnalyzers) # clang-tidy
endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of BayesNet # External libraries - dependencies of BayesNet
# --------------------------------------------- # ---------------------------------------------
# include(FetchContent) # include(FetchContent)
add_git_submodule("lib/mdlp") add_git_submodule("lib/mdlp")
add_git_submodule("lib/catch2")
add_git_submodule("lib/argparse") add_git_submodule("lib/argparse")
add_git_submodule("lib/json") add_git_submodule("lib/json")
add_git_submodule("lib/openXLSX")
# Subdirectories # Subdirectories
# -------------- # --------------
@@ -59,18 +71,11 @@ file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform
# Testing # Testing
# ------- # -------
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE("Testing enabled")
enable_testing() add_git_submodule("lib/catch2")
if (CODE_COVERAGE)
#include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
#find_package(Catch2 3 REQUIRED)
include(CTest) include(CTest)
#include(Catch)
add_subdirectory(tests) add_subdirectory(tests)
endif (ENABLE_TESTING) endif (ENABLE_TESTING)

View File

@@ -11,19 +11,43 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \ pip install gcovr; \
fi fi
dest ?= ../discretizbench
copy: ## Copy binary files to selected folder
@echo "Destination folder: $(dest)"
make build
@echo ">>> Copying files to $(dest)"
@cp build/src/Platform/main $(dest)
@cp build/src/Platform/list $(dest)
@cp build/src/Platform/manage $(dest)
@echo ">>> Done"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png) dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the project build: ## Build the main and BayesNetSample
@echo ">>> Building BayesNet ..."; cmake --build build -t main -t BayesNetSample -t manage -t list -j 32
clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ...";
find . -name "*.gcda" -print0 | xargs -0 rm
@echo ">>> Done";
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build; @mkdir build;
cmake -S . -B build; \ cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cd build; \ cmake --build build -j 32;
make; \
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32;
@echo ">>> Done";
test: ## Run tests test: ## Run tests
@echo "* Running tests..."; @echo "* Running tests...";
find . -name "*.gcda" -print0 | xargs -0 rm find . -name "*.gcda" -print0 | xargs -0 rm

12
TAN_iris.dot Normal file
View File

@@ -0,0 +1,12 @@
digraph BayesNet {
label=<BayesNet >
fontsize=30
fontcolor=blue
labelloc=t
layout=circo
class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ]
class -> sepallength class -> sepalwidth class -> petallength class -> petalwidth petallength [shape=circle]
petallength -> sepallength petalwidth [shape=circle]
sepallength [shape=circle]
sepallength -> sepalwidth sepalwidth [shape=circle]
sepalwidth -> petalwidth }

View File

@@ -1,5 +1,4 @@
filter = src/ filter = src/
exclude = external/ exclude-directories = build/lib/
exclude = tests/
print-summary = yes print-summary = yes
sort-percentage = yes sort-percentage = yes

View File

@@ -1,2 +1 @@
add_library(ArffFiles ArffFiles.cc) add_library(ArffFiles ArffFiles.cc)
#target_link_libraries(BayesNet "${TORCH_LIBRARIES}")

1
lib/openXLSX Submodule

Submodule lib/openXLSX added at b80da42d14

View File

@@ -3,5 +3,6 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,9 +1,9 @@
#include <iostream> #include <iostream>
#include <torch/torch.h> #include <torch/torch.h>
#include <string> #include <string>
#include <thread>
#include <map> #include <map>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "ArffFiles.h" #include "ArffFiles.h"
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
@@ -42,7 +42,7 @@ bool file_exists(const std::string& name)
} }
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y) pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y)
{ {
vector<vector<int>> Xr; vector<vector<int>> Xr; // nxm
vector<int> yr; vector<int> yr;
for (int col = 0; col < X.size(); ++col) { for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>()); Xr.push_back(vector<int>());
@@ -96,6 +96,7 @@ int main(int argc, char** argv)
} }
); );
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
@@ -113,7 +114,7 @@ int main(int argc, char** argv)
throw runtime_error("Number of folds must be an integer"); throw runtime_error("Number of folds must be an integer");
}}); }});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors; bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name; string model_name, file_name, path, complete_file_name;
int nFolds, seed; int nFolds, seed;
try { try {
@@ -126,6 +127,7 @@ int main(int argc, char** argv)
tensors = program.get<bool>("tensors"); tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds"); nFolds = program.get<int>("folds");
seed = program.get<int>("seed"); seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name]; class_last = datasets[file_name];
if (!file_exists(complete_file_name)) { if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
@@ -161,13 +163,23 @@ int main(int argc, char** argv)
states[className] = vector<int>(maxes[className]); states[className] = vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name); auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states); clf->fit(Xd, y, features, className, states);
auto score = clf->score(Xd, y); if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
auto lines = clf->show(); auto lines = clf->show();
auto graph = clf->graph();
for (auto line : lines) { for (auto line : lines) {
cout << line << endl; cout << line << endl;
} }
cout << "--- Topological Order ---" << endl;
auto order = clf->topological_order();
for (auto name : order) {
cout << name << ", ";
}
cout << "end." << endl;
auto score = clf->score(Xd, y);
cout << "Score: " << score << endl; cout << "Score: " << score << endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name; auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot"); ofstream file(dot_file + ".dot");
file << graph; file << graph;
@@ -199,6 +211,7 @@ int main(int argc, char** argv)
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest }); torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states); clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint); score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt); score_test = clf->score(Xtestt, ytestt);
} else { } else {
@@ -208,6 +221,10 @@ int main(int argc, char** argv)
score_train = clf->score(Xtrain, ytrain); score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest); score_test = clf->score(Xtest, ytest);
} }
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
total_score_train += score_train; total_score_train += score_train;
total_score += score_test; total_score += score_test;
cout << "Score Train: " << score_train << endl; cout << "Score Train: " << score_train << endl;
@@ -216,6 +233,5 @@ int main(int argc, char** argv)
} }
cout << "**********************************************************************************" << endl; cout << "**********************************************************************************" << endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl; cout << "Average Score Train: " << total_score_train / nFolds << endl;
cout << "Average Score Test : " << total_score / nFolds << endl; cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
return 0;
} }

View File

@@ -2,14 +2,16 @@
namespace bayesnet { namespace bayesnet {
AODE::AODE() : Ensemble() {} AODE::AODE() : Ensemble() {}
void AODE::train() void AODE::buildModel(const torch::Tensor& weights)
{ {
models.clear(); models.clear();
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i)); models.push_back(std::make_unique<SPODE>(i));
} }
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
} }
vector<string> AODE::graph(string title) vector<string> AODE::graph(const string& title) const
{ {
return Ensemble::graph(title); return Ensemble::graph(title);
} }

View File

@@ -5,11 +5,12 @@
namespace bayesnet { namespace bayesnet {
class AODE : public Ensemble { class AODE : public Ensemble {
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
AODE(); AODE();
virtual ~AODE() {}; virtual ~AODE() {};
vector<string> graph(string title = "AODE") override; vector<string> graph(const string& title = "AODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

40
src/BayesNet/AODELd.cc Normal file
View File

@@ -0,0 +1,40 @@
#include "AODELd.h"
#include "Models.h"
namespace bayesnet {
using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
Ensemble::fit(dataset, features, className, states);
return *this;
}
void AODELd::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i));
}
n_models = models.size();
}
void AODELd::trainModel(const torch::Tensor& weights)
{
for (const auto& model : models) {
model->fit(Xf, y, features, className, states);
}
}
vector<string> AODELd::graph(const string& name) const
{
return Ensemble::graph(name);
}
}

22
src/BayesNet/AODELd.h Normal file
View File

@@ -0,0 +1,22 @@
#ifndef AODELD_H
#define AODELD_H
#include "Ensemble.h"
#include "Proposal.h"
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
class AODELd : public Ensemble, public Proposal {
protected:
void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) override;
virtual ~AODELd() = default;
vector<string> graph(const string& name = "AODE") const override;
static inline string version() { return "0.0.1"; };
void setHyperparameters(nlohmann::json& hyperparameters) override {};
};
}
#endif // !AODELD_H

View File

@@ -1,23 +1,34 @@
#ifndef BASE_H #ifndef BASE_H
#define BASE_H #define BASE_H
#include <torch/torch.h> #include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <vector> #include <vector>
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
class BaseClassifier { class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public: public:
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0; vector<int> virtual predict(vector<vector<int>>& X) = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0; float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes() = 0; int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges() = 0; int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() = 0; int virtual getNumberOfStates() const = 0;
vector<string> virtual show() = 0; vector<string> virtual show() const = 0;
vector<string> virtual graph(string title = "") = 0; vector<string> virtual graph(const string& title = "") const = 0;
virtual ~BaseClassifier() = default;
const string inline getVersion() const { return "0.1.0"; }; const string inline getVersion() const { return "0.1.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
}; };
} }
#endif #endif

View File

@@ -1,13 +1,15 @@
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "Mst.h" #include "Mst.h"
namespace bayesnet { namespace bayesnet {
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates) //samples is nxm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
: samples(samples) : samples(samples)
, features(features) , features(features)
, className(className) , className(className)
, classNumStates(classNumStates) , classNumStates(classNumStates)
{ {
} }
//samples is nxm vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates) Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
: features(features) : features(features)
, className(className) , className(className)
@@ -15,9 +17,48 @@ namespace bayesnet {
, samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32)) , samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32))
{ {
for (int i = 0; i < vsamples.size(); ++i) { for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32)); samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
} }
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
}
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{
// Return the K Best features
auto n = samples.size(0) - 1;
if (k == 0) {
k = n;
}
// compute scores
scoresKBest.clear();
featuresKBest.clear();
auto label = samples.index({ -1, "..." });
for (int i = 0; i < n; ++i) {
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
featuresKBest.push_back(i);
}
// sort & reduce scores and features
if (ascending) {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] < scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
if (k < n) {
for (int i = 0; i < n - k; ++i) {
featuresKBest.erase(featuresKBest.begin());
scoresKBest.erase(scoresKBest.begin());
}
}
} else {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] > scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
featuresKBest.resize(k);
scoresKBest.resize(k);
}
return featuresKBest;
}
vector<double> Metrics::getScoresKBest() const
{
return scoresKBest;
} }
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source) vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{ {
@@ -30,28 +71,30 @@ namespace bayesnet {
} }
return result; return result;
} }
torch::Tensor Metrics::conditionalEdge() torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{ {
auto result = vector<double>(); auto result = vector<double>();
auto source = vector<string>(features); auto source = vector<string>(features);
source.push_back(className); source.push_back(className);
auto combinations = doCombinations(source); auto combinations = doCombinations(source);
double totalWeight = weights.sum().item<double>();
// Compute class prior // Compute class prior
auto margin = torch::zeros({ classNumStates }); auto margin = torch::zeros({ classNumStates }, torch::kFloat);
for (int value = 0; value < classNumStates; ++value) { for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value; auto mask = samples.index({ -1, "..." }) == value;
margin[value] = mask.sum().item<float>() / samples.sizes()[0]; margin[value] = mask.sum().item<double>() / samples.size(1);
} }
for (auto [first, second] : combinations) { for (auto [first, second] : combinations) {
int index_first = find(features.begin(), features.end(), first) - features.begin(); int index_first = find(features.begin(), features.end(), first) - features.begin();
int index_second = find(features.begin(), features.end(), second) - features.begin(); int index_second = find(features.begin(), features.end(), second) - features.begin();
double accumulated = 0; double accumulated = 0;
for (int value = 0; value < classNumStates; ++value) { for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value; auto mask = samples.index({ -1, "..." }) == value;
auto first_dataset = samples.index({ mask, index_first }); auto first_dataset = samples.index({ index_first, mask });
auto second_dataset = samples.index({ mask, index_second }); auto second_dataset = samples.index({ index_second, mask });
auto mi = mutualInformation(first_dataset, second_dataset); auto weights_dataset = weights.index({ mask });
auto pb = margin[value].item<float>(); auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
auto pb = margin[value].item<double>();
accumulated += pb * mi; accumulated += pb * mi;
} }
result.push_back(accumulated); result.push_back(accumulated);
@@ -67,31 +110,33 @@ namespace bayesnet {
} }
return matrix; return matrix;
} }
vector<float> Metrics::conditionalEdgeWeights() // To use in Python
vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_)
{ {
auto matrix = conditionalEdge(); const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel()); std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v; return v;
} }
double Metrics::entropy(torch::Tensor& feature) double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{ {
torch::Tensor counts = feature.bincount(); torch::Tensor counts = feature.bincount(weights);
int totalWeight = counts.sum().item<int>(); double totalWeight = counts.sum().item<double>();
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
torch::Tensor logProbs = torch::log(probs); torch::Tensor logProbs = torch::log(probs);
torch::Tensor entropy = -probs * logProbs; torch::Tensor entropy = -probs * logProbs;
return entropy.nansum().item<double>(); return entropy.nansum().item<double>();
} }
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x) // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature) double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{ {
int numSamples = firstFeature.sizes()[0]; int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount(); torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts; unordered_map<int, unordered_map<int, double>> jointCounts;
double totalWeight = 0; double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) { for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1; jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
totalWeight += 1; totalWeight += weights[i].item<float>();
} }
if (totalWeight == 0) if (totalWeight == 0)
return 0; return 0;
@@ -112,16 +157,16 @@ namespace bayesnet {
return entropyValue; return entropyValue;
} }
// I(X;Y) = H(Y) - H(Y|X) // I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature) double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{ {
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature); return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
} }
/* /*
Compute the maximum spanning tree considering the weights as distances Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using and the indices of the weights as nodes of this square matrix using
Kruskal algorithm Kruskal algorithm
*/ */
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root) vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root)
{ {
auto mst = MST(features, weights, root); auto mst = MST(features, weights, root);
return mst.maximumSpanningTree(); return mst.maximumSpanningTree();

View File

@@ -8,21 +8,25 @@ namespace bayesnet {
using namespace torch; using namespace torch;
class Metrics { class Metrics {
private: private:
Tensor samples; Tensor samples; // nxm tensor used to fit the model
vector<string> features; vector<string> features;
string className; string className;
int classNumStates = 0; int classNumStates = 0;
vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features
double entropy(const Tensor& feature, const Tensor& weights);
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&);
public: public:
Metrics() = default; Metrics() = default;
Metrics(Tensor&, vector<string>&, string&, int); Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int); Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
double entropy(Tensor&); vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0);
double conditionalEntropy(Tensor&, Tensor&); vector<double> getScoresKBest() const;
double mutualInformation(Tensor&, Tensor&); double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<float> conditionalEdgeWeights(); vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
Tensor conditionalEdge(); Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&); vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
}; };
} }
#endif #endif

91
src/BayesNet/BoostAODE.cc Normal file
View File

@@ -0,0 +1,91 @@
#include "BoostAODE.h"
#include <set>
#include "BayesMetrics.h"
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
}
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
models.clear();
n_models = 0;
if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n;
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
auto y_ = dataset.index({ -1, "..." });
bool exitCondition = false;
unordered_set<int> featuresUsed;
// Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features
// n_models == maxModels
int numClasses = states[className].size();
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
unique_ptr<Classifier> model;
auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool found = false;
for (auto feat : featureSelection) {
if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
continue;
}
found = true;
feature = feat;
break;
}
if (!found) {
exitCondition = true;
continue;
}
}
featuresUsed.insert(feature);
model = std::make_unique<SPODE>(feature);
n_models++;
model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_);
// Step 3.1: Compute the classifier amout of say
auto mask_wrong = ypred != y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double wrongWeights = masked_weights.sum().item<double>();
double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote
models.push_back(std::move(model));
significanceModels.push_back(significance);
exitCondition = n_models == maxModels && repeatSparent;
}
if (featuresUsed.size() != features.size()) {
cout << "Warning: BoostAODE did not use all the features" << endl;
}
weights.copy_(weights_);
}
vector<string> BoostAODE::graph(const string& title) const
{
return Ensemble::graph(title);
}
}

21
src/BayesNet/BoostAODE.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() {};
vector<string> graph(const string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
private:
bool repeatSparent=false;
int maxModels=0;
bool ascending=false; //Process KBest features ascending or descending order
};
}
#endif

View File

@@ -1,2 +1,9 @@
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
target_link_libraries(BayesNet "${TORCH_LIBRARIES}") include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -5,56 +5,74 @@ namespace bayesnet {
using namespace torch; using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features; this->features = features;
this->className = className; this->className = className;
this->states = states; this->states = states;
m = dataset.size(1);
n = dataset.size(0) - 1;
checkFitParameters(); checkFitParameters();
auto n_classes = states[className].size(); auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes); metrics = Metrics(dataset, features, className, n_classes);
train(); model.initialize();
if (Xv == vector<vector<int>>()) { buildModel(weights);
// fit with tensors trainModel(weights);
model.fit(X, y, features, className);
} else {
// fit with vectors
model.fit(Xv, yv, features, className);
}
fitted = true; fitted = true;
return *this; return *this;
} }
void Classifier::buildDataset(Tensor& ytmp)
{
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
dataset = torch::cat({ dataset, yresized }, 0);
}
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
void Classifier::trainModel(const torch::Tensor& weights)
{
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = torch::transpose(X, 0, 1); dataset = X;
this->y = y; buildDataset(y);
Xv = vector<vector<int>>(); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); return build(features, className, states, weights);
return build(features, className, states);
} }
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32); dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
Xv = X;
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32)); dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32));
} }
this->y = torch::tensor(y, kInt32); auto ytmp = torch::tensor(y, kInt32);
yv = y; buildDataset(ytmp);
return build(features, className, states); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
} }
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {
auto sizes = X.sizes();
m = sizes[0];
n = sizes[1];
if (m != y.size(0)) {
throw invalid_argument("X and y must have the same number of samples");
}
if (n != features.size()) { if (n != features.size()) {
throw invalid_argument("X and features must have the same number of features"); throw invalid_argument("X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
} }
if (states.find(className) == states.end()) { if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states"); throw invalid_argument("className not found in states");
@@ -65,23 +83,12 @@ namespace bayesnet {
} }
} }
} }
Tensor Classifier::predict(Tensor& X) Tensor Classifier::predict(Tensor& X)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw logic_error("Classifier has not been fitted");
} }
auto m_ = X.size(0); return model.predict(X);
auto n_ = X.size(1);
//auto Xt = torch::transpose(X, 0, 1);
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
auto temp = X.index({ "...", i });
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + temp.numel());
}
auto yp = model.predict(Xd);
auto ypred = torch::tensor(yp, torch::kInt32);
return ypred;
} }
vector<int> Classifier::predict(vector<vector<int>>& X) vector<int> Classifier::predict(vector<vector<int>>& X)
{ {
@@ -102,8 +109,7 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw logic_error("Classifier has not been fitted");
} }
auto Xt = torch::transpose(X, 0, 1); Tensor y_pred = predict(X);
Tensor y_pred = predict(Xt);
return (y_pred == y).sum().item<float>() / y.size(0); return (y_pred == y).sum().item<float>() / y.size(0);
} }
float Classifier::score(vector<vector<int>>& X, vector<int>& y) float Classifier::score(vector<vector<int>>& X, vector<int>& y)
@@ -111,37 +117,47 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw logic_error("Classifier has not been fitted");
} }
auto m_ = X[0].size(); return model.score(X, y);
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
return model.score(Xd, y);
} }
vector<string> Classifier::show() vector<string> Classifier::show() const
{ {
return model.show(); return model.show();
} }
void Classifier::addNodes() void Classifier::addNodes()
{ {
// Add all nodes to the network // Add all nodes to the network
for (auto feature : features) { for (const auto& feature : features) {
model.addNode(feature, states[feature].size()); model.addNode(feature);
} }
model.addNode(className, states[className].size()); model.addNode(className);
} }
int Classifier::getNumberOfNodes() int Classifier::getNumberOfNodes() const
{ {
// Features does not include class // Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0; return fitted ? model.getFeatures().size() + 1 : 0;
} }
int Classifier::getNumberOfEdges() int Classifier::getNumberOfEdges() const
{ {
return fitted ? model.getEdges().size() : 0; return fitted ? model.getNumEdges() : 0;
} }
int Classifier::getNumberOfStates() int Classifier::getNumberOfStates() const
{ {
return fitted ? model.getStates() : 0; return fitted ? model.getStates() : 0;
} }
vector<string> Classifier::topological_order()
{
return model.topological_sort();
}
void Classifier::dump_cpt() const
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
} }

View File

@@ -10,36 +10,39 @@ using namespace torch;
namespace bayesnet { namespace bayesnet {
class Classifier : public BaseClassifier { class Classifier : public BaseClassifier {
private: private:
bool fitted; void buildDataset(torch::Tensor& y);
Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states); Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights);
protected: protected:
Network model; bool fitted;
int m, n; // m: number of samples, n: number of features int m, n; // m: number of samples, n: number of features
Tensor X; Network model;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Metrics metrics; Metrics metrics;
vector<string> features; vector<string> features;
string className; string className;
map<string, vector<int>> states; map<string, vector<int>> states;
Tensor dataset; // (n+1)xm tensor
void checkFitParameters(); void checkFitParameters();
virtual void train() = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters);
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
void addNodes(); void addNodes();
int getNumberOfNodes() override; int getNumberOfNodes() const override;
int getNumberOfEdges() override; int getNumberOfEdges() const override;
int getNumberOfStates() override; int getNumberOfStates() const override;
Tensor predict(Tensor& X); Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override; vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() override; vector<string> show() const override;
vector<string> topological_order() override;
void dump_cpt() const override;
}; };
} }
#endif #endif

View File

@@ -3,69 +3,52 @@
namespace bayesnet { namespace bayesnet {
using namespace torch; using namespace torch;
Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {} Ensemble::Ensemble() : Classifier(Network()) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
void Ensemble::trainModel(const torch::Tensor& weights)
{ {
dataset = cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
this->states = states;
auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes);
// Build models
train();
// Train models
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
models[i]->fit(Xv, yv, features, className, states); // fit with vectors
models[i]->fit(dataset, features, className, states);
} }
fitted = true;
return *this;
}
Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = X;
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
return build(features, className, states);
}
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X;
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
}
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
}
Tensor Ensemble::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt32);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, models[i]->predict(X));
}
return torch::tensor(voting(y_pred));
} }
vector<int> Ensemble::voting(Tensor& y_pred) vector<int> Ensemble::voting(Tensor& y_pred)
{ {
auto y_pred_ = y_pred.accessor<int, 2>(); auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final; vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(states[className].size(), 0); vector<double> votes(y_pred.size(1), 0);
for (int j = 0; j < y_pred.size(1); ++j) { for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1; votes[y_pred_[i][j]] += significanceModels[j];
} }
// argsort in descending order
auto indices = argsort(votes); auto indices = argsort(votes);
y_pred_final.push_back(indices[0]); y_pred_final.push_back(indices[0]);
} }
return y_pred_final; return y_pred_final;
} }
Tensor Ensemble::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
//Create a threadpool
auto threads{ vector<thread>() };
mutex mtx;
for (auto i = 0; i < n_models; ++i) {
threads.push_back(thread([&, i]() {
auto ypredict = models[i]->predict(X);
lock_guard<mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict);
}));
}
for (auto& thread : threads) {
thread.join();
}
return torch::tensor(voting(y_pred));
}
vector<int> Ensemble::predict(vector<vector<int>>& X) vector<int> Ensemble::predict(vector<vector<int>>& X)
{ {
if (!fitted) { if (!fitted) {
@@ -110,9 +93,8 @@ namespace bayesnet {
} }
} }
return (double)correct / y_pred.size(); return (double)correct / y_pred.size();
} }
vector<string> Ensemble::show() vector<string> Ensemble::show() const
{ {
auto result = vector<string>(); auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -121,7 +103,7 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<string> Ensemble::graph(string title) vector<string> Ensemble::graph(const string& title) const
{ {
auto result = vector<string>(); auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -130,7 +112,7 @@ namespace bayesnet {
} }
return result; return result;
} }
int Ensemble::getNumberOfNodes() int Ensemble::getNumberOfNodes() const
{ {
int nodes = 0; int nodes = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -138,7 +120,7 @@ namespace bayesnet {
} }
return nodes; return nodes;
} }
int Ensemble::getNumberOfEdges() int Ensemble::getNumberOfEdges() const
{ {
int edges = 0; int edges = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -146,7 +128,7 @@ namespace bayesnet {
} }
return edges; return edges;
} }
int Ensemble::getNumberOfStates() int Ensemble::getNumberOfStates() const
{ {
int nstates = 0; int nstates = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {

View File

@@ -8,39 +8,34 @@ using namespace std;
using namespace torch; using namespace torch;
namespace bayesnet { namespace bayesnet {
class Ensemble : public BaseClassifier { class Ensemble : public Classifier {
private: private:
bool fitted;
long n_models;
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected: protected:
unsigned n_models;
vector<unique_ptr<Classifier>> models; vector<unique_ptr<Classifier>> models;
int m, n; // m: number of samples, n: number of features vector<double> significanceModels;
Tensor X; void trainModel(const torch::Tensor& weights) override;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
void virtual train() = 0;
vector<int> voting(Tensor& y_pred); vector<int> voting(Tensor& y_pred);
public: public:
Ensemble(); Ensemble();
virtual ~Ensemble() = default; virtual ~Ensemble() = default;
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Tensor predict(Tensor& X) override;
Ensemble& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X) override; vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(vector<vector<int>>& X, vector<int>& y) override;
int getNumberOfNodes() override; int getNumberOfNodes() const override;
int getNumberOfEdges() override; int getNumberOfEdges() const override;
int getNumberOfStates() override; int getNumberOfStates() const override;
vector<string> show() override; vector<string> show() const override;
vector<string> graph(string title) override; vector<string> graph(const string& title) const override;
vector<string> topological_order() override
{
return vector<string>();
}
void dump_cpt() const override
{
}
}; };
} }
#endif #endif

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
using namespace torch; using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::train() void KDB::buildModel(const torch::Tensor& weights)
{ {
/* /*
1. For each feature Xi, compute mutual information, I(X;C), 1. For each feature Xi, compute mutual information, I(X;C),
@@ -27,25 +27,25 @@ namespace bayesnet {
*/ */
// 1. For each feature Xi, compute mutual information, I(X;C), // 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class. // where C is the class.
vector <float> mi; addNodes();
const Tensor& y = dataset.index({ -1, "..." });
vector<double> mi;
for (auto i = 0; i < features.size(); i++) { for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = X.index({ "...", i }); Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y)); mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
} }
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each // 2. Compute class conditional mutual information I(Xi;XjIC), f or each
auto conditionalEdgeWeights = metrics.conditionalEdge(); auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty. // 3. Let the used variable list, S, be empty.
vector<int> S; vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single // 4. Let the DAG network being constructed, BN, begin with a single
// class node, C. // class node, C.
model.addNode(className, states[className].size());
// 5. Repeat until S includes all domain features // 5. Repeat until S includes all domain features
// 5.1. Select feature Xmax which is not in S and has the largest value // 5.1. Select feature Xmax which is not in S and has the largest value
// I(Xmax;C). // I(Xmax;C).
auto order = argsort(mi); auto order = argsort(mi);
for (auto idx : order) { for (auto idx : order) {
// 5.2. Add a node to BN representing Xmax. // 5.2. Add a node to BN representing Xmax.
model.addNode(features[idx], states[features[idx]].size());
// 5.3. Add an arc from C to Xmax in BN. // 5.3. Add an arc from C to Xmax in BN.
model.addEdge(className, features[idx]); model.addEdge(className, features[idx]);
// 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with // 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
@@ -79,11 +79,12 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0; exit_cond = num == n_edges || candidates.size(0) == 0;
} }
} }
vector<string> KDB::graph(string title) vector<string> KDB::graph(const string& title) const
{ {
string header{ title };
if (title == "KDB") { if (title == "KDB") {
title += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")"; header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
} }
return model.graph(title); return model.graph(header);
} }
} }

View File

@@ -1,5 +1,6 @@
#ifndef KDB_H #ifndef KDB_H
#define KDB_H #define KDB_H
#include <torch/torch.h>
#include "Classifier.h" #include "Classifier.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
@@ -11,11 +12,12 @@ namespace bayesnet {
float theta; float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights); void add_m_edges(int idx, vector<int>& S, Tensor& weights);
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {}; virtual ~KDB() {};
vector<string> graph(string name = "KDB") override; void setHyperparameters(nlohmann::json& hyperparameters) override {};
vector<string> graph(const string& name = "KDB") const override;
}; };
} }
#endif #endif

30
src/BayesNet/KDBLd.cc Normal file
View File

@@ -0,0 +1,30 @@
#include "KDBLd.h"
namespace bayesnet {
using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor KDBLd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
vector<string> KDBLd::graph(const string& name) const
{
return KDB::graph(name);
}
}

20
src/BayesNet/KDBLd.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef KDBLD_H
#define KDBLD_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class KDBLd : public KDB, public Proposal {
private:
public:
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
static inline string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H

View File

@@ -94,7 +94,7 @@ namespace bayesnet {
return result; return result;
} }
MST::MST(vector<string>& features, Tensor& weights, int root) : features(features), weights(weights), root(root) {} MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree() vector<pair<int, int>> MST::maximumSpanningTree()
{ {
auto num_features = features.size(); auto num_features = features.size();

View File

@@ -13,7 +13,7 @@ namespace bayesnet {
int root = 0; int root = 0;
public: public:
MST() = default; MST() = default;
MST(vector<string>& features, Tensor& weights, int root); MST(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree(); vector<pair<int, int>> maximumSpanningTree();
}; };
class Graph { class Graph {

View File

@@ -3,15 +3,24 @@
#include "Network.h" #include "Network.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {} Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false) {}
Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted) getmaxThreads()), fitted(other.fitted)
{ {
for (const auto& pair : other.nodes) { for (const auto& pair : other.nodes) {
nodes[pair.first] = std::make_unique<Node>(*pair.second); nodes[pair.first] = std::make_unique<Node>(*pair.second);
} }
} }
void Network::initialize()
{
features = vector<string>();
className = "";
classNumStates = 0;
fitted = false;
nodes.clear();
samples = torch::Tensor();
}
float Network::getmaxThreads() float Network::getmaxThreads()
{ {
return maxThreads; return maxThreads;
@@ -20,28 +29,28 @@ namespace bayesnet {
{ {
return samples; return samples;
} }
void Network::addNode(const string& name, int numStates) void Network::addNode(const string& name)
{ {
if (name == "") {
throw invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
}
if (find(features.begin(), features.end(), name) == features.end()) { if (find(features.begin(), features.end(), name) == features.end()) {
features.push_back(name); features.push_back(name);
} }
if (nodes.find(name) != nodes.end()) { nodes[name] = std::make_unique<Node>(name);
// if node exists update its number of states and remove parents, children and CPT
nodes[name]->clear();
nodes[name]->setNumStates(numStates);
return;
}
nodes[name] = std::make_unique<Node>(name, numStates);
} }
vector<string> Network::getFeatures() vector<string> Network::getFeatures() const
{ {
return features; return features;
} }
int Network::getClassNumStates() int Network::getClassNumStates() const
{ {
return classNumStates; return classNumStates;
} }
int Network::getStates() int Network::getStates() const
{ {
int result = 0; int result = 0;
for (auto& node : nodes) { for (auto& node : nodes) {
@@ -49,7 +58,7 @@ namespace bayesnet {
} }
return result; return result;
} }
string Network::getClassName() string Network::getClassName() const
{ {
return className; return className;
} }
@@ -94,45 +103,77 @@ namespace bayesnet {
{ {
return nodes; return nodes;
} }
void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className) void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
features = featureNames; if (weights.size(0) != n_samples) {
this->className = className; throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit");
dataset.clear(); }
// Specific part if (n_samples != n_samples_y) {
classNumStates = torch::max(y).item<int>() + 1; throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1); }
for (int i = 0; i < featureNames.size(); ++i) { if (n_features != featureNames.size()) {
auto column = torch::flatten(X.index({ "...", i })); throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")");
auto k = vector<int>(); }
for (auto z = 0; z < X.size(0); ++z) { if (n_features != features.size() - 1) {
k.push_back(column[z].item<int>()); throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states");
} }
dataset[featureNames[i]] = k;
} }
dataset[className] = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
completeFit();
} }
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className) void Network::setStates(const map<string, vector<int>>& states)
{ {
features = featureNames; // Set states to every Node in the network
for (int i = 0; i < features.size(); ++i) {
nodes[features[i]]->setNumStates(states.at(features[i]).size());
}
classNumStates = nodes[className]->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className; this->className = className;
dataset.clear(); Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
// Specific part samples = torch::cat({ X , ytmp }, 0);
classNumStates = *max_element(labels.begin(), labels.end()) + 1;
// Build dataset & tensor of samples
samples = torch::zeros({ static_cast<int>(input_data[0].size()), static_cast<int>(input_data.size() + 1) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
dataset[featureNames[i]] = input_data[i]; auto row_feature = X.index({ i, "..." });
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32));
} }
dataset[className] = labels; completeFit(states, weights);
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
completeFit();
} }
void Network::completeFit() void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{ {
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
this->className = className;
// Build tensor of samples (nxm) (n+1 because of the class)
samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
}
void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads); int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) { if (maxThreadsRunning < 1) {
maxThreadsRunning = 1; maxThreadsRunning = 1;
@@ -145,7 +186,7 @@ namespace bayesnet {
while (nextNodeIndex < nodes.size()) { while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx); unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; }); cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() { threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() {
while (true) { while (true) {
unique_lock<mutex> lock(mtx); unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) { if (nextNodeIndex >= nodes.size()) {
@@ -154,7 +195,7 @@ namespace bayesnet {
auto& pair = *std::next(nodes.begin(), nextNodeIndex); auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex; ++nextNodeIndex;
lock.unlock(); lock.unlock();
pair.second->computeCPT(dataset, laplaceSmoothing); pair.second->computeCPT(samples, features, laplaceSmoothing, weights);
lock.lock(); lock.lock();
nodes[pair.first] = std::move(pair.second); nodes[pair.first] = std::move(pair.second);
lock.unlock(); lock.unlock();
@@ -170,7 +211,39 @@ namespace bayesnet {
} }
fitted = true; fitted = true;
} }
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
}
torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) {
const Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
result.index_put_({ i, "..." }, temp);
}
if (proba)
return result;
else
return result.argmax(1);
}
// Return mxn tensor of probabilities
Tensor Network::predict_proba(const Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mxn tensor of probabilities
Tensor Network::predict(const Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 vector of predictions
// tsamples is nxm vector of samples
vector<int> Network::predict(const vector<vector<int>>& tsamples) vector<int> Network::predict(const vector<vector<int>>& tsamples)
{ {
if (!fitted) { if (!fitted) {
@@ -191,6 +264,7 @@ namespace bayesnet {
} }
return predictions; return predictions;
} }
// Return mxn vector of probabilities
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples) vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
{ {
if (!fitted) { if (!fitted) {
@@ -218,12 +292,13 @@ namespace bayesnet {
} }
return (double)correct / y_pred.size(); return (double)correct / y_pred.size();
} }
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const vector<int>& sample) vector<double> Network::predict_sample(const vector<int>& sample)
{ {
// Ensure the sample size is equal to the number of features // Ensure the sample size is equal to the number of features
if (sample.size() != features.size()) { if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) + throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size()) + ")"); ") does not match the number of features (" + to_string(features.size() - 1) + ")");
} }
map<string, int> evidence; map<string, int> evidence;
for (int i = 0; i < sample.size(); ++i) { for (int i = 0; i < sample.size(); ++i) {
@@ -231,6 +306,20 @@ namespace bayesnet {
} }
return exactInference(evidence); return exactInference(evidence);
} }
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const Tensor& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence) double Network::computeFactor(map<string, int>& completeEvidence)
{ {
double result = 1.0; double result = 1.0;
@@ -256,13 +345,12 @@ namespace bayesnet {
for (auto& thread : threads) { for (auto& thread : threads) {
thread.join(); thread.join();
} }
// Normalize result // Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0); double sum = accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](double& value) { return value / sum; }); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result; return result;
} }
vector<string> Network::show() vector<string> Network::show() const
{ {
vector<string> result; vector<string> result;
// Draw the network // Draw the network
@@ -275,7 +363,7 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<string> Network::graph(const string& title) vector<string> Network::graph(const string& title) const
{ {
auto output = vector<string>(); auto output = vector<string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet "; auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
@@ -289,7 +377,7 @@ namespace bayesnet {
output.push_back("}\n"); output.push_back("}\n");
return output; return output;
} }
vector<pair<string, string>> Network::getEdges() vector<pair<string, string>> Network::getEdges() const
{ {
auto edges = vector<pair<string, string>>(); auto edges = vector<pair<string, string>>();
for (const auto& node : nodes) { for (const auto& node : nodes) {
@@ -301,4 +389,53 @@ namespace bayesnet {
} }
return edges; return edges;
} }
int Network::getNumEdges() const
{
return getEdges().size();
}
vector<string> Network::topological_sort()
{
/* Check if al the fathers of every node are before the node */
auto result = features;
result.erase(remove(result.begin(), result.end(), className), result.end());
bool ending{ false };
int idx = 0;
while (!ending) {
ending = true;
for (auto feature : features) {
auto fathers = nodes[feature]->getParents();
for (const auto& father : fathers) {
auto fatherName = father->getName();
if (fatherName == className) {
continue;
}
// Check if father is placed before the actual feature
auto it = find(result.begin(), result.end(), fatherName);
if (it != result.end()) {
auto it2 = find(result.begin(), result.end(), feature);
if (it2 != result.end()) {
if (distance(it, it2) < 0) {
// if it is not, insert it before the feature
result.erase(remove(result.begin(), result.end(), fatherName), result.end());
result.insert(it2, fatherName);
ending = false;
}
} else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result");
}
} else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
}
}
}
}
return result;
}
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
cout << node.second->getCPT() << endl;
}
}
} }

View File

@@ -8,47 +8,50 @@ namespace bayesnet {
class Network { class Network {
private: private:
map<string, unique_ptr<Node>> nodes; map<string, unique_ptr<Node>> nodes;
map<string, vector<int>> dataset;
bool fitted; bool fitted;
float maxThreads; float maxThreads = 0.95;
int classNumStates; int classNumStates;
vector<string> features; vector<string> features; // Including classname
string className; string className;
int laplaceSmoothing; double laplaceSmoothing;
torch::Tensor samples; torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&); bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&); vector<double> predict_sample(const vector<int>&);
vector<double> predict_sample(const torch::Tensor&);
vector<double> exactInference(map<string, int>&); vector<double> exactInference(map<string, int>&);
double computeFactor(map<string, int>&); double computeFactor(map<string, int>&);
double mutual_info(torch::Tensor&, torch::Tensor&); void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights);
double entropy(torch::Tensor&); void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights);
double conditionalEntropy(torch::Tensor&, torch::Tensor&); void setStates(const map<string, vector<int>>&);
double mutualInformation(torch::Tensor&, torch::Tensor&);
void completeFit();
public: public:
Network(); Network();
explicit Network(float, int);
explicit Network(float); explicit Network(float);
explicit Network(Network&); explicit Network(Network&);
torch::Tensor& getSamples(); torch::Tensor& getSamples();
float getmaxThreads(); float getmaxThreads();
void addNode(const string&, int); void addNode(const string&);
void addEdge(const string&, const string&); void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes(); map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures(); vector<string> getFeatures() const;
int getStates(); int getStates() const;
vector<pair<string, string>> getEdges(); vector<pair<string, string>> getEdges() const;
int getClassNumStates(); int getNumEdges() const;
string getClassName(); int getClassNumStates() const;
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&); string getClassName() const;
void fit(torch::Tensor&, torch::Tensor&, const vector<string>&, const string&); void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
vector<int> predict(const vector<vector<int>>&); void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
//Computes the conditional edge weight of variable index u and v conditioned on class_node void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
torch::Tensor conditionalEdgeWeight(); vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
vector<vector<double>> predict_proba(const vector<vector<int>>&); torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&); double score(const vector<vector<int>>&, const vector<int>&);
vector<string> show(); vector<string> topological_sort();
vector<string> graph(const string& title); // Returns a vector of strings representing the graph in graphviz format vector<string> show() const;
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
inline string version() { return "0.1.0"; } inline string version() { return "0.1.0"; }
}; };
} }

View File

@@ -2,8 +2,8 @@
namespace bayesnet { namespace bayesnet {
Node::Node(const std::string& name, int numStates) Node::Node(const std::string& name)
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>()) : name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
{ {
} }
void Node::clear() void Node::clear()
@@ -84,8 +84,9 @@ namespace bayesnet {
} }
return result; return result;
} }
void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing) void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{ {
dimensions.clear();
// Get dimensions of the CPT // Get dimensions of the CPT
dimensions.push_back(numStates); dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
@@ -93,12 +94,24 @@ namespace bayesnet {
// Create a tensor of zeros with the dimensions of the CPT // Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
// Fill table with counts // Fill table with counts
for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) { auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates; torch::List<c10::optional<torch::Tensor>> coordinates;
coordinates.push_back(torch::tensor(dataset[name][n_sample])); coordinates.push_back(dataset.index({ name_index, n_sample }));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&dataset, &n_sample](const auto& parent) { return torch::tensor(dataset[parent->getName()][n_sample]); }); for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
}
// Increment the count of the corresponding coordinate // Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1); cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
} }
// Normalize the counts // Normalize the counts
cpTable = cpTable / cpTable.sum(0); cpTable = cpTable / cpTable.sum(0);

View File

@@ -16,7 +16,7 @@ namespace bayesnet {
vector<int64_t> dimensions; // dimensions of the cpTable vector<int64_t> dimensions; // dimensions of the cpTable
public: public:
vector<pair<string, string>> combinations(const vector<string>&); vector<pair<string, string>> combinations(const vector<string>&);
Node(const string&, int); explicit Node(const string&);
void clear(); void clear();
void addParent(Node*); void addParent(Node*);
void addChild(Node*); void addChild(Node*);
@@ -26,7 +26,7 @@ namespace bayesnet {
vector<Node*>& getParents(); vector<Node*>& getParents();
vector<Node*>& getChildren(); vector<Node*>& getChildren();
torch::Tensor& getCPT(); torch::Tensor& getCPT();
void computeCPT(map<string, vector<int>>&, const int); void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const; int getNumStates() const;
void setNumStates(int); void setNumStates(int);
unsigned minFill(); unsigned minFill();

110
src/BayesNet/Proposal.cc Normal file
View File

@@ -0,0 +1,110 @@
#include "Proposal.h"
#include "ArffFiles.h"
namespace bayesnet {
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal()
{
for (auto& [key, value] : discretizers) {
delete value;
}
}
map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model)
{
// order of local discretization is important. no good 0, 1, 2...
// although we rediscretize features after the local discretization of every feature
auto order = model.topological_sort();
auto& nodes = model.getNodes();
map<string, vector<int>> states = oldStates;
vector<int> indicesToReDiscretize;
bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents();
if (nodeParents.size() < 2) continue; // Only has class as parent
upgrade = true;
int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
vector<string> parents;
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
// Remove class as parent as it will be added later
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents
vector<int> indices;
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents(Xf.size(1));
for (auto idx : indices) {
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
}
}
auto arff = ArffFiles();
auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv);
//
//
//
// auto tmp = discretizers[feature]->transform(xvf);
// Xv[index] = tmp;
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[feature] = xStates;
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
}
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states);
}
return states;
}
map<string, vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{
// Discretize the continuous input data and build pDataset (Classifier::dataset)
int m = Xf.size(1);
int n = Xf.size(0);
map<string, vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)
for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
discretizers[pFeatures[i]] = discretizer;
}
int n_classes = torch::max(y).item<int>() + 1;
auto yStates = vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y);
return states;
}
torch::Tensor Proposal::prepareX(torch::Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}
return Xtd;
}
}

29
src/BayesNet/Proposal.h Normal file
View File

@@ -0,0 +1,29 @@
#ifndef PROPOSAL_H
#define PROPOSAL_H
#include <string>
#include <map>
#include <torch/torch.h>
#include "Network.h"
#include "CPPFImdlp.h"
#include "Classifier.h"
namespace bayesnet {
class Proposal {
public:
Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_);
virtual ~Proposal();
protected:
torch::Tensor prepareX(torch::Tensor& X);
map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model);
map<string, vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers;
private:
torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures;
string& pClassName;
};
}
#endif

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
SPODE::SPODE(int root) : Classifier(Network()), root(root) {} SPODE::SPODE(int root) : Classifier(Network()), root(root) {}
void SPODE::train() void SPODE::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
addNodes(); addNodes();
@@ -17,7 +17,7 @@ namespace bayesnet {
} }
} }
} }
vector<string> SPODE::graph(string name ) vector<string> SPODE::graph(const string& name) const
{ {
return model.graph(name); return model.graph(name);
} }

View File

@@ -7,11 +7,12 @@ namespace bayesnet {
private: private:
int root; int root;
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() {}; virtual ~SPODE() {};
vector<string> graph(string name = "SPODE") override; vector<string> graph(const string& name = "SPODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

46
src/BayesNet/SPODELd.cc Normal file
View File

@@ -0,0 +1,46 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
SPODELd& SPODELd::fit(torch::Tensor& dataset, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone();
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor SPODELd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return SPODE::predict(Xt);
}
vector<string> SPODELd::graph(const string& name) const
{
return SPODE::graph(name);
}
}

20
src/BayesNet/SPODELd.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef SPODELD_H
#define SPODELD_H
#include "SPODE.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class SPODELd : public SPODE, public Proposal {
public:
explicit SPODELd(int root);
virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
static inline string version() { return "0.0.1"; };
};
}
#endif // !SPODELD_H

View File

@@ -5,25 +5,25 @@ namespace bayesnet {
TAN::TAN() : Classifier(Network()) {} TAN::TAN() : Classifier(Network()) {}
void TAN::train() void TAN::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
addNodes(); addNodes();
// 1. Compute mutual information between each feature and the class and set the root node // 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class // as the highest mutual information with the class
auto mi = vector <pair<int, float >>(); auto mi = vector <pair<int, float >>();
Tensor class_dataset = dataset.index({ "...", -1 }); Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) { for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = dataset.index({ "...", i }); Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset); auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value }); mi.push_back({ i, mi_value });
} }
sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first; auto root = mi[mi.size() - 1].first;
// 2. Compute mutual information between each feature and the class // 2. Compute mutual information between each feature and the class
auto weights = metrics.conditionalEdge(); auto weights_matrix = metrics.conditionalEdge(weights);
// 3. Compute the maximum spanning tree // 3. Compute the maximum spanning tree
auto mst = metrics.maximumSpanningTree(features, weights, root); auto mst = metrics.maximumSpanningTree(features, weights_matrix, root);
// 4. Add edges from the maximum spanning tree to the model // 4. Add edges from the maximum spanning tree to the model
for (auto i = 0; i < mst.size(); ++i) { for (auto i = 0; i < mst.size(); ++i) {
auto [from, to] = mst[i]; auto [from, to] = mst[i];
@@ -34,7 +34,7 @@ namespace bayesnet {
model.addEdge(className, feature); model.addEdge(className, feature);
} }
} }
vector<string> TAN::graph(string title) vector<string> TAN::graph(const string& title) const
{ {
return model.graph(title); return model.graph(title);
} }

View File

@@ -3,15 +3,15 @@
#include "Classifier.h" #include "Classifier.h"
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
using namespace torch;
class TAN : public Classifier { class TAN : public Classifier {
private: private:
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
TAN(); TAN();
virtual ~TAN() {}; virtual ~TAN() {};
vector<string> graph(string name = "TAN") override; vector<string> graph(const string& name = "TAN") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

31
src/BayesNet/TANLd.cc Normal file
View File

@@ -0,0 +1,31 @@
#include "TANLd.h"
namespace bayesnet {
using namespace std;
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor TANLd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return TAN::predict(Xt);
}
vector<string> TANLd::graph(const string& name) const
{
return TAN::graph(name);
}
}

20
src/BayesNet/TANLd.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef TANLD_H
#define TANLD_H
#include "TAN.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class TANLd : public TAN, public Proposal {
private:
public:
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
void setHyperparameters(nlohmann::json& hyperparameters) override {};
};
}
#endif // !TANLD_H

View File

@@ -3,7 +3,8 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
vector<int> argsort(vector<float>& nums) // Return the indices in descending order
vector<int> argsort(vector<double>& nums)
{ {
int n = nums.size(); int n = nums.size();
vector<int> indices(n); vector<int> indices(n);

View File

@@ -5,7 +5,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
vector<int> argsort(vector<float>& nums); vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor); vector<vector<int>> tensorToVector(Tensor& tensor);
} }
#endif //BAYESNET_UTILS_H #endif //BAYESNET_UTILS_H

10
src/Platform/BestResult.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef BESTRESULT_H
#define BESTRESULT_H
#include <string>
class BestResult {
public:
static std::string title() { return "STree_default (linear-ovo)"; }
static double score() { return 22.109799; }
static std::string scoreName() { return "accuracy"; }
};
#endif

View File

@@ -4,5 +4,9 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc)
add_executable(list list.cc platformUtils Datasets.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(manage "${TORCH_LIBRARIES}" OpenXLSX::OpenXLSX)
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

14
src/Platform/Colors.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef COLORS_H
#define COLORS_H
class Colors {
public:
static std::string MAGENTA() { return "\033[1;35m"; }
static std::string BLUE() { return "\033[1;34m"; }
static std::string CYAN() { return "\033[1;36m"; }
static std::string GREEN() { return "\033[1;32m"; }
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -24,75 +24,110 @@ namespace platform {
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result; return result;
} }
vector<string> Datasets::getFeatures(string name) vector<string> Datasets::getFeatures(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getFeatures(); return datasets.at(name)->getFeatures();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Datasets::getStates(string name) map<string, vector<int>> Datasets::getStates(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getStates(); return datasets.at(name)->getStates();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
string Datasets::getClassName(string name) void Datasets::loadDataset(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getClassName(); return;
} else {
datasets.at(name)->load();
}
}
string Datasets::getClassName(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Datasets::getNSamples(string name) int Datasets::getNSamples(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getNSamples(); return datasets.at(name)->getNSamples();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name) int Datasets::getNClasses(const string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
vector<int> Datasets::getClassesCounts(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectors(); return datasets[name]->getVectors();
} }
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name) pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectorsDiscretized(); return datasets[name]->getVectorsDiscretized();
} }
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name) pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getTensors(); return datasets[name]->getTensors();
} }
bool Datasets::isDataset(const string& name) bool Datasets::isDataset(const string& name) const
{ {
return datasets.find(name) != datasets.end(); return datasets.find(name) != datasets.end();
} }
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{ {
} }
string Dataset::getName() string Dataset::getName() const
{ {
return name; return name;
} }
string Dataset::getClassName() string Dataset::getClassName() const
{ {
return className; return className;
} }
vector<string> Dataset::getFeatures() vector<string> Dataset::getFeatures() const
{ {
if (loaded) { if (loaded) {
return features; return features;
@@ -100,7 +135,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNFeatures() int Dataset::getNFeatures() const
{ {
if (loaded) { if (loaded) {
return n_features; return n_features;
@@ -108,7 +143,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNSamples() int Dataset::getNSamples() const
{ {
if (loaded) { if (loaded) {
return n_samples; return n_samples;
@@ -116,7 +151,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Dataset::getStates() map<string, vector<int>> Dataset::getStates() const
{ {
if (loaded) { if (loaded) {
return states; return states;
@@ -207,9 +242,9 @@ namespace platform {
if (discretize) { if (discretize) {
Xd = discretizeDataset(Xv, yv); Xd = discretizeDataset(Xv, yv);
computeStates(); computeStates();
n_samples = Xd[0].size();
n_features = Xd.size();
} }
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true; loaded = true;
} }
void Dataset::buildTensors() void Dataset::buildTensors()

View File

@@ -29,15 +29,15 @@ namespace platform {
public: public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&); explicit Dataset(const Dataset&);
string getName(); string getName() const;
string getClassName(); string getClassName() const;
vector<string> getFeatures(); vector<string> getFeatures() const;
map<string, vector<int>> getStates(); map<string, vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors(); pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(); pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors(); pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures(); int getNFeatures() const;
int getNSamples(); int getNSamples() const;
void load(); void load();
const bool inline isLoaded() const { return loaded; }; const bool inline isLoaded() const { return loaded; };
}; };
@@ -51,14 +51,17 @@ namespace platform {
public: public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames(); vector<string> getNames();
vector<string> getFeatures(string name); vector<string> getFeatures(const string& name) const;
int getNSamples(string name); int getNSamples(const string& name) const;
string getClassName(string name); string getClassName(const string& name) const;
map<string, vector<int>> getStates(string name); int getNClasses(const string& name);
pair<vector<vector<float>>&, vector<int>&> getVectors(string name); vector<int> getClassesCounts(const string& name) const;
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name); map<string, vector<int>> getStates(const string& name) const;
pair<torch::Tensor&, torch::Tensor&> getTensors(string name); pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name);
bool isDataset(const string& name); pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name);
bool isDataset(const string& name) const;
void loadDataset(const string& name) const;
}; };
}; };

View File

@@ -1,6 +1,7 @@
#include "Experiment.h" #include "Experiment.h"
#include "Datasets.h" #include "Datasets.h"
#include "Models.h" #include "Models.h"
#include "ReportConsole.h"
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
@@ -24,6 +25,7 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S"); oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str(); return oss.str();
} }
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
string Experiment::get_file_name() string Experiment::get_file_name()
{ {
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
@@ -86,6 +88,13 @@ namespace platform {
file.close(); file.close();
} }
void Experiment::report()
{
json data = build_json();
ReportConsole report(data);
report.show();
}
void Experiment::show() void Experiment::show()
{ {
json data = build_json(); json data = build_json();
@@ -104,7 +113,7 @@ namespace platform {
void Experiment::cross_validation(const string& path, const string& fileName) void Experiment::cross_validation(const string& path, const string& fileName)
{ {
auto datasets = platform::Datasets(path, true, platform::ARFF); auto datasets = platform::Datasets(path, discretized, platform::ARFF);
// Get dataset // Get dataset
auto [X, y] = datasets.getTensors(fileName); auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName); auto states = datasets.getStates(fileName);
@@ -114,8 +123,10 @@ namespace platform {
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
// Prepare Result // Prepare Result
auto result = Result(); auto result = Result();
auto [values, counts] = at::_unique(y);; auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
// Initialize results vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size()); int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
@@ -136,6 +147,10 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) { for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model); auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion()); setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
}
// Split train - test dataset
train_timer.start(); train_timer.start();
auto [train, test] = fold->getFold(nfold); auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train); auto train_t = torch::tensor(train);
@@ -145,12 +160,14 @@ namespace platform {
auto X_test = X.index({ "...", test_t }); auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t }); auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush; cout << nfold + 1 << ", " << flush;
// Train model
clf->fit(X_train, y_train, features, className, states); clf->fit(X_train, y_train, features, className, states);
nodes[item] = clf->getNumberOfNodes(); nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges(); edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates(); num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration(); train_time[item] = train_timer.getDuration();
auto accuracy_train_value = clf->score(X_train, y_train); auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
test_timer.start(); test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test); auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration(); test_time[item] = test_timer.getDuration();
@@ -164,11 +181,11 @@ namespace platform {
item++; item++;
} }
cout << "end. " << flush; cout << "end. " << flush;
delete fold;
} }
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>()); result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>()); result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>()); result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>()); result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
result.setDataset(fileName); result.setDataset(fileName);
addResult(result); addResult(result);

View File

@@ -29,7 +29,8 @@ namespace platform {
}; };
class Result { class Result {
private: private:
string dataset, hyperparameters, model_version; string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 }; int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 }; double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 }; float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
@@ -37,7 +38,7 @@ namespace platform {
public: public:
Result() = default; Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; } Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; } Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; } Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; } Result& setClasses(int classes) { this->classes = classes; return *this; }
@@ -59,7 +60,7 @@ namespace platform {
const float get_score_train() const { return score_train; } const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; } float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; } const string& getDataset() const { return dataset; }
const string& getHyperparameters() const { return hyperparameters; } const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; } const int getSamples() const { return samples; }
const int getFeatures() const { return features; } const int getFeatures() const { return features; }
const int getClasses() const { return classes; } const int getClasses() const { return classes; }
@@ -85,11 +86,12 @@ namespace platform {
bool discretized{ false }, stratified{ false }; bool discretized{ false }, stratified{ false };
vector<Result> results; vector<Result> results;
vector<int> randomSeeds; vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 }; int nfolds{ 0 };
float duration{ 0 }; float duration{ 0 };
json build_json(); json build_json();
public: public:
Experiment() = default; Experiment();
Experiment& setTitle(const string& title) { this->title = title; return *this; } Experiment& setTitle(const string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; } Experiment& setModel(const string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; } Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
@@ -103,11 +105,13 @@ namespace platform {
Experiment& addResult(Result result) { results.push_back(result); return *this; } Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
string get_file_name(); string get_file_name();
void save(const string& path); void save(const string& path);
void cross_validation(const string& path, const string& fileName); void cross_validation(const string& path, const string& fileName);
void go(vector<string> filesToProcess, const string& path); void go(vector<string> filesToProcess, const string& path);
void show(); void show();
void report();
}; };
} }
#endif #endif

View File

@@ -6,6 +6,11 @@
#include "TAN.h" #include "TAN.h"
#include "KDB.h" #include "KDB.h"
#include "SPODE.h" #include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
namespace platform { namespace platform {
class Models { class Models {
private: private:

12
src/Platform/Paths.h Normal file
View File

@@ -0,0 +1,12 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
namespace platform {
class Paths {
public:
static std::string datasets() { return "datasets/"; }
static std::string results() { return "results/"; }
static std::string excel() { return "excel/"; }
};
}
#endif

View File

@@ -0,0 +1,37 @@
#include <sstream>
#include <locale>
#include "ReportBase.h"
#include "BestResult.h"
namespace platform {
string ReportBase::fromVector(const string& key)
{
stringstream oss;
string sep = "";
oss << "[";
for (auto& item : data[key]) {
oss << sep << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
string ReportBase::fVector(const string& title, const json& data, const int width, const int precision)
{
stringstream oss;
string sep = "";
oss << title << "[";
for (const auto& item : data) {
oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
void ReportBase::show()
{
header();
body();
}
}

23
src/Platform/ReportBase.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string>
#include <iostream>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
using namespace std;
class ReportBase {
public:
explicit ReportBase(json data_) { data = data_; };
virtual ~ReportBase() = default;
void show();
protected:
json data;
string fromVector(const string& key);
string fVector(const string& title, const json& data, const int width, const int precision);
virtual void header() = 0;
virtual void body() = 0;
};
};
#endif

View File

@@ -0,0 +1,88 @@
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestResult.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
string ReportConsole::headerLine(const string& text)
{
int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n;
return "* " + text + string(n, ' ') + "*\n";
}
void ReportConsole::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
cout << headerLine(oss.str());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
}
void ReportConsole::body()
{
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
for (const auto& r : data["results"]) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>() << " ";
cout << setw(11) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
try {
cout << r["hyperparameters"].get<string>();
}
catch (const exception& err) {
cout << r["hyperparameters"];
}
cout << endl;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1) {
cout << string(MAXL, '*') << endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl;
} else {
footer(totalScore);
}
}
void ReportConsole::footer(double totalScore)
{
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) {
stringstream oss;
oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score();
cout << headerLine(oss.str());
}
cout << string(MAXL, '*') << endl << Colors::RESET();
}
}

View File

@@ -0,0 +1,22 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include <iostream>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXL = 128;
class ReportConsole : public ReportBase{
public:
explicit ReportConsole(json data_) : ReportBase(data_) {};
virtual ~ReportConsole() = default;
private:
string headerLine(const string& text);
void header() override;
void body() override;
void footer(double totalScore);
};
};
#endif

109
src/Platform/ReportExcel.cc Normal file
View File

@@ -0,0 +1,109 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestResult.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
void ReportExcel::createFile()
{
doc.create(Paths::excel() + "some_results.xlsx");
wks = doc.workbook().worksheet("Sheet1");
wks.setName(data["model"].get<string>());
}
void ReportExcel::closeFile()
{
doc.save();
doc.close();
}
void ReportExcel::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
wks.cell("A1").value().set(
"Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " +
to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
wks.cell("A2").value() = data["title"].get<string>();
wks.cell("A3").value() = "Random seeds: " + fromVector("seeds") + " Stratified: " +
(data["stratified"].get<bool>() ? "True" : "False");
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, "
<< data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
wks.cell("A4").value() = oss.str();
wks.cell("A5").value() = "Score is " + data["score_name"].get<string>();
}
void ReportExcel::body()
{
auto header = vector<string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "Time",
"Time Std.", "Hyperparameters" });
int col = 1;
for (const auto& item : header) {
wks.cell(8, col++).value() = item;
}
int row = 9;
col = 1;
json lastResult;
double totalScore = 0.0;
string hyperparameters;
for (const auto& r : data["results"]) {
wks.cell(row, col).value() = r["dataset"].get<string>();
wks.cell(row, col + 1).value() = r["samples"].get<int>();
wks.cell(row, col + 2).value() = r["features"].get<int>();
wks.cell(row, col + 3).value() = r["classes"].get<int>();
wks.cell(row, col + 4).value() = r["nodes"].get<float>();
wks.cell(row, col + 5).value() = r["leaves"].get<float>();
wks.cell(row, col + 6).value() = r["depth"].get<float>();
wks.cell(row, col + 7).value() = r["score"].get<double>();
wks.cell(row, col + 8).value() = r["score_std"].get<double>();
wks.cell(row, col + 9).value() = r["time"].get<double>();
wks.cell(row, col + 10).value() = r["time_std"].get<double>();
try {
hyperparameters = r["hyperparameters"].get<string>();
}
catch (const exception& err) {
stringstream oss;
oss << r["hyperparameters"];
hyperparameters = oss.str();
}
wks.cell(row, col + 11).value() = hyperparameters;
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
wks.cell(row, col).value() = group;
for (double item : lastResult[group]) {
wks.cell(row, ++col).value() = item;
}
}
} else {
footer(totalScore, row);
}
}
void ReportExcel::footer(double totalScore, int row)
{
auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) {
wks.cell(row + 2, 1).value() = score + " compared to " + BestResult::title() + " .: ";
wks.cell(row + 2, 5).value() = totalScore / BestResult::score();
}
}
}

View File

@@ -0,0 +1,25 @@
#ifndef REPORTEXCEL_H
#define REPORTEXCEL_H
#include <OpenXLSX.hpp>
#include "ReportBase.h"
#include "Paths.h"
#include "Colors.h"
namespace platform {
using namespace std;
using namespace OpenXLSX;
const int MAXLL = 128;
class ReportExcel : public ReportBase{
public:
explicit ReportExcel(json data_) : ReportBase(data_) {createFile();};
virtual ~ReportExcel() {closeFile();};
private:
void createFile();
void closeFile();
XLDocument doc;
XLWorksheet wks;
void header() override;
void body() override;
void footer(double totalScore, int row);
};
};
#endif // !REPORTEXCEL_H

254
src/Platform/Results.cc Normal file
View File

@@ -0,0 +1,254 @@
#include <filesystem>
#include "platformUtils.h"
#include "Results.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
#include "BestResult.h"
#include "Colors.h"
namespace platform {
Result::Result(const string& path, const string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
if (scoreName == BestResult::scoreName()) {
score /= BestResult::score();
}
title = data["title"];
duration = data["duration"];
model = data["model"];
}
json Result::load() const
{
ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
void Results::load()
{
using std::filesystem::directory_iterator;
for (const auto& file : directory_iterator(path)) {
auto filename = file.path().filename().string();
if (filename.find(".json") != string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename);
bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName())
addResult = false;
if (addResult)
files.push_back(result);
}
}
}
string Result::to_string() const
{
stringstream oss;
oss << date << " ";
oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " ";
return oss.str();
}
void Results::show() const
{
cout << Colors::GREEN() << "Results found: " << files.size() << endl;
cout << "-------------------" << endl;
auto i = 0;
cout << " # Date Model Score Name Score Duration Title" << endl;
cout << "=== ========== ============ =========== =========== ========= =============================================================" << endl;
bool odd = true;
for (const auto& result : files) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << result.to_string() << endl;
if (i == max && max != 0) {
break;
}
odd = !odd;
}
}
int Results::getIndex(const string& intent) const
{
string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
cout << color << "Choose result to " << intent << " (cancel=-1): ";
string line;
getline(cin, line);
int index = stoi(line);
if (index >= -1 && index < static_cast<int>(files.size())) {
return index;
}
cout << "Invalid index" << endl;
return -1;
}
void Results::report(const int index, const bool excelReport) const
{
cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
auto data = files.at(index).load();
if (excelReport) {
ReportExcel report(data);
report.show();
} else {
ReportConsole report(data);
report.show();
}
}
void Results::menu()
{
char option;
int index;
bool finished = false;
string filename, line, options = "qldhsre";
while (!finished) {
cout << Colors::RESET() << "Choose option (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): ";
getline(cin, line);
if (line.size() == 0)
continue;
if (options.find(line[0]) != string::npos) {
if (line.size() > 1) {
cout << "Invalid option" << endl;
continue;
}
option = line[0];
} else {
if (all_of(line.begin(), line.end(), ::isdigit)) {
index = stoi(line);
if (index >= 0 && index < files.size()) {
report(index, false);
continue;
}
}
cout << "Invalid option" << endl;
continue;
}
switch (option) {
case 'q':
finished = true;
break;
case 'l':
show();
break;
case 'd':
index = getIndex("delete");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Deleting " << filename << endl;
remove((path + "/" + filename).c_str());
files.erase(files.begin() + index);
cout << "File: " + filename + " deleted!" << endl;
show();
break;
case 'h':
index = getIndex("hide");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Hiding " << filename << endl;
rename((path + "/" + filename).c_str(), (path + "/." + filename).c_str());
files.erase(files.begin() + index);
show();
menu();
break;
case 's':
sortList();
show();
break;
case 'r':
index = getIndex("report");
if (index == -1)
break;
report(index, false);
break;
case 'e':
index = getIndex("excel");
if (index == -1)
break;
report(index, true);
break;
default:
cout << "Invalid option" << endl;
}
}
}
void Results::sortList()
{
cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
string line;
char option;
getline(cin, line);
if (line.size() == 0)
return;
if (line.size() > 1) {
cout << "Invalid option" << endl;
return;
}
option = line[0];
switch (option) {
case 'd':
sortDate();
break;
case 's':
sortScore();
break;
case 'u':
sortDuration();
break;
case 'm':
sortModel();
break;
default:
cout << "Invalid option" << endl;
}
}
void Results::sortDate()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDate() > b.getDate();
});
}
void Results::sortModel()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getModel() > b.getModel();
});
}
void Results::sortDuration()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDuration() > b.getDuration();
});
}
void Results::sortScore()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getScore() > b.getScore();
});
}
void Results::manage()
{
if (files.size() == 0) {
cout << "No results found!" << endl;
exit(0);
}
sortDate();
show();
menu();
cout << "Done!" << endl;
}
}

56
src/Platform/Results.h Normal file
View File

@@ -0,0 +1,56 @@
#ifndef RESULTS_H
#define RESULTS_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using namespace std;
using json = nlohmann::json;
class Result {
public:
Result(const string& path, const string& filename);
json load() const;
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
private:
string path;
string filename;
string date;
double score;
string title;
double duration;
string model;
string scoreName;
};
class Results {
public:
Results(const string& path, const int max, const string& model, const string& score) : path(path), max(max), model(model), scoreName(score) { load(); };
void manage();
private:
string path;
int max;
string model;
string scoreName;
vector<Result> files;
void load(); // Loads the list of results
void show() const;
void report(const int index, const bool excelReport) const;
int getIndex(const string& intent) const;
void menu();
void sortList();
void sortDate();
void sortScore();
void sortModel();
void sortDuration();
};
};
#endif

57
src/Platform/list.cc Normal file
View File

@@ -0,0 +1,57 @@
#include <iostream>
#include <locale>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
using namespace std;
const int BALANCE_LENGTH = 75;
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
void outputBalance(const string& balance)
{
auto temp = string(balance);
while (temp.size() > BALANCE_LENGTH - 1) {
auto part = temp.substr(0, BALANCE_LENGTH);
cout << part << endl;
cout << setw(48) << " ";
temp = temp.substr(BALANCE_LENGTH);
}
cout << temp << endl;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(platform::Paths().datasets(), false);
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl;
string balanceBars = string(BALANCE_LENGTH, '=');
cout << "============================== ====== ===== === " << balanceBars << endl;
bool odd = true;
for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
cout << setw(6) << right << nSamples << " ";
cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
cout << setw(3) << right << data.getNClasses(dataset) << " ";
stringstream oss;
string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
odd = !odd;
}
cout << Colors::RESET() << endl;
return 0;
}

View File

@@ -1,25 +1,27 @@
#include <iostream> #include <iostream>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "platformUtils.h" #include "platformUtils.h"
#include "Experiment.h" #include "Experiment.h"
#include "Datasets.h" #include "Datasets.h"
#include "DotEnv.h" #include "DotEnv.h"
#include "Models.h" #include "Models.h"
#include "modelRegister.h" #include "modelRegister.h"
#include "Paths.h"
using namespace std; using namespace std;
const string PATH_RESULTS = "results"; using json = nlohmann::json;
const string PATH_DATASETS = "datasets";
argparse::ArgumentParser manageArguments(int argc, char** argv) argparse::ArgumentParser manageArguments(int argc, char** argv)
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
argparse::ArgumentParser program("BayesNetSample"); argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("-p", "--path") program.add_argument("-p", "--path")
.help("folder where the data files are located, default") .help("folder where the data files are located, default")
.default_value(string{ PATH_DATASETS } .default_value(string{ platform::Paths::datasets() });
);
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) { .action([](const std::string& value) {
@@ -32,6 +34,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
); );
program.add_argument("--title").default_value("").help("Experiment title"); program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) {
try { try {
@@ -60,6 +63,8 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
auto seeds = program.get<vector<int>>("seeds"); auto seeds = program.get<vector<int>>("seeds");
auto complete_file_name = path + file_name + ".arff"; auto complete_file_name = path + file_name + ".arff";
auto title = program.get<string>("title"); auto title = program.get<string>("title");
auto hyperparameters = program.get<string>("hyperparameters");
auto saveResults = program.get<bool>("save");
if (title == "" && file_name == "") { if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided"); throw runtime_error("title is mandatory if dataset is not provided");
} }
@@ -75,7 +80,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
auto program = manageArguments(argc, argv); auto program = manageArguments(argc, argv);
bool saveResults = false;
auto file_name = program.get<string>("dataset"); auto file_name = program.get<string>("dataset");
auto path = program.get<string>("path"); auto path = program.get<string>("path");
auto model_name = program.get<string>("model"); auto model_name = program.get<string>("model");
@@ -83,9 +87,11 @@ int main(int argc, char** argv)
auto stratified = program.get<bool>("stratified"); auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds"); auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds"); auto seeds = program.get<vector<int>>("seeds");
auto hyperparameters =program.get<string>("hyperparameters");
vector<string> filesToTest; vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF); auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title"); auto title = program.get<string>("title");
auto saveResults = program.get<bool>("save");
if (file_name != "") { if (file_name != "") {
if (!datasets.isDataset(file_name)) { if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << endl; cerr << "Dataset " << file_name << " not found" << endl;
@@ -99,14 +105,15 @@ int main(int argc, char** argv)
filesToTest = platform::Datasets(path, true, platform::ARFF).getNames(); filesToTest = platform::Datasets(path, true, platform::ARFF).getNames();
saveResults = true; saveResults = true;
} }
/* /*
* Begin Processing * Begin Processing
*/ */
auto env = platform::DotEnv();
auto experiment = platform::Experiment(); auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0"); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(json::parse(hyperparameters));
for (auto seed : seeds) { for (auto seed : seeds) {
experiment.addRandomSeed(seed); experiment.addRandomSeed(seed);
} }
@@ -114,10 +121,10 @@ int main(int argc, char** argv)
timer.start(); timer.start();
experiment.go(filesToTest, path); experiment.go(filesToTest, path);
experiment.setDuration(timer.getDuration()); experiment.setDuration(timer.getDuration());
if (saveResults) if (saveResults) {
experiment.save(PATH_RESULTS); experiment.save(platform::Paths::results());
else }
experiment.show(); experiment.report();
cout << "Done!" << endl; cout << "Done!" << endl;
return 0; return 0;
} }

41
src/Platform/manage.cc Normal file
View File

@@ -0,0 +1,41 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "platformUtils.h"
#include "Paths.h"
#include "Results.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("manage");
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
try {
program.parse_args(argc, argv);
auto number = program.get<int>("number");
if (number < 0) {
throw runtime_error("Number of results must be greater than or equal to 0");
}
auto model = program.get<string>("model");
auto score = program.get<string>("score");
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
auto number = program.get<int>("number");
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto results = platform::Results(platform::Paths::results(), number, model, score);
results.manage();
return 0;
}

View File

@@ -2,10 +2,20 @@
#define MODEL_REGISTER_H #define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN", static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE", static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB", static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE", static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
#endif #endif

View File

@@ -1,4 +1,5 @@
#include "platformUtils.h" #include "platformUtils.h"
#include "Paths.h"
using namespace torch; using namespace torch;
@@ -85,7 +86,7 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name) tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
{ {
auto handler = ArffFiles(); auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff"); handler.load(platform::Paths::datasets() + static_cast<string>(name) + ".arff");
// Get Dataset X, y // Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX(); vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY(); mdlp::labels_t& y = handler.getY();

View File

@@ -9,29 +9,21 @@ TEST_CASE("Test Bayesian Network")
{ {
auto [Xd, y, features, className, states] = loadFile("iris"); auto [Xd, y, features, className, states] = loadFile("iris");
SECTION("Test Update Nodes")
{
auto net = bayesnet::Network();
net.addNode("A", 3);
REQUIRE(net.getStates() == 3);
net.addNode("A", 5);
REQUIRE(net.getStates() == 5);
}
SECTION("Test get features") SECTION("Test get features")
{ {
auto net = bayesnet::Network(); auto net = bayesnet::Network();
net.addNode("A", 3); net.addNode("A");
net.addNode("B", 5); net.addNode("B");
REQUIRE(net.getFeatures() == vector<string>{"A", "B"}); REQUIRE(net.getFeatures() == vector<string>{"A", "B"});
net.addNode("C", 2); net.addNode("C");
REQUIRE(net.getFeatures() == vector<string>{"A", "B", "C"}); REQUIRE(net.getFeatures() == vector<string>{"A", "B", "C"});
} }
SECTION("Test get edges") SECTION("Test get edges")
{ {
auto net = bayesnet::Network(); auto net = bayesnet::Network();
net.addNode("A", 3); net.addNode("A");
net.addNode("B", 5); net.addNode("B");
net.addNode("C", 2); net.addNode("C");
net.addEdge("A", "B"); net.addEdge("A", "B");
net.addEdge("B", "C"); net.addEdge("B", "C");
REQUIRE(net.getEdges() == vector<pair<string, string>>{ {"A", "B"}, { "B", "C" } }); REQUIRE(net.getEdges() == vector<pair<string, string>>{ {"A", "B"}, { "B", "C" } });