Compare commits

...

35 Commits

Author SHA1 Message Date
d8734ff082 Separate contextual menu from general 2023-09-20 13:15:33 +02:00
03533461c8 Add compare to best results in manage 2023-09-20 12:51:19 +02:00
68f22a673d Add comparison to report console 2023-09-20 11:40:01 +02:00
b9bc0088f3 Add format to unique dataset results summary 2023-09-20 10:30:45 +02:00
c280e254ca Remove OpenXLSX submodule 2023-09-20 01:09:58 +02:00
3d0f29fda3 Remove .vscode/settings.json from repository 2023-09-20 01:01:40 +02:00
20a6ebab7c Support to add any number of sheets to excel 2023-09-20 00:58:01 +02:00
925f71166c Fix mistake in comparison 2023-09-19 23:46:49 +02:00
f69f415b92 Complete comparison with ZeroR 2023-09-19 17:55:03 +02:00
1bdfbd1620 Complete adding color to format 2023-09-19 14:07:41 +02:00
06fb135526 First approach 2023-09-18 23:26:22 +02:00
501ea0ab4e Fix CMakeList manage build with Linux 2023-09-18 19:27:40 +02:00
847c6761d7 Add Linux specific link library to cmake 2023-09-17 10:42:19 +02:00
6030885fc3 Add partial result filter to manage 2023-09-16 17:27:18 +02:00
89df7f4db0 Add library to manage link 2023-09-14 01:41:49 +02:00
41257ed566 If ! convergence don't predict test 2023-09-10 19:50:36 +02:00
506369e46b Add Convergence hyperparameter 2023-09-07 11:27:35 +02:00
d908f389f5 Begin using validation as finish condition 2023-09-06 10:51:07 +02:00
5a7c8f1818 Add status to classifier and Experiment 2023-09-05 13:39:43 +02:00
64fc7bd9dd Add show dataset detail in report 2023-09-05 09:26:49 +02:00
0b7beda78c Add threads without limit to network fit 2023-09-04 21:24:11 +02:00
05b670dfc0 Add detail to fold progress in main 2023-09-03 16:33:48 +02:00
de62d42b74 Fix make debug command 2023-09-03 14:13:10 +02:00
edb957d22e Add filter complete results to manage 2023-09-03 14:07:11 +02:00
4de5cb4c6c Merge pull request 'Solve Ensemble models exceptions on certain datasets' (#7) from solveexceptions into main
Reviewed-on: #7
2023-09-02 15:29:33 +00:00
c35030f137 Upgrade models version and Add class diagram 2023-09-02 14:39:43 +02:00
182b07ed90 Solve voting vector error 2023-09-02 13:58:12 +02:00
7806f961e2 Remove threads 2023-08-31 20:30:28 +02:00
7c3e315ae7 Add Linux specific options to compile 2023-08-29 18:20:55 +02:00
284ef6dfd1 Add significanceModels to AODELd 2023-08-24 12:58:53 +02:00
1c6af619b5 Exception if hyperparameters not valid 2023-08-24 12:09:35 +02:00
86ffdfd6f3 Add const feature and className to fit models 2023-08-23 23:15:39 +02:00
d82148079d Add KDB hyperparameters K and theta 2023-08-23 00:44:10 +02:00
067430fd1b Add xlsxopen submodule 2023-08-22 23:45:11 +02:00
f5d0d16365 Merge pull request 'Add excel report to manage results' (#6) from xlsx into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/6
2023-08-22 21:40:11 +00:00
58 changed files with 1142 additions and 624 deletions

31
.clang-uml Normal file
View File

@@ -0,0 +1,31 @@
compilation_database_dir: build
output_directory: puml
diagrams:
BayesNet:
type: class
glob:
- src/BayesNet/*.cc
- src/Platform/*.cc
using_namespace: bayesnet
include:
namespaces:
- bayesnet
- platform
plantuml:
after:
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
sequence:
type: sequence
glob:
- src/Platform/main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std
- bayesnet
- platform
include:
paths:
- src/BayesNet
- src/Platform
start_from:
- function: main(int,const char **)

2
.gitignore vendored
View File

@@ -35,3 +35,5 @@ build/
*.dSYM/** *.dSYM/**
cmake-build*/** cmake-build*/**
.idea .idea
puml/**
.vscode/settings.json

3
.gitmodules vendored
View File

@@ -10,3 +10,6 @@
[submodule "lib/json"] [submodule "lib/json"]
path = lib/json path = lib/json
url = https://github.com/nlohmann/json.git url = https://github.com/nlohmann/json.git
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git

10
.vscode/launch.json vendored
View File

@@ -10,7 +10,7 @@
"-d", "-d",
"iris", "iris",
"-m", "-m",
"KDB", "TANLd",
"-s", "-s",
"271", "271",
"-p", "-p",
@@ -28,12 +28,12 @@
"BoostAODE", "BoostAODE",
"-p", "-p",
"/Users/rmontanana/Code/discretizbench/datasets", "/Users/rmontanana/Code/discretizbench/datasets",
"--discretize",
"--stratified", "--stratified",
"-d", "-d",
"glass", "mfeat-morphological",
"--hyperparameters", "--discretize"
"{\"repeatSparent\": true, \"maxModels\": 12}" // "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "/Users/rmontanana/Code/discretizbench",
}, },

109
.vscode/settings.json vendored
View File

@@ -1,109 +0,0 @@
{
"files.associations": {
"*.rmd": "markdown",
"*.py": "python",
"vector": "cpp",
"__bit_reference": "cpp",
"__bits": "cpp",
"__config": "cpp",
"__debug": "cpp",
"__errc": "cpp",
"__hash_table": "cpp",
"__locale": "cpp",
"__mutex_base": "cpp",
"__node_handle": "cpp",
"__nullptr": "cpp",
"__split_buffer": "cpp",
"__string": "cpp",
"__threading_support": "cpp",
"__tuple": "cpp",
"array": "cpp",
"atomic": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"exception": "cpp",
"initializer_list": "cpp",
"ios": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"locale": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"optional": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"unordered_map": "cpp",
"variant": "cpp",
"algorithm": "cpp",
"iostream": "cpp",
"iomanip": "cpp",
"numeric": "cpp",
"set": "cpp",
"__tree": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"unordered_set": "cpp",
"any": "cpp",
"condition_variable": "cpp",
"forward_list": "cpp",
"fstream": "cpp",
"stack": "cpp",
"thread": "cpp",
"__memory": "cpp",
"filesystem": "cpp",
"*.toml": "toml",
"utility": "cpp",
"__verbose_abort": "cpp",
"bit": "cpp",
"random": "cpp",
"*.tcc": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"format": "cpp",
"valarray": "cpp",
"regex": "cpp",
"span": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"csetjmp": "cpp",
"future": "cpp",
"queue": "cpp",
"typeindex": "cpp",
"shared_mutex": "cpp",
"*.ipp": "cpp",
"cassert": "cpp",
"charconv": "cpp",
"source_location": "cpp",
"ranges": "cpp"
},
"cmake.configureOnOpen": false,
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
}

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(BayesNet project(BayesNet
VERSION 0.1.0 VERSION 0.2.0
DESCRIPTION "Bayesian Network and basic classifiers Library." DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX LANGUAGES CXX
@@ -30,7 +30,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF) option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF) option(CODE_COVERAGE "Collect coverage from test library" OFF)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -40,8 +40,7 @@ if (CODE_COVERAGE)
enable_testing() enable_testing()
include(CodeCoverage) include(CodeCoverage)
MESSAGE("Code coverage enabled") MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE) endif (CODE_COVERAGE)
@@ -55,7 +54,6 @@ endif (ENABLE_CLANG_TIDY)
add_git_submodule("lib/mdlp") add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse") add_git_submodule("lib/argparse")
add_git_submodule("lib/json") add_git_submodule("lib/json")
add_git_submodule("lib/openXLSX")
# Subdirectories # Subdirectories
# -------------- # --------------
@@ -74,8 +72,7 @@ file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE("Testing enabled")
add_git_submodule("lib/catch2") add_git_submodule("lib/catch2")
include(CTest) include(CTest)
add_subdirectory(tests) add_subdirectory(tests)
endif (ENABLE_TESTING) endif (ENABLE_TESTING)

View File

@@ -32,12 +32,15 @@ clean: ## Clean the debug info
find . -name "*.gcda" -print0 | xargs -0 rm find . -name "*.gcda" -print0 | xargs -0 rm
@echo ">>> Done"; @echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet ..."; @echo ">>> Building Debug BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build; @mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \ cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -j 32; cmake --build build -t main -t BayesNetSample -t manage -t list unit_tests -j 32;
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project release: ## Build a Release version of the project

View File

@@ -2,4 +2,36 @@
Bayesian Network Classifier with libtorch from scratch Bayesian Network Classifier with libtorch from scratch
## 0. Setup
### libxlsxwriter
Build and install libxlsxwriter before compiling BayesNet:
```bash
cd lib/libxlsxwriter
make
sudo make install
```
The library has to be installed in /usr/local/lib; otherwise, CMakeLists.txt has to be modified accordingly.
The following environment variable has to be set so that the installed shared library can be found at run time:
```bash
export LD_LIBRARY_PATH=/usr/local/lib
```
### Release
```bash
make release
```
### Debug & Tests
```bash
make debug
```
## 1. Introduction ## 1. Introduction

View File

@@ -1,12 +0,0 @@
digraph BayesNet {
label=<BayesNet >
fontsize=30
fontcolor=blue
labelloc=t
layout=circo
class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ]
class -> sepallength class -> sepalwidth class -> petallength class -> petalwidth petallength [shape=circle]
petallength -> sepallength petalwidth [shape=circle]
sepallength [shape=circle]
sepallength -> sepalwidth sepalwidth [shape=circle]
sepalwidth -> petalwidth }

View File

@@ -1 +0,0 @@
null

BIN
diagrams/BayesNet.pdf Executable file

Binary file not shown.

1
lib/libxlsxwriter Submodule

Submodule lib/libxlsxwriter added at 44e72c5862

Submodule lib/openXLSX deleted from b80da42d14

View File

@@ -10,7 +10,7 @@
#include "Folding.h" #include "Folding.h"
#include "Models.h" #include "Models.h"
#include "modelRegister.h" #include "modelRegister.h"
#include <fstream>
using namespace std; using namespace std;
@@ -58,180 +58,226 @@ pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vect
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
map<string, bool> datasets = { torch::Tensor weights_ = torch::full({ 10 }, 1.0 / 10, torch::kFloat64);
{"diabetes", true}, torch::Tensor y_ = torch::tensor({ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }, torch::kInt32);
{"ecoli", true}, torch::Tensor ypred = torch::tensor({ 1, 1, 1, 0, 0, 1, 1, 1, 1, 0 }, torch::kInt32);
{"glass", true}, cout << "Initial weights_: " << endl;
{"iris", true}, for (int i = 0; i < 10; i++) {
{"kdd_JapaneseVowels", false}, cout << weights_.index({ i }).item<double>() << ", ";
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = vector<string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(string{ PATH }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
}
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<string>("dataset");
path = program.get<string>("path");
model_name = program.get<string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
} }
catch (const exception& err) { cout << "end." << endl;
cerr << err.what() << endl; cout << "y_: " << endl;
cerr << program; for (int i = 0; i < 10; i++) {
exit(1); cout << y_.index({ i }).item<int>() << ", ";
} }
cout << "end." << endl;
cout << "ypred: " << endl;
for (int i = 0; i < 10; i++) {
cout << ypred.index({ i }).item<int>() << ", ";
}
cout << "end." << endl;
auto mask_wrong = ypred != y_;
auto mask_right = ypred == y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
cout << "epsilon_t: " << epsilon_t << endl;
double wt = (1 - epsilon_t) / epsilon_t;
cout << "wt: " << wt << endl;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
cout << "alpha_t: " << alpha_t << endl;
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
cout << "exp(alpha_t): " << exp(alpha_t) << endl;
cout << "exp(-alpha_t): " << exp(-alpha_t) << endl;
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
cout << "totalWeights: " << totalWeights << endl;
cout << "Before normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
weights_ = weights_ / totalWeights;
cout << "After normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
// map<string, bool> datasets = {
// {"diabetes", true},
// {"ecoli", true},
// {"glass", true},
// {"iris", true},
// {"kdd_JapaneseVowels", false},
// {"letter", true},
// {"liver-disorders", true},
// {"mfeat-factors", true},
// };
// auto valid_datasets = vector<string>();
// transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
// [](const pair<string, bool>& pair) { return pair.first; });
// argparse::ArgumentParser program("BayesNetSample");
// program.add_argument("-d", "--dataset")
// .help("Dataset file name")
// .action([valid_datasets](const std::string& value) {
// if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
// return value;
// }
// throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
// }
// );
// program.add_argument("-p", "--path")
// .help(" folder where the data files are located, default")
// .default_value(string{ PATH }
// );
// program.add_argument("-m", "--model")
// .help("Model to use " + platform::Models::instance()->toString())
// .action([](const std::string& value) {
// static const vector<string> choices = platform::Models::instance()->getNames();
// if (find(choices.begin(), choices.end(), value) != choices.end()) {
// return value;
// }
// throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
// }
// );
// program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
// program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
// program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
// program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
// program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
// try {
// auto k = stoi(value);
// if (k < 2) {
// throw runtime_error("Number of folds must be greater than 1");
// }
// return k;
// }
// catch (const runtime_error& err) {
// throw runtime_error(err.what());
// }
// catch (...) {
// throw runtime_error("Number of folds must be an integer");
// }});
// program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
// bool class_last, stratified, tensors, dump_cpt;
// string model_name, file_name, path, complete_file_name;
// int nFolds, seed;
// try {
// program.parse_args(argc, argv);
// file_name = program.get<string>("dataset");
// path = program.get<string>("path");
// model_name = program.get<string>("model");
// complete_file_name = path + file_name + ".arff";
// stratified = program.get<bool>("stratified");
// tensors = program.get<bool>("tensors");
// nFolds = program.get<int>("folds");
// seed = program.get<int>("seed");
// dump_cpt = program.get<bool>("dumpcpt");
// class_last = datasets[file_name];
// if (!file_exists(complete_file_name)) {
// throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
// }
// }
// catch (const exception& err) {
// cerr << err.what() << endl;
// cerr << program;
// exit(1);
// }
/* /*
* Begin Processing * Begin Processing
*/ */
auto handler = ArffFiles(); // auto handler = ArffFiles();
handler.load(complete_file_name, class_last); // handler.load(complete_file_name, class_last);
// Get Dataset X, y // // Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX(); // vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY(); // mdlp::labels_t& y = handler.getY();
// Get className & Features // // Get className & Features
auto className = handler.getClassName(); // auto className = handler.getClassName();
vector<string> features; // vector<string> features;
auto attributes = handler.getAttributes(); // auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), // transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<string, string>& item) { return item.first; }); // [](const pair<string, string>& item) { return item.first; });
// Discretize Dataset // // Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features); // auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1; // maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<string, vector<int>> states; // map<string, vector<int>> states;
for (auto feature : features) { // for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]); // states[feature] = vector<int>(maxes[feature]);
} // }
states[className] = vector<int>(maxes[className]); // states[className] = vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name); // auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states); // clf->fit(Xd, y, features, className, states);
if (dump_cpt) { // if (dump_cpt) {
cout << "--- CPT Tables ---" << endl; // cout << "--- CPT Tables ---" << endl;
clf->dump_cpt(); // clf->dump_cpt();
} // }
auto lines = clf->show(); // auto lines = clf->show();
for (auto line : lines) { // for (auto line : lines) {
cout << line << endl; // cout << line << endl;
} // }
cout << "--- Topological Order ---" << endl; // cout << "--- Topological Order ---" << endl;
auto order = clf->topological_order(); // auto order = clf->topological_order();
for (auto name : order) { // for (auto name : order) {
cout << name << ", "; // cout << name << ", ";
} // }
cout << "end." << endl; // cout << "end." << endl;
auto score = clf->score(Xd, y); // auto score = clf->score(Xd, y);
cout << "Score: " << score << endl; // cout << "Score: " << score << endl;
auto graph = clf->graph(); // auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name; // auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot"); // ofstream file(dot_file + ".dot");
file << graph; // file << graph;
file.close(); // file.close();
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; // cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; // cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
string stratified_string = stratified ? " Stratified" : ""; // string stratified_string = stratified ? " Stratified" : "";
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; // cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
cout << "==========================================" << endl; // cout << "==========================================" << endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32); // torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32); // torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) { // for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); // Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} // }
float total_score = 0, total_score_train = 0, score_train, score_test; // float total_score = 0, total_score_train = 0, score_train, score_test;
Fold* fold; // platform::Fold* fold;
if (stratified) // if (stratified)
fold = new StratifiedKFold(nFolds, y, seed); // fold = new platform::StratifiedKFold(nFolds, y, seed);
else // else
fold = new KFold(nFolds, y.size(), seed); // fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) { // for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i); // auto [train, test] = fold->getFold(i);
cout << "Fold: " << i + 1 << endl; // cout << "Fold: " << i + 1 << endl;
if (tensors) { // if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64); // auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64); // auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); // torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain }); // torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); // torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest }); // torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states); // clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint); // auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint); // score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt); // score_test = clf->score(Xtestt, ytestt);
} else { // } else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y); // auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y); // auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states); // clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain); // score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest); // score_test = clf->score(Xtest, ytest);
} // }
if (dump_cpt) { // if (dump_cpt) {
cout << "--- CPT Tables ---" << endl; // cout << "--- CPT Tables ---" << endl;
clf->dump_cpt(); // clf->dump_cpt();
} // }
total_score_train += score_train; // total_score_train += score_train;
total_score += score_test; // total_score += score_test;
cout << "Score Train: " << score_train << endl; // cout << "Score Train: " << score_train << endl;
cout << "Score Test : " << score_test << endl; // cout << "Score Test : " << score_test << endl;
cout << "-------------------------------------------------------------------------------" << endl; // cout << "-------------------------------------------------------------------------------" << endl;
} // }
cout << "**********************************************************************************" << endl; // cout << "**********************************************************************************" << endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl; // cout << "Average Score Train: " << total_score_train / nFolds << endl;
cout << "Average Score Test : " << total_score / nFolds << endl;return 0; // cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
} }

View File

@@ -10,7 +10,6 @@ namespace bayesnet {
AODE(); AODE();
virtual ~AODE() {}; virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") const override; vector<string> graph(const string& title = "AODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

View File

@@ -4,9 +4,9 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
@@ -26,6 +26,7 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODELd>(i)); models.push_back(std::make_unique<SPODELd>(i));
} }
n_models = models.size(); n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
} }
void AODELd::trainModel(const torch::Tensor& weights) void AODELd::trainModel(const torch::Tensor& weights)
{ {

View File

@@ -12,11 +12,10 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
AODELd(); AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) override; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) override;
virtual ~AODELd() = default; virtual ~AODELd() = default;
vector<string> graph(const string& name = "AODE") const override; vector<string> graph(const string& name = "AODELd") const override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif // !AODELD_H #endif // !AODELD_H

View File

@@ -5,19 +5,21 @@
#include <vector> #include <vector>
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier { class BaseClassifier {
protected: protected:
virtual void trainModel(const torch::Tensor& weights) = 0; virtual void trainModel(const torch::Tensor& weights) = 0;
public: public:
// X is nxm vector, y is nx1 vector // X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor // X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0; vector<int> virtual predict(vector<vector<int>>& X) = 0;
status_t virtual getStatus() const = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0; float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0; int virtual getNumberOfNodes()const = 0;
@@ -25,7 +27,7 @@ namespace bayesnet {
int virtual getNumberOfStates() const = 0; int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0; vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0; vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.1.0"; }; const string inline getVersion() const { return "0.2.0"; };
vector<string> virtual topological_order() = 0; vector<string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0; void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;

View File

@@ -77,7 +77,6 @@ namespace bayesnet {
auto source = vector<string>(features); auto source = vector<string>(features);
source.push_back(className); source.push_back(className);
auto combinations = doCombinations(source); auto combinations = doCombinations(source);
double totalWeight = weights.sum().item<double>();
// Compute class prior // Compute class prior
auto margin = torch::zeros({ classNumStates }, torch::kFloat); auto margin = torch::zeros({ classNumStates }, torch::kFloat);
for (int value = 0; value < classNumStates; ++value) { for (int value = 0; value < classNumStates; ++value) {

View File

@@ -1,6 +1,9 @@
#include "BoostAODE.h" #include "BoostAODE.h"
#include <set> #include <set>
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "Colors.h"
#include "Folding.h"
#include <limits.h>
namespace bayesnet { namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {} BoostAODE::BoostAODE() : Ensemble() {}
@@ -11,7 +14,7 @@ namespace bayesnet {
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid // Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending" }; const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence" };
checkHyperparameters(validKeys, hyperparameters); checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) { if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"]; repeatSparent = hyperparameters["repeatSparent"];
@@ -22,6 +25,38 @@ namespace bayesnet {
if (hyperparameters.contains("ascending")) { if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"]; ascending = hyperparameters["ascending"];
} }
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
}
}
// Prepares X_train / y_train (and, when `convergence` is enabled, X_test / y_test)
// before the boosting loop in trainModel runs.
// - convergence == true : the incoming dataset is split using fold 0 of a StratifiedKFold
//   and `dataset`, `m` and `metrics` are rebuilt from the training part only.
// - convergence == false: every sample is used for training; X_test / y_test are left untouched.
void BoostAODE::validationInit()
{
// The class labels live in the last row of dataset
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
// NOTE(review): 5 and 271 are presumably the number of folds and the random seed — confirm against platform::StratifiedKFold
auto fold = platform::StratifiedKFold(5, y_, 271);
// Keep a copy of the input dataset, since `dataset` is overwritten below
dataset_ = torch::clone(dataset);
// Only the first fold is used to obtain the train/validation sample indices
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets (all rows but the last are features, the last row is the class)
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
dataset = X_train;
// m now counts only the training samples
m = X_train.size(1);
auto n_classes = states.at(className).size();
// NOTE(review): metrics is constructed while dataset holds only X_train; the labels are
// handed to buildDataset on the next statement. Classifier::build constructs Metrics from
// the complete dataset instead — confirm this ordering is intended.
metrics = Metrics(dataset, features, className, n_classes);
// Build dataset with train data
buildDataset(y_train);
} else {
// Use all data to train
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
} }
void BoostAODE::trainModel(const torch::Tensor& weights) void BoostAODE::trainModel(const torch::Tensor& weights)
{ {
@@ -29,15 +64,22 @@ namespace bayesnet {
n_models = 0; n_models = 0;
if (maxModels == 0) if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n; maxModels = .1 * n > 10 ? .1 * n : n;
validationInit();
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
auto y_ = dataset.index({ -1, "..." });
bool exitCondition = false; bool exitCondition = false;
unordered_set<int> featuresUsed; unordered_set<int> featuresUsed;
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double delta = 1.0;
double threshold = 1e-4;
int tolerance = 5; // number of times the accuracy can be lower than the threshold
int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict
// Step 0: Set the finish condition // Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features // if not repeatSparent a finish condition is run out of features
// n_models == maxModels // n_models == maxModels
int numClasses = states[className].size(); // epsilon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing
while (!exitCondition) { while (!exitCondition) {
// Step 1: Build ranking with mutual information // Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
@@ -60,29 +102,44 @@ namespace bayesnet {
} }
featuresUsed.insert(feature); featuresUsed.insert(feature);
model = std::make_unique<SPODE>(feature); model = std::make_unique<SPODE>(feature);
n_models++;
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_); auto ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say // Step 3.1: Compute the classifier amount of say
auto mask_wrong = ypred != y_; auto mask_wrong = ypred != y_train;
auto mask_right = ypred == y_train;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype()); auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double wrongWeights = masked_weights.sum().item<double>(); double epsilon_t = masked_weights.sum().item<double>();
double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights); double wt = (1 - epsilon_t) / epsilon_t;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
// Step 3.2: Update weights for next classifier // Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples // Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_; weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights // Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>(); double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights; weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote // Step 3.4: Store classifier and its accuracy to weigh its future vote
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(significance); significanceModels.push_back(alpha_t);
exitCondition = n_models == maxModels && repeatSparent; n_models++;
if (convergence) {
auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
if (priorAccuracy == 0) {
priorAccuracy = accuracy;
} else {
delta = accuracy - priorAccuracy;
}
if (delta < threshold) {
count++;
}
}
exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
} }
if (featuresUsed.size() != features.size()) { if (featuresUsed.size() != features.size()) {
cout << "Warning: BoostAODE did not use all the features" << endl; status = WARNING;
} }
weights.copy_(weights_);
} }
vector<string> BoostAODE::graph(const string& title) const vector<string> BoostAODE::graph(const string& title) const
{ {

View File

@@ -13,9 +13,13 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
private: private:
bool repeatSparent=false; torch::Tensor dataset_;
int maxModels=0; torch::Tensor X_train, y_train, X_test, y_test;
bool ascending=false; //Process KBest features ascending or descending order void validationInit();
bool repeatSparent = false;
int maxModels = 0;
bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve
}; };
} }
#endif #endif

View File

@@ -5,7 +5,7 @@ namespace bayesnet {
using namespace torch; using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
this->features = features; this->features = features;
this->className = className; this->className = className;
@@ -13,7 +13,7 @@ namespace bayesnet {
m = dataset.size(1); m = dataset.size(1);
n = dataset.size(0) - 1; n = dataset.size(0) - 1;
checkFitParameters(); checkFitParameters();
auto n_classes = states[className].size(); auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes); metrics = Metrics(dataset, features, className, n_classes);
model.initialize(); model.initialize();
buildModel(weights); buildModel(weights);
@@ -39,7 +39,7 @@ namespace bayesnet {
model.fit(dataset, weights, features, className, states); model.fit(dataset, weights, features, className, states);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{ {
dataset = X; dataset = X;
buildDataset(y); buildDataset(y);
@@ -47,7 +47,7 @@ namespace bayesnet {
return build(features, className, states, weights); return build(features, className, states, weights);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{ {
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32); dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
@@ -58,21 +58,24 @@ namespace bayesnet {
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights);
} }
// Fits the classifier from a single tensor that already contains the class row.
// Stores the tensor in this->dataset, creates uniform sample weights
// (one entry per column of dataset, each equal to 1 / dataset.size(1)) and delegates to build().
// Returns whatever build() returns.
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{ {
this->dataset = dataset; this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights);
} }
// Same as the dataset-only overload, but the caller supplies the sample weights
// instead of having uniform weights generated here. The weights are forwarded to build() unchanged.
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
this->dataset = dataset; this->dataset = dataset;
return build(features, className, states, weights); return build(features, className, states, weights);
} }
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {
if (torch::is_floating_point(dataset)) {
throw invalid_argument("dataset (X, y) must be of type Integer");
}
if (n != features.size()) { if (n != features.size()) {
throw invalid_argument("X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features"); throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
} }
if (states.find(className) == states.end()) { if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states"); throw invalid_argument("className not found in states");
@@ -160,4 +163,10 @@ namespace bayesnet {
} }
} }
} }
// Default hyperparameter handler for classifiers that define none:
// the accepted key list is empty and the input is only passed to checkHyperparameters.
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
    const vector<string> noKeysAccepted;
    checkHyperparameters(noKeysAccepted, hyperparameters);
}
} }

View File

@@ -10,8 +10,7 @@ using namespace torch;
namespace bayesnet { namespace bayesnet {
class Classifier : public BaseClassifier { class Classifier : public BaseClassifier {
private: private:
void buildDataset(torch::Tensor& y); Classifier& build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights);
Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights);
protected: protected:
bool fitted; bool fitted;
int m, n; // m: number of samples, n: number of features int m, n; // m: number of samples, n: number of features
@@ -21,28 +20,32 @@ namespace bayesnet {
string className; string className;
map<string, vector<int>> states; map<string, vector<int>> states;
Tensor dataset; // (n+1)xm tensor Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
void checkFitParameters(); void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters); void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y);
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) override; Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
void addNodes(); void addNodes();
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
int getNumberOfEdges() const override; int getNumberOfEdges() const override;
int getNumberOfStates() const override; int getNumberOfStates() const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
status_t getStatus() const override { return status; }
vector<int> predict(vector<vector<int>>& X) override; vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() const override; vector<string> show() const override;
vector<string> topological_order() override; vector<string> topological_order() override;
void dump_cpt() const override; void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
}; };
} }
#endif #endif

View File

@@ -3,7 +3,7 @@
namespace bayesnet { namespace bayesnet {
using namespace torch; using namespace torch;
// Default constructor: initialises the Classifier base with a default-constructed Network.
// The updated version additionally initialises n_models to 0.
Ensemble::Ensemble() : Classifier(Network()) {} Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
void Ensemble::trainModel(const torch::Tensor& weights) void Ensemble::trainModel(const torch::Tensor& weights)
{ {
@@ -17,10 +17,14 @@ namespace bayesnet {
{ {
auto y_pred_ = y_pred.accessor<int, 2>(); auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final; vector<int> y_pred_final;
int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
vector<double> votes(y_pred.size(1), 0); // votes store in each index (value of class) the significance added by each model
for (int j = 0; j < y_pred.size(1); ++j) { // i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
votes[y_pred_[i][j]] += significanceModels[j]; vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels.at(j);
} }
// argsort in descending order // argsort in descending order
auto indices = argsort(votes); auto indices = argsort(votes);
@@ -34,7 +38,6 @@ namespace bayesnet {
throw logic_error("Ensemble has not been fitted"); throw logic_error("Ensemble has not been fitted");
} }
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32); Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
//Create a threadpool
auto threads{ vector<thread>() }; auto threads{ vector<thread>() };
mutex mtx; mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {

View File

@@ -4,6 +4,18 @@ namespace bayesnet {
using namespace torch; using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
// Validates `hyperparameters` against the keys KDB understands ("k" and "theta")
// and copies each one that is present into the corresponding member.
// Keys that are absent leave the current member value unchanged.
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
{
    const vector<string> acceptedKeys{ "k", "theta" };
    checkHyperparameters(acceptedKeys, hyperparameters);

    const auto kEntry = hyperparameters.find("k");
    if (kEntry != hyperparameters.end()) {
        k = *kEntry;
    }

    const auto thetaEntry = hyperparameters.find("theta");
    if (thetaEntry != hyperparameters.end()) {
        theta = *thetaEntry;
    }
}
void KDB::buildModel(const torch::Tensor& weights) void KDB::buildModel(const torch::Tensor& weights)
{ {
/* /*

View File

@@ -16,7 +16,7 @@ namespace bayesnet {
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {}; virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override {}; void setHyperparameters(nlohmann::json& hyperparameters) override;
vector<string> graph(const string& name = "KDB") const override; vector<string> graph(const string& name = "KDB") const override;
}; };
} }

View File

@@ -3,9 +3,9 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;

View File

@@ -10,10 +10,9 @@ namespace bayesnet {
public: public:
explicit KDBLd(int k); explicit KDBLd(int k);
virtual ~KDBLd() = default; virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") const override; vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };
} }

View File

@@ -3,8 +3,8 @@
#include "Network.h" #include "Network.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
// Default constructor: empty feature list, empty class name, zero class states, not fitted.
// The updated version also initialises laplaceSmoothing to 0.
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false) {} Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
// Same as the default constructor, but stores maxT in maxThreads.
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted) getmaxThreads()), fitted(other.fitted)
{ {
@@ -132,10 +132,10 @@ namespace bayesnet {
// Copies the number of states of every feature into its Node and caches the
// number of states of the class node in classNumStates.
// states.at(...) throws std::out_of_range when a feature has no entry in `states`.
// The updated version also uses nodes.at(...), so a missing node throws as well
// instead of going through operator[].
void Network::setStates(const map<string, vector<int>>& states) void Network::setStates(const map<string, vector<int>>& states)
{ {
// Set states to every Node in the network // Set states to every Node in the network
for (int i = 0; i < features.size(); ++i) { for_each(features.begin(), features.end(), [this, &states](const string& feature) {
nodes[features[i]]->setNumStates(states.at(features[i]).size()); nodes.at(feature)->setNumStates(states.at(feature).size());
} });
// The class node's state count is read back from the node itself
classNumStates = nodes[className]->getNumStates(); classNumStates = nodes.at(className)->getNumStates();
} }
// X comes in nxm, where n is the number of features and m the number of samples // X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states) void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
@@ -174,37 +174,11 @@ namespace bayesnet {
{ {
setStates(states); setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) {
maxThreadsRunning = 1;
}
vector<thread> threads; vector<thread> threads;
mutex mtx; for (auto& node : nodes) {
condition_variable cv; threads.emplace_back([this, &node, &weights]() {
int activeThreads = 0; node.second->computeCPT(samples, features, laplaceSmoothing, weights);
int nextNodeIndex = 0;
while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() {
while (true) {
unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex;
lock.unlock();
pair.second->computeCPT(samples, features, laplaceSmoothing, weights);
lock.lock();
nodes[pair.first] = std::move(pair.second);
lock.unlock();
}
lock_guard<mutex> lock(mtx);
--activeThreads;
cv.notify_one();
}); });
++activeThreads;
} }
for (auto& thread : threads) { for (auto& thread : threads) {
thread.join(); thread.join();
@@ -399,7 +373,6 @@ namespace bayesnet {
auto result = features; auto result = features;
result.erase(remove(result.begin(), result.end(), className), result.end()); result.erase(remove(result.begin(), result.end(), className), result.end());
bool ending{ false }; bool ending{ false };
int idx = 0;
while (!ending) { while (!ending) {
ending = true; ending = true;
for (auto feature : features) { for (auto feature : features) {

View File

@@ -27,6 +27,7 @@ namespace bayesnet {
Network(); Network();
explicit Network(float); explicit Network(float);
explicit Network(Network&); explicit Network(Network&);
~Network() = default;
torch::Tensor& getSamples(); torch::Tensor& getSamples();
float getmaxThreads(); float getmaxThreads();
void addNode(const string&); void addNode(const string&);
@@ -52,7 +53,7 @@ namespace bayesnet {
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize(); void initialize();
void dump_cpt() const; void dump_cpt() const;
inline string version() { return "0.1.0"; } inline string version() { return "0.2.0"; }
}; };
} }
#endif #endif

View File

@@ -100,7 +100,7 @@ namespace bayesnet {
} }
int name_index = pos - features.begin(); int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
coordinates.push_back(dataset.index({ name_index, n_sample })); coordinates.push_back(dataset.index({ name_index, n_sample }));
for (auto parent : parents) { for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName()); pos = find(features.begin(), features.end(), parent->getName());
@@ -118,10 +118,10 @@ namespace bayesnet {
} }
// Returns the cpTable entry selected by `evidence`.
// The index is built from this node's own evidence value first, followed by the
// evidence value of each parent in the order stored in `parents`.
// Note: evidence is accessed with operator[], so a name that is missing from the
// map is inserted with value 0 rather than reported as an error.
float Node::getFactorValue(map<string, int>& evidence) float Node::getFactorValue(map<string, int>& evidence)
{ {
torch::List<c10::optional<torch::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h) // following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(torch::tensor(evidence[name])); coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); }); transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
// Index the table with the collected coordinates and extract the scalar as float
return cpTable.index({ coordinates }).item<float>(); return cpTable.index({ coordinates }).item<float>();
} }
vector<string> Node::graph(const string& className) vector<string> Node::graph(const string& className)

View File

@@ -9,6 +9,15 @@ namespace bayesnet {
delete value; delete value;
} }
} }
// Guards the tensors passed to a fit that performs local discretization.
// X is checked first and must be a floating point tensor; y must not be one.
// Throws std::invalid_argument describing the first condition that fails.
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
{
    const bool featuresAreFloat = torch::is_floating_point(X);
    if (featuresAreFloat == false) {
        throw std::invalid_argument("X must be a floating point tensor");
    }

    const bool labelsAreFloat = torch::is_floating_point(y);
    if (labelsAreFloat == true) {
        throw std::invalid_argument("y must be an integer tensor");
    }
}
map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model) map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model)
{ {
// order of local discretization is important. no good 0, 1, 2... // order of local discretization is important. no good 0, 1, 2...
@@ -44,15 +53,6 @@ namespace bayesnet {
auto xvf_ptr = Xf.index({ index }).data_ptr<float>(); auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1)); auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv); discretizers[feature]->fit(xvf, yxv);
//
//
//
// auto tmp = discretizers[feature]->transform(xvf);
// Xv[index] = tmp;
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[feature] = xStates;
} }
if (upgrade) { if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers // Discretize again X (only the affected indices) with the new fitted discretizers

View File

@@ -13,6 +13,7 @@ namespace bayesnet {
Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_); Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_);
virtual ~Proposal(); virtual ~Proposal();
protected: protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X); torch::Tensor prepareX(torch::Tensor& X);
map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model); map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model);
map<string, vector<int>> fit_local_discretization(const torch::Tensor& y); map<string, vector<int>> fit_local_discretization(const torch::Tensor& y);

View File

@@ -12,7 +12,6 @@ namespace bayesnet {
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() {}; virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") const override; vector<string> graph(const string& name = "SPODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

View File

@@ -3,9 +3,9 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
@@ -18,11 +18,13 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }
SPODELd& SPODELd::fit(torch::Tensor& dataset, vector<string>& features_, string className_, map<string, vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& dataset, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{ {
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
}
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone(); y = dataset.index({ -1, "..." }).clone();
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_; features = features_;
className = className_; className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y

View File

@@ -9,11 +9,10 @@ namespace bayesnet {
public: public:
explicit SPODELd(int root); explicit SPODELd(int root);
virtual ~SPODELd() = default; virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override; SPODELd& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override; vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };
} }

View File

@@ -11,7 +11,6 @@ namespace bayesnet {
TAN(); TAN();
virtual ~TAN() {}; virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") const override; vector<string> graph(const string& name = "TAN") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif #endif

View File

@@ -3,9 +3,9 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;

View File

@@ -10,11 +10,10 @@ namespace bayesnet {
public: public:
TANLd(); TANLd();
virtual ~TANLd() = default; virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") const override; vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
void setHyperparameters(nlohmann::json& hyperparameters) override {};
}; };
} }
#endif // !TANLD_H #endif // !TANLD_H

View File

@@ -5,8 +5,12 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc) add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc) add_executable(list list.cc platformUtils Datasets.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(manage "${TORCH_LIBRARIES}" OpenXLSX::OpenXLSX) if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
else()
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,6 +1,7 @@
#include "Datasets.h" #include "Datasets.h"
#include "platformUtils.h" #include "platformUtils.h"
#include "ArffFiles.h" #include "ArffFiles.h"
#include <fstream>
namespace platform { namespace platform {
void Datasets::load() void Datasets::load()
{ {
@@ -212,10 +213,11 @@ namespace platform {
{ {
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1); states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0); auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
} }
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1); states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states[className]), end(states[className]), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
} }
void Dataset::load_arff() void Dataset::load_arff()
{ {

View File

@@ -2,7 +2,7 @@
#include "Datasets.h" #include "Datasets.h"
#include "Models.h" #include "Models.h"
#include "ReportConsole.h" #include "ReportConsole.h"
#include <fstream>
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
string get_date() string get_date()
@@ -111,6 +111,26 @@ namespace platform {
} }
} }
/// Translates a classifier status into the colour string used when printing
/// progress. Any status other than NORMAL, WARNING or ERROR yields the reset colour.
string getColor(bayesnet::status_t status)
{
    if (status == bayesnet::NORMAL) {
        return Colors::GREEN();
    }
    if (status == bayesnet::WARNING) {
        return Colors::YELLOW();
    }
    if (status == bayesnet::ERROR) {
        return Colors::RED();
    }
    return Colors::RESET();
}
void showProgress(int fold, const string& color, const string& phase)
{
string prefix = phase == "a" ? "" : "\b\b\b\b";
cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const string& path, const string& fileName) void Experiment::cross_validation(const string& path, const string& fileName)
{ {
auto datasets = platform::Datasets(path, discretized, platform::ARFF); auto datasets = platform::Datasets(path, discretized, platform::ARFF);
@@ -159,28 +179,34 @@ namespace platform {
auto y_train = y.index({ train_t }); auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t }); auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t }); auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush; showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model // Train model
clf->fit(X_train, y_train, features, className, states); clf->fit(X_train, y_train, features, className, states);
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
nodes[item] = clf->getNumberOfNodes(); nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges(); edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates(); num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration(); train_time[item] = train_timer.getDuration();
// Score train
auto accuracy_train_value = clf->score(X_train, y_train); auto accuracy_train_value = clf->score(X_train, y_train);
// Test model // Test model
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start(); test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test); auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration(); test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value; accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value; accuracy_test[item] = accuracy_test_value;
cout << "\b\b\b, " << flush;
// Store results and times in vector // Store results and times in vector
result.addScoreTrain(accuracy_train_value); result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value); result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>()); result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>()); result.addTimeTest(test_time[item].item<double>());
item++; item++;
clf.reset();
} }
cout << "end. " << flush; cout << "end. " << flush;
delete fold;
} }
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>()); result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>()); result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());

View File

@@ -1,95 +1,97 @@
#include "Folding.h" #include "Folding.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) namespace platform {
{ Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
random_device rd; {
random_seed = default_random_engine(seed == -1 ? rd() : seed); random_device rd;
srand(seed == -1 ? time(0) : seed); random_seed = default_random_engine(seed == -1 ? rd() : seed);
} srand(seed == -1 ? time(0) : seed);
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
} }
int nTest = n / k; KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
auto train = vector<int>(); {
auto test = vector<int>(); iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed); shuffle(indices.begin(), indices.end(), random_seed);
} }
// Assign indices to folds pair<vector<int>, vector<int>> KFold::getFold(int nFold)
for (auto label = 0; label < class_counts.size(); ++label) { {
auto num_samples_to_take = class_counts[label] / k; if (nFold >= k || nFold < 0) {
if (num_samples_to_take == 0) throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
continue;
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
} }
while (remainder_samples_to_take > 0) { int nTest = n / k;
int fold = (rand() % static_cast<int>(k)); auto train = vector<int>();
if (stratified_indices[fold].size() == fold_size + 1) { auto test = vector<int>();
continue; for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
if (num_samples_to_take == 0)
continue;
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
continue;
}
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
} }
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
} }
} }
} pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold) {
{ if (nFold >= k || nFold < 0) {
if (nFold >= k || nFold < 0) { throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")"); }
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
} }
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
} }

View File

@@ -4,34 +4,35 @@
#include <vector> #include <vector>
#include <random> #include <random>
using namespace std; using namespace std;
namespace platform {
class Fold { class Fold {
protected: protected:
int k; int k;
int n; int n;
int seed; int seed;
default_random_engine random_seed; default_random_engine random_seed;
public: public:
Fold(int k, int n, int seed = -1); Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0; virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default; virtual ~Fold() = default;
int getNumberOfFolds() { return k; } int getNumberOfFolds() { return k; }
}; };
class KFold : public Fold { class KFold : public Fold {
private: private:
vector<int> indices; vector<int> indices;
public: public:
KFold(int k, int n, int seed = -1); KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override; pair<vector<int>, vector<int>> getFold(int nFold) override;
}; };
class StratifiedKFold : public Fold { class StratifiedKFold : public Fold {
private: private:
vector<int> y; vector<int> y;
vector<vector<int>> stratified_indices; vector<vector<int>> stratified_indices;
void build(); void build();
public: public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1); StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1); StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override; pair<vector<int>, vector<int>> getFold(int nFold) override;
}; };
}
#endif #endif

View File

@@ -26,7 +26,7 @@ namespace platform {
instance = it->second(); instance = it->second();
// wrap instance in a shared ptr and return // wrap instance in a shared ptr and return
if (instance != nullptr) if (instance != nullptr)
return shared_ptr<bayesnet::BaseClassifier>(instance); return unique_ptr<bayesnet::BaseClassifier>(instance);
else else
return nullptr; return nullptr;
} }

View File

@@ -1,10 +1,22 @@
#include <sstream> #include <sstream>
#include <locale> #include <locale>
#include "Datasets.h"
#include "ReportBase.h" #include "ReportBase.h"
#include "BestResult.h" #include "BestResult.h"
namespace platform { namespace platform {
/// Stores the report data and comparison mode, fixes the ZeroR margin at 10%
/// and registers the legend text shown for each status symbol.
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
    // The upward-arrow legend embeds the margin as a percentage with one decimal.
    stringstream zeroRLegend;
    zeroRLegend << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%";
    meaning[Symbols::equal_best] = "Equal to best";
    meaning[Symbols::better_best] = "Better than best";
    meaning[Symbols::cross] = "Less than or equal to ZeroR";
    meaning[Symbols::upward_arrow] = zeroRLegend.str();
}
string ReportBase::fromVector(const string& key) string ReportBase::fromVector(const string& key)
{ {
stringstream oss; stringstream oss;
@@ -34,4 +46,62 @@ namespace platform {
header(); header();
body(); body();
} }
/// Returns the status marker to print beside a dataset score and counts it in `summary`.
///
/// With `compare` set, the score is checked against the stored best result
/// for the model named in the report data. Otherwise, for "accuracy" scores on
/// two-class datasets, it is checked against the majority-class ratio inflated
/// by `margin`.
///
/// @param dataset name of the dataset the score belongs to
/// @param result  score obtained on that dataset
/// @return the marker, or a single blank when there is nothing to flag
string ReportBase::compareResult(const string& dataset, double result)
{
    const string blank = " ";
    string status = blank;
    if (compare) {
        const double best = bestResult(dataset, data["model"].get<string>());
        if (result == best) {
            status = Symbols::equal_best;
        } else if (result > best) {
            status = Symbols::better_best;
        }
    } else if (data["score_name"].get<string>() == "accuracy") {
        auto dt = Datasets(Paths::datasets(), false);
        dt.loadDataset(dataset);
        if (dt.getNClasses(dataset) == 2) {
            vector<int> distribution = dt.getClassesCounts(dataset);
            double nSamples = dt.getNSamples(dataset);
            const int majority = *max_element(distribution.begin(), distribution.end());
            double mark = majority / nSamples * (1 + margin);
            // An inflated mark above 1 is replaced by a fixed ceiling just under 1.
            if (mark > 1) {
                mark = 0.9995;
            }
            if (result < mark) {
                status = Symbols::cross;
            } else if (result > mark) {
                status = Symbols::upward_arrow;
            } else {
                status = "=";
            }
        }
    }
    // Tally every non-blank marker; operator[] starts a new counter at zero.
    if (status != blank) {
        ++summary[status];
    }
    return status;
}
double ReportBase::bestResult(const string& dataset, const string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
} }

View File

@@ -2,14 +2,26 @@
#define REPORTBASE_H #define REPORTBASE_H
#include <string> #include <string>
#include <iostream> #include <iostream>
#include "Paths.h"
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
using namespace std; using namespace std;
/// UTF-8 glyphs used as status markers in reports.
class Symbols {
public:
    // Base glyphs.
    inline static const string check_mark = "\u2714";
    inline static const string exclamation = "\u2757";
    inline static const string black_star = "\u2605";
    inline static const string cross = "\u2717";
    inline static const string upward_arrow = "\u27B6";
    inline static const string down_arrow = "\u27B4";
    // Semantic aliases; they must stay below the glyphs they copy from.
    inline static const string equal_best = check_mark;
    inline static const string better_best = black_star;
};
class ReportBase { class ReportBase {
public: public:
explicit ReportBase(json data_) { data = data_; }; explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default; virtual ~ReportBase() = default;
void show(); void show();
protected: protected:
@@ -18,6 +30,15 @@ namespace platform {
string fVector(const string& title, const json& data, const int width, const int precision); string fVector(const string& title, const json& data, const int width, const int precision);
virtual void header() = 0; virtual void header() = 0;
virtual void body() = 0; virtual void body() = 0;
virtual void showSummary() = 0;
string compareResult(const string& dataset, double result);
map<string, int> summary;
double margin;
map<string, string> meaning;
private:
double bestResult(const string& dataset, const string& model);
bool compare;
json bestResults;
}; };
}; };
#endif #endif

View File

@@ -10,14 +10,14 @@ namespace platform {
char do_thousands_sep() const { return '.'; } char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; } string do_grouping() const { return "\03"; }
}; };
string ReportConsole::headerLine(const string& text) string ReportConsole::headerLine(const string& text, int utf = 0)
{ {
int n = MAXL - text.length() - 3; int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n; n = n < 0 ? 0 : n;
return "* " + text + string(n, ' ') + "*\n"; return "* " + text + string(n + utf, ' ') + "*\n";
} }
void ReportConsole::header() void ReportConsole::header()
{ {
locale mylocale(cout.getloc(), new separated); locale mylocale(cout.getloc(), new separated);
@@ -36,22 +36,31 @@ namespace platform {
} }
void ReportConsole::body() void ReportConsole::body()
{ {
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl; cout << "=== ========================= ====== ===== === ========= ========= ========= =============== =================== ====================" << endl;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
bool odd = true; bool odd = true;
int index = 0;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
if (selectedIndex != -1 && index != selectedIndex) {
index++;
continue;
}
auto color = odd ? Colors::CYAN() : Colors::BLUE(); auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << r["dataset"].get<string>() << " "; cout << color;
cout << setw(3) << index++ << " ";
cout << setw(25) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " "; cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " "; cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " "; cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>() << " "; cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>();
cout << setw(11) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " "; const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
try { try {
cout << r["hyperparameters"].get<string>(); cout << r["hyperparameters"].get<string>();
} }
@@ -63,7 +72,7 @@ namespace platform {
totalScore += r["score"].get<double>(); totalScore += r["score"].get<double>();
odd = !odd; odd = !odd;
} }
if (data["results"].size() == 1) { if (data["results"].size() == 1 || selectedIndex != -1) {
cout << string(MAXL, '*') << endl; cout << string(MAXL, '*') << endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
@@ -74,9 +83,21 @@ namespace platform {
footer(totalScore); footer(totalScore);
} }
} }
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
stringstream oss;
oss << setw(3) << left << item.first;
oss << setw(3) << right << item.second << " ";
oss << left << meaning.at(item.first);
cout << headerLine(oss.str(), 2);
}
}
void ReportConsole::footer(double totalScore) void ReportConsole::footer(double totalScore)
{ {
cout << Colors::MAGENTA() << string(MAXL, '*') << endl; cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
showSummary();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) { if (score == BestResult::scoreName()) {
stringstream oss; stringstream oss;

View File

@@ -7,16 +7,18 @@
namespace platform { namespace platform {
using namespace std; using namespace std;
const int MAXL = 128; const int MAXL = 133;
class ReportConsole : public ReportBase{ class ReportConsole : public ReportBase {
public: public:
explicit ReportConsole(json data_) : ReportBase(data_) {}; explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default; virtual ~ReportConsole() = default;
private: private:
string headerLine(const string& text); int selectedIndex;
string headerLine(const string& text, int utf);
void header() override; void header() override;
void body() override; void body() override;
void footer(double totalScore); void footer(double totalScore);
void showSummary();
}; };
}; };
#endif #endif

View File

@@ -13,17 +13,195 @@ namespace platform {
string do_grouping() const { return "\03"; } string do_grouping() const { return "\03"; }
}; };
/// Builds an Excel report on top of `workbook`, sets the body font size and
/// the colour palette, then calls createFile().
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook) : ReportBase(data_, compare), row(0), workbook(workbook)
{
    // Background colours for alternating rows and for the title.
    colorEven = 0xFDE9D9;
    colorOdd = 0xDCE6F1;
    colorTitle = 0xB1A0C7;
    // Font size for report body.
    normalSize = 14;
    createFile();
}
/// Returns the workbook pointer held by this report.
lxw_workbook* ReportExcel::getWorkbook()
{
    return this->workbook;
}
/// Resolves a style name to the registered format for the current row parity.
///
/// @param style base style name; an empty name means "no format"
/// @return null for an empty name, otherwise the format stored under
///         "<style>_odd" or "<style>_even" depending on the member `row`
///         (throws via `at` if that key was never registered)
lxw_format* ReportExcel::efectiveStyle(const string& style)
{
    if (style.empty()) {
        return nullptr;
    }
    const string key = style + (row % 2 ? "_odd" : "_even");
    return styles.at(key);
}
void ReportExcel::writeString(int row, int col, const string& text, const string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ReportExcel::writeInt(int row, int col, const int number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::writeDouble(int row, int col, const double number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 6, 1);
vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
/// Sets the background colour of `style` from the row-parity flag.
// NOTE(review): `odd == true` selects colorEven and vice versa. This looks swapped
// relative to the member names; confirm it is intentional before changing it.
void ReportExcel::addColor(lxw_format* style, bool odd)
{
    uint32_t background = colorOdd;
    if (odd) {
        background = colorEven;
    }
    format_set_bg_color(style, lxw_color_t(background));
}
// Configures `style` as the body style called `name`.
//   name  : one of "text", "textCentered", "bodyHeader", "result", "time",
//           "ints" or "floats"; any other name only receives the row colour.
//   style : format to configure.
//   odd   : row-parity variant, forwarded to addColor().
// Every known style gets the body font size and a thin border; the rest of the
// attributes (alignment, bold, number format, title background) depend on the name.
void ReportExcel::createStyle(const string& name, lxw_format* style, bool odd)
{
    addColor(style, odd);

    const bool isHeader = name == "bodyHeader";
    const bool isCentered = name == "textCentered";

    // Number format of the numeric styles (nullptr for the textual ones).
    const char* numberFormat = nullptr;
    if (name == "result") {
        numberFormat = "0.0000000";
    } else if (name == "time") {
        numberFormat = "#,##0.000000";
    } else if (name == "ints") {
        numberFormat = "###,##0";
    } else if (name == "floats") {
        numberFormat = "#,##0.00";
    }

    const bool isKnown = isHeader || isCentered || name == "text" || numberFormat != nullptr;
    if (!isKnown) {
        return;
    }

    // Attributes shared by all known styles.
    format_set_font_size(style, normalSize);
    format_set_border(style, LXW_BORDER_THIN);

    if (isHeader) {
        format_set_bold(style);
        format_set_align(style, LXW_ALIGN_CENTER);
        format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
        // The header overrides the odd/even background set by addColor().
        format_set_bg_color(style, lxw_color_t(colorTitle));
    } else if (isCentered) {
        format_set_align(style, LXW_ALIGN_CENTER);
    }
    if (numberFormat != nullptr) {
        format_set_num_format(style, numberFormat);
    }
}
void ReportExcel::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
// Sets the document properties (title, subject, author, ...) of the workbook.
// The title is taken from data["title"]; the remaining fields are fixed texts.
void ReportExcel::setProperties()
{
    // Keep the title in a std::string: it owns a writable, NUL-terminated
    // buffer of the right size, so neither a variable-length array (not
    // standard C++) nor strcpy is needed, and the JSON value is read only once.
    string title = data["title"].get<string>();
    lxw_doc_properties properties = {
        .title = title.data(),
        .subject = "Machine learning results",
        .author = "Ricardo Montañana Gómez",
        .manager = "Dr. J. A. Gámez, Dr. J. M. Puerta",
        .company = "UCLM",
        .comments = "Created with libxlsxwriter and c++",
    };
    // `title` stays alive until after this call, exactly as the former local buffer did.
    workbook_set_properties(workbook, &properties);
}
void ReportExcel::createFile() void ReportExcel::createFile()
{ {
doc.create(Paths::excel() + "some_results.xlsx"); if (workbook == NULL) {
wks = doc.workbook().worksheet("Sheet1"); workbook = workbook_new((Paths::excel() + fileName).c_str());
wks.setName(data["model"].get<string>()); }
const string name = data["model"].get<string>();
string suffix = "";
string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName);
}
}
cout << "Adding sheet " << efectiveName << " to " << Paths::excel() + fileName << endl;
setProperties();
createFormats();
formatColumns();
} }
void ReportExcel::closeFile() void ReportExcel::closeFile()
{ {
doc.save(); workbook_close(workbook);
doc.close();
} }
void ReportExcel::header() void ReportExcel::header()
@@ -32,45 +210,62 @@ namespace platform {
locale::global(mylocale); locale::global(mylocale);
cout.imbue(mylocale); cout.imbue(mylocale);
stringstream oss; stringstream oss;
wks.cell("A1").value().set( string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " +
"Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + data["language"].get<string>() + " ver. " + data["language_version"].get<string>() +
to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>()); " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>();
wks.cell("A2").value() = data["title"].get<string>(); worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
wks.cell("A3").value() = "Random seeds: " + fromVector("seeds") + " Stratified: " + worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]);
(data["stratified"].get<bool>() ? "True" : "False"); worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]);
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
<< data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>(); oss << setprecision(2) << fixed << data["duration"].get<float>() << " s";
wks.cell("A4").value() = oss.str(); worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
wks.cell("A5").value() = "Score is " + data["score_name"].get<string>(); oss.str("");
oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
} }
void ReportExcel::body() void ReportExcel::body()
{ {
auto header = vector<string>( auto head = vector<string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "Time", { "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" }); "Time Std.", "Hyperparameters" });
int col = 1; int col = 0;
for (const auto& item : header) { for (const auto& item : head) {
wks.cell(8, col++).value() = item; writeString(5, col++, item, "bodyHeader");
} }
int row = 9; row = 6;
col = 1; col = 0;
int hypSize = 22;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
string hyperparameters; string hyperparameters;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
wks.cell(row, col).value() = r["dataset"].get<string>(); writeString(row, col, r["dataset"].get<string>(), "text");
wks.cell(row, col + 1).value() = r["samples"].get<int>(); writeInt(row, col + 1, r["samples"].get<int>(), "ints");
wks.cell(row, col + 2).value() = r["features"].get<int>(); writeInt(row, col + 2, r["features"].get<int>(), "ints");
wks.cell(row, col + 3).value() = r["classes"].get<int>(); writeInt(row, col + 3, r["classes"].get<int>(), "ints");
wks.cell(row, col + 4).value() = r["nodes"].get<float>(); writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
wks.cell(row, col + 5).value() = r["leaves"].get<float>(); writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
wks.cell(row, col + 6).value() = r["depth"].get<float>(); writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
wks.cell(row, col + 7).value() = r["score"].get<double>(); writeDouble(row, col + 7, r["score"].get<double>(), "result");
wks.cell(row, col + 8).value() = r["score_std"].get<double>(); writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
wks.cell(row, col + 9).value() = r["time"].get<double>(); const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
wks.cell(row, col + 10).value() = r["time_std"].get<double>(); writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
try { try {
hyperparameters = r["hyperparameters"].get<string>(); hyperparameters = r["hyperparameters"].get<string>();
} }
@@ -79,31 +274,57 @@ namespace platform {
oss << r["hyperparameters"]; oss << r["hyperparameters"];
hyperparameters = oss.str(); hyperparameters = oss.str();
} }
wks.cell(row, col + 11).value() = hyperparameters; if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r; lastResult = r;
totalScore += r["score"].get<double>(); totalScore += r["score"].get<double>();
row++; row++;
} }
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) { if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++; row++;
col = 1; col = 1;
wks.cell(row, col).value() = group; writeString(row, col, group, "text");
for (double item : lastResult[group]) { for (double item : lastResult[group]) {
wks.cell(row, ++col).value() = item; string style = group.find("scores") != string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
} }
} }
// Set width of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else { } else {
footer(totalScore, row); footer(totalScore, row);
} }
} }
void ReportExcel::showSummary()
{
for (const auto& item : summary) {
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
row += 1;
}
}
void ReportExcel::footer(double totalScore, int row) void ReportExcel::footer(double totalScore, int row)
{ {
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) { if (score == BestResult::scoreName()) {
wks.cell(row + 2, 1).value() = score + " compared to " + BestResult::title() + " .: "; worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestResult::title() + " .:").c_str(), efectiveStyle("text"));
wks.cell(row + 2, 5).value() = totalScore / BestResult::score(); writeDouble(row, 6, totalScore / BestResult::score(), "result");
} }
} }
} }

View File

@@ -1,25 +1,42 @@
#ifndef REPORTEXCEL_H #ifndef REPORTEXCEL_H
#define REPORTEXCEL_H #define REPORTEXCEL_H
#include <OpenXLSX.hpp> #include<map>
#include "xlsxwriter.h"
#include "ReportBase.h" #include "ReportBase.h"
#include "Paths.h"
#include "Colors.h" #include "Colors.h"
namespace platform { namespace platform {
using namespace std; using namespace std;
using namespace OpenXLSX;
const int MAXLL = 128; const int MAXLL = 128;
class ReportExcel : public ReportBase{
class ReportExcel : public ReportBase {
public: public:
explicit ReportExcel(json data_) : ReportBase(data_) {createFile();}; explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook);
virtual ~ReportExcel() {closeFile();}; lxw_workbook* getWorkbook();
private: private:
void writeString(int row, int col, const string& text, const string& style = "");
void writeInt(int row, int col, const int number, const string& style = "");
void writeDouble(int row, int col, const double number, const string& style = "");
void formatColumns();
void createFormats();
void setProperties();
void createFile(); void createFile();
void closeFile(); void closeFile();
XLDocument doc; void showSummary();
XLWorksheet wks; lxw_workbook* workbook;
lxw_worksheet* worksheet;
map<string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
const string fileName = "some_results.xlsx";
void header() override; void header() override;
void body() override; void body() override;
void footer(double totalScore, int row); void footer(double totalScore, int row);
void createStyle(const string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name);
}; };
}; };
#endif // !REPORTEXCEL_H #endif // !REPORTEXCEL_H

View File

@@ -23,6 +23,7 @@ namespace platform {
title = data["title"]; title = data["title"];
duration = data["duration"]; duration = data["duration"];
model = data["model"]; model = data["model"];
complete = data["results"].size() > 1;
} }
json Result::load() const json Result::load() const
{ {
@@ -41,7 +42,7 @@ namespace platform {
if (filename.find(".json") != string::npos && filename.find("results_") == 0) { if (filename.find(".json") != string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename); auto result = Result(path, filename);
bool addResult = true; bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName()) if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
addResult = false; addResult = false;
if (addResult) if (addResult)
files.push_back(result); files.push_back(result);
@@ -55,6 +56,8 @@ namespace platform {
oss << setw(12) << left << model << " "; oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " "; oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " "; oss << right << setw(11) << setprecision(7) << fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << setw(1) << " " << completeString << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " "; oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " "; oss << setw(50) << left << title << " ";
return oss.str(); return oss.str();
@@ -63,9 +66,15 @@ namespace platform {
{ {
cout << Colors::GREEN() << "Results found: " << files.size() << endl; cout << Colors::GREEN() << "Results found: " << files.size() << endl;
cout << "-------------------" << endl; cout << "-------------------" << endl;
if (complete) {
cout << Colors::MAGENTA() << "Only listing complete results" << endl;
}
if (partial) {
cout << Colors::MAGENTA() << "Only listing partial results" << endl;
}
auto i = 0; auto i = 0;
cout << " # Date Model Score Name Score Duration Title" << endl; cout << Colors::GREEN() << " # Date Model Score Name Score C/P Duration Title" << endl;
cout << "=== ========== ============ =========== =========== ========= =============================================================" << endl; cout << "=== ========== ============ =========== =========== === ========= =============================================================" << endl;
bool odd = true; bool odd = true;
for (const auto& result : files) { for (const auto& result : files) {
auto color = odd ? Colors::BLUE() : Colors::CYAN(); auto color = odd ? Colors::BLUE() : Colors::CYAN();
@@ -95,26 +104,51 @@ namespace platform {
cout << "Invalid index" << endl; cout << "Invalid index" << endl;
return -1; return -1;
} }
void Results::report(const int index, const bool excelReport) const void Results::report(const int index, const bool excelReport)
{ {
cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl; cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
auto data = files.at(index).load(); auto data = files.at(index).load();
if (excelReport) { if (excelReport) {
ReportExcel report(data); ReportExcel reporter(data, compare, workbook);
report.show(); reporter.show();
openExcel = true;
workbook = reporter.getWorkbook();
} else { } else {
ReportConsole report(data); ReportConsole reporter(data, compare);
report.show(); reporter.show();
} }
} }
// Shows on the console a single entry of a result file.
//   index : position of the result file in `files` (files.at() throws if out of range).
//   idx   : position of the entry inside data["results"] of that file.
// Prints "Invalid index" and returns when idx is outside [0, results size).
void Results::showIndex(const int index, const int idx) const
{
    auto data = files.at(index).load();
    const bool inRange = idx >= 0 && idx < static_cast<int>(data["results"].size());
    if (!inRange) {
        cout << "Invalid index" << endl;
        return;
    }
    cout << Colors::YELLOW() << "Showing " << files.at(index).getFilename() << endl;
    ReportConsole reporter(data, compare, idx);
    reporter.show();
}
void Results::menu() void Results::menu()
{ {
char option; char option;
int index; int index;
bool finished = false; bool finished = false;
string color, context;
string filename, line, options = "qldhsre"; string filename, line, options = "qldhsre";
while (!finished) { while (!finished) {
cout << Colors::RESET() << "Choose option (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): "; if (indexList) {
color = Colors::GREEN();
context = " (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): ";
options = "qldhsre";
} else {
color = Colors::MAGENTA();
context = " (quit='q', list='l'): ";
options = "ql";
}
cout << Colors::RESET() << color;
cout << "Choose option " << context;
getline(cin, line); getline(cin, line);
if (line.size() == 0) if (line.size() == 0)
continue; continue;
@@ -126,9 +160,18 @@ namespace platform {
option = line[0]; option = line[0];
} else { } else {
if (all_of(line.begin(), line.end(), ::isdigit)) { if (all_of(line.begin(), line.end(), ::isdigit)) {
index = stoi(line); int idx = stoi(line);
if (index >= 0 && index < files.size()) { if (indexList) {
report(index, false); // The value is about the files list
index = idx;
if (index >= 0 && index < files.size()) {
report(index, false);
indexList = false;
continue;
}
} else {
// The value is about the result showed on screen
showIndex(index, idx);
continue; continue;
} }
} }
@@ -141,6 +184,7 @@ namespace platform {
break; break;
case 'l': case 'l':
show(); show();
indexList = true;
break; break;
case 'd': case 'd':
index = getIndex("delete"); index = getIndex("delete");
@@ -152,6 +196,7 @@ namespace platform {
files.erase(files.begin() + index); files.erase(files.begin() + index);
cout << "File: " + filename + " deleted!" << endl; cout << "File: " + filename + " deleted!" << endl;
show(); show();
indexList = true;
break; break;
case 'h': case 'h':
index = getIndex("hide"); index = getIndex("hide");
@@ -163,21 +208,25 @@ namespace platform {
files.erase(files.begin() + index); files.erase(files.begin() + index);
show(); show();
menu(); menu();
indexList = true;
break; break;
case 's': case 's':
sortList(); sortList();
indexList = true;
show(); show();
break; break;
case 'r': case 'r':
index = getIndex("report"); index = getIndex("report");
if (index == -1) if (index == -1)
break; break;
indexList = false;
report(index, false); report(index, false);
break; break;
case 'e': case 'e':
index = getIndex("excel"); index = getIndex("excel");
if (index == -1) if (index == -1)
break; break;
indexList = true;
report(index, true); report(index, true);
break; break;
default: default:
@@ -248,6 +297,9 @@ namespace platform {
sortDate(); sortDate();
show(); show();
menu(); menu();
if (openExcel) {
workbook_close(workbook);
}
cout << "Done!" << endl; cout << "Done!" << endl;
} }

View File

@@ -1,5 +1,6 @@
#ifndef RESULTS_H #ifndef RESULTS_H
#define RESULTS_H #define RESULTS_H
#include "xlsxwriter.h"
#include <map> #include <map>
#include <vector> #include <vector>
#include <string> #include <string>
@@ -20,6 +21,7 @@ namespace platform {
double getDuration() const { return duration; }; double getDuration() const { return duration; };
string getModel() const { return model; }; string getModel() const { return model; };
string getScoreName() const { return scoreName; }; string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private: private:
string path; string path;
string filename; string filename;
@@ -29,20 +31,32 @@ namespace platform {
double duration; double duration;
string model; string model;
string scoreName; string scoreName;
bool complete;
}; };
class Results { class Results {
public: public:
Results(const string& path, const int max, const string& model, const string& score) : path(path), max(max), model(model), scoreName(score) { load(); }; Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
path(path), max(max), model(model), scoreName(score), complete(complete), partial(partial), compare(compare)
{
load();
};
void manage(); void manage();
private: private:
string path; string path;
int max; int max;
string model; string model;
string scoreName; string scoreName;
bool complete;
bool partial;
bool indexList = true;
bool openExcel = false;
bool compare;
lxw_workbook* workbook = NULL;
vector<Result> files; vector<Result> files;
void load(); // Loads the list of results void load(); // Loads the list of results
void show() const; void show() const;
void report(const int index, const bool excelReport) const; void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx) const;
int getIndex(const string& intent) const; int getIndex(const string& intent) const;
void menu(); void menu();
void sortList(); void sortList();

View File

@@ -87,7 +87,7 @@ int main(int argc, char** argv)
auto stratified = program.get<bool>("stratified"); auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds"); auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds"); auto seeds = program.get<vector<int>>("seeds");
auto hyperparameters =program.get<string>("hyperparameters"); auto hyperparameters = program.get<string>("hyperparameters");
vector<string> filesToTest; vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF); auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title"); auto title = program.get<string>("title");
@@ -102,7 +102,7 @@ int main(int argc, char** argv)
} }
filesToTest.push_back(file_name); filesToTest.push_back(file_name);
} else { } else {
filesToTest = platform::Datasets(path, true, platform::ARFF).getNames(); filesToTest = datasets.getNames();
saveResults = true; saveResults = true;
} }
/* /*

View File

@@ -12,6 +12,9 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
auto number = program.get<int>("number"); auto number = program.get<int>("number");
@@ -20,6 +23,9 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
} }
auto model = program.get<string>("model"); auto model = program.get<string>("model");
auto score = program.get<string>("score"); auto score = program.get<string>("score");
auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
} }
catch (const exception& err) { catch (const exception& err) {
cerr << err.what() << endl; cerr << err.what() << endl;
@@ -35,7 +41,12 @@ int main(int argc, char** argv)
auto number = program.get<int>("number"); auto number = program.get<int>("number");
auto model = program.get<string>("model"); auto model = program.get<string>("model");
auto score = program.get<string>("score"); auto score = program.get<string>("score");
auto results = platform::Results(platform::Paths::results(), number, model, score); auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
if (complete)
partial = false;
auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial, compare);
results.manage(); results.manage();
return 0; return 0;
} }

View File

@@ -69,11 +69,12 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32); Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1); states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0); auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32)); Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
} }
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1); states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
} else { } else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32); Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {

View File

@@ -4,6 +4,7 @@ if(ENABLE_TESTING)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES}) set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_MAIN} ${TEST_SOURCES}) add_executable(${TEST_MAIN} ${TEST_SOURCES})
target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)