Compare commits
30 Commits
libxlsxwri
...
boost
Author | SHA1 | Date | |
---|---|---|---|
926de2bebd
|
|||
71704e3547
|
|||
3b06534327
|
|||
ac89a451e3
|
|||
00c6cf663b
|
|||
5043c12be8
|
|||
11320e2cc7
|
|||
ce66483b65
|
|||
cab8e14b2d
|
|||
f0d0abe891
|
|||
dcba146e12
|
|||
3ea0285119
|
|||
e3888e1503 | |||
06de13df98
|
|||
de4fa6a04f
|
|||
3a7bf4e672
|
|||
cd0bc02a74
|
|||
c8597a794e
|
|||
b30416364d
|
|||
3a16589220
|
|||
c4f9187e2a
|
|||
c4d0a5b4e6
|
|||
7bfafe555f
|
|||
337b6f7e79
|
|||
5fa0b957dd
|
|||
67252fc41d
|
|||
94ae9456a0
|
|||
781993e326
|
|||
8257a6ae39
|
|||
fc81730dfc |
14
.vscode/launch.json
vendored
14
.vscode/launch.json
vendored
@@ -37,6 +37,20 @@
|
|||||||
],
|
],
|
||||||
"cwd": "/Users/rmontanana/Code/discretizbench",
|
"cwd": "/Users/rmontanana/Code/discretizbench",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "best",
|
||||||
|
"program": "${workspaceFolder}/build/src/Platform/best",
|
||||||
|
"args": [
|
||||||
|
"-m",
|
||||||
|
"BoostAODE",
|
||||||
|
"-s",
|
||||||
|
"accuracy",
|
||||||
|
"--build",
|
||||||
|
],
|
||||||
|
"cwd": "/Users/rmontanana/Code/discretizbench",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "lldb",
|
"type": "lldb",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
|
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
|||||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||||
option(ENABLE_TESTING "Unit testing build" OFF)
|
option(ENABLE_TESTING "Unit testing build" OFF)
|
||||||
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
||||||
|
|
||||||
|
# Boost Library
|
||||||
|
set(Boost_USE_STATIC_LIBS OFF)
|
||||||
|
set(Boost_USE_MULTITHREADED ON)
|
||||||
|
set(Boost_USE_STATIC_RUNTIME OFF)
|
||||||
|
find_package(Boost 1.78.0 REQUIRED)
|
||||||
|
if(Boost_FOUND)
|
||||||
|
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
|
||||||
|
include_directories(${Boost_INCLUDE_DIRS})
|
||||||
|
endif()
|
||||||
|
|
||||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||||
# CMakes modules
|
# CMakes modules
|
||||||
# --------------
|
# --------------
|
||||||
@@ -54,6 +65,7 @@ endif (ENABLE_CLANG_TIDY)
|
|||||||
add_git_submodule("lib/mdlp")
|
add_git_submodule("lib/mdlp")
|
||||||
add_git_submodule("lib/argparse")
|
add_git_submodule("lib/argparse")
|
||||||
add_git_submodule("lib/json")
|
add_git_submodule("lib/json")
|
||||||
|
find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib)
|
||||||
|
|
||||||
# Subdirectories
|
# Subdirectories
|
||||||
# --------------
|
# --------------
|
||||||
|
7
Makefile
7
Makefile
@@ -19,13 +19,14 @@ copy: ## Copy binary files to selected folder
|
|||||||
@cp build/src/Platform/main $(dest)
|
@cp build/src/Platform/main $(dest)
|
||||||
@cp build/src/Platform/list $(dest)
|
@cp build/src/Platform/list $(dest)
|
||||||
@cp build/src/Platform/manage $(dest)
|
@cp build/src/Platform/manage $(dest)
|
||||||
|
@cp build/src/Platform/best $(dest)
|
||||||
@echo ">>> Done"
|
@echo ">>> Done"
|
||||||
|
|
||||||
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
||||||
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
||||||
|
|
||||||
build: ## Build the main and BayesNetSample
|
build: ## Build the main and BayesNetSample
|
||||||
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32
|
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32
|
||||||
|
|
||||||
clean: ## Clean the debug info
|
clean: ## Clean the debug info
|
||||||
@echo ">>> Cleaning Debug BayesNet ...";
|
@echo ">>> Cleaning Debug BayesNet ...";
|
||||||
@@ -40,7 +41,7 @@ debug: ## Build a debug version of the project
|
|||||||
@if [ -d ./build ]; then rm -rf ./build; fi
|
@if [ -d ./build ]; then rm -rf ./build; fi
|
||||||
@mkdir build;
|
@mkdir build;
|
||||||
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
|
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
|
||||||
cmake --build build -t main -t BayesNetSample -t manage -t list unit_tests -j 32;
|
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32;
|
||||||
@echo ">>> Done";
|
@echo ">>> Done";
|
||||||
|
|
||||||
release: ## Build a Release version of the project
|
release: ## Build a Release version of the project
|
||||||
@@ -48,7 +49,7 @@ release: ## Build a Release version of the project
|
|||||||
@if [ -d ./build ]; then rm -rf ./build; fi
|
@if [ -d ./build ]; then rm -rf ./build; fi
|
||||||
@mkdir build;
|
@mkdir build;
|
||||||
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
|
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
|
||||||
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32;
|
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32;
|
||||||
@echo ">>> Done";
|
@echo ">>> Done";
|
||||||
|
|
||||||
test: ## Run tests
|
test: ## Run tests
|
||||||
|
@@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch
|
|||||||
|
|
||||||
## 0. Setup
|
## 0. Setup
|
||||||
|
|
||||||
### libxlswriter
|
|
||||||
|
|
||||||
Before compiling BayesNet.
|
Before compiling BayesNet.
|
||||||
|
|
||||||
|
### boost library
|
||||||
|
|
||||||
|
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
|
||||||
|
|
||||||
|
### libxlsxwriter
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd lib/libxlsxwriter
|
cd lib/libxlsxwriter
|
||||||
make
|
make
|
||||||
|
344
sample/sample.cc
344
sample/sample.cc
@@ -104,180 +104,180 @@ int main(int argc, char** argv)
|
|||||||
for (int i = 0; i < 10; i++) {
|
for (int i = 0; i < 10; i++) {
|
||||||
cout << weights_.index({ i }).item<double>() << endl;
|
cout << weights_.index({ i }).item<double>() << endl;
|
||||||
}
|
}
|
||||||
// map<string, bool> datasets = {
|
map<string, bool> datasets = {
|
||||||
// {"diabetes", true},
|
{"diabetes", true},
|
||||||
// {"ecoli", true},
|
{"ecoli", true},
|
||||||
// {"glass", true},
|
{"glass", true},
|
||||||
// {"iris", true},
|
{"iris", true},
|
||||||
// {"kdd_JapaneseVowels", false},
|
{"kdd_JapaneseVowels", false},
|
||||||
// {"letter", true},
|
{"letter", true},
|
||||||
// {"liver-disorders", true},
|
{"liver-disorders", true},
|
||||||
// {"mfeat-factors", true},
|
{"mfeat-factors", true},
|
||||||
// };
|
};
|
||||||
// auto valid_datasets = vector<string>();
|
auto valid_datasets = vector<string>();
|
||||||
// transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
|
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
|
||||||
// [](const pair<string, bool>& pair) { return pair.first; });
|
[](const pair<string, bool>& pair) { return pair.first; });
|
||||||
// argparse::ArgumentParser program("BayesNetSample");
|
argparse::ArgumentParser program("BayesNetSample");
|
||||||
// program.add_argument("-d", "--dataset")
|
program.add_argument("-d", "--dataset")
|
||||||
// .help("Dataset file name")
|
.help("Dataset file name")
|
||||||
// .action([valid_datasets](const std::string& value) {
|
.action([valid_datasets](const std::string& value) {
|
||||||
// if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
|
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
|
||||||
// return value;
|
return value;
|
||||||
// }
|
}
|
||||||
// throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
|
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
|
||||||
// }
|
}
|
||||||
// );
|
);
|
||||||
// program.add_argument("-p", "--path")
|
program.add_argument("-p", "--path")
|
||||||
// .help(" folder where the data files are located, default")
|
.help(" folder where the data files are located, default")
|
||||||
// .default_value(string{ PATH }
|
.default_value(string{ PATH }
|
||||||
// );
|
);
|
||||||
// program.add_argument("-m", "--model")
|
program.add_argument("-m", "--model")
|
||||||
// .help("Model to use " + platform::Models::instance()->toString())
|
.help("Model to use " + platform::Models::instance()->toString())
|
||||||
// .action([](const std::string& value) {
|
.action([](const std::string& value) {
|
||||||
// static const vector<string> choices = platform::Models::instance()->getNames();
|
static const vector<string> choices = platform::Models::instance()->getNames();
|
||||||
// if (find(choices.begin(), choices.end(), value) != choices.end()) {
|
if (find(choices.begin(), choices.end(), value) != choices.end()) {
|
||||||
// return value;
|
return value;
|
||||||
// }
|
}
|
||||||
// throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
|
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
|
||||||
// }
|
}
|
||||||
// );
|
);
|
||||||
// program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
||||||
// program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
|
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
|
||||||
// program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
||||||
// program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
|
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
|
||||||
// program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
|
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
|
||||||
// try {
|
try {
|
||||||
// auto k = stoi(value);
|
auto k = stoi(value);
|
||||||
// if (k < 2) {
|
if (k < 2) {
|
||||||
// throw runtime_error("Number of folds must be greater than 1");
|
throw runtime_error("Number of folds must be greater than 1");
|
||||||
// }
|
}
|
||||||
// return k;
|
return k;
|
||||||
// }
|
}
|
||||||
// catch (const runtime_error& err) {
|
catch (const runtime_error& err) {
|
||||||
// throw runtime_error(err.what());
|
throw runtime_error(err.what());
|
||||||
// }
|
}
|
||||||
// catch (...) {
|
catch (...) {
|
||||||
// throw runtime_error("Number of folds must be an integer");
|
throw runtime_error("Number of folds must be an integer");
|
||||||
// }});
|
}});
|
||||||
// program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
||||||
// bool class_last, stratified, tensors, dump_cpt;
|
bool class_last, stratified, tensors, dump_cpt;
|
||||||
// string model_name, file_name, path, complete_file_name;
|
string model_name, file_name, path, complete_file_name;
|
||||||
// int nFolds, seed;
|
int nFolds, seed;
|
||||||
// try {
|
try {
|
||||||
// program.parse_args(argc, argv);
|
program.parse_args(argc, argv);
|
||||||
// file_name = program.get<string>("dataset");
|
file_name = program.get<string>("dataset");
|
||||||
// path = program.get<string>("path");
|
path = program.get<string>("path");
|
||||||
// model_name = program.get<string>("model");
|
model_name = program.get<string>("model");
|
||||||
// complete_file_name = path + file_name + ".arff";
|
complete_file_name = path + file_name + ".arff";
|
||||||
// stratified = program.get<bool>("stratified");
|
stratified = program.get<bool>("stratified");
|
||||||
// tensors = program.get<bool>("tensors");
|
tensors = program.get<bool>("tensors");
|
||||||
// nFolds = program.get<int>("folds");
|
nFolds = program.get<int>("folds");
|
||||||
// seed = program.get<int>("seed");
|
seed = program.get<int>("seed");
|
||||||
// dump_cpt = program.get<bool>("dumpcpt");
|
dump_cpt = program.get<bool>("dumpcpt");
|
||||||
// class_last = datasets[file_name];
|
class_last = datasets[file_name];
|
||||||
// if (!file_exists(complete_file_name)) {
|
if (!file_exists(complete_file_name)) {
|
||||||
// throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// catch (const exception& err) {
|
catch (const exception& err) {
|
||||||
// cerr << err.what() << endl;
|
cerr << err.what() << endl;
|
||||||
// cerr << program;
|
cerr << program;
|
||||||
// exit(1);
|
exit(1);
|
||||||
// }
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Begin Processing
|
* Begin Processing
|
||||||
*/
|
*/
|
||||||
// auto handler = ArffFiles();
|
auto handler = ArffFiles();
|
||||||
// handler.load(complete_file_name, class_last);
|
handler.load(complete_file_name, class_last);
|
||||||
// // Get Dataset X, y
|
// Get Dataset X, y
|
||||||
// vector<mdlp::samples_t>& X = handler.getX();
|
vector<mdlp::samples_t>& X = handler.getX();
|
||||||
// mdlp::labels_t& y = handler.getY();
|
mdlp::labels_t& y = handler.getY();
|
||||||
// // Get className & Features
|
// Get className & Features
|
||||||
// auto className = handler.getClassName();
|
auto className = handler.getClassName();
|
||||||
// vector<string> features;
|
vector<string> features;
|
||||||
// auto attributes = handler.getAttributes();
|
auto attributes = handler.getAttributes();
|
||||||
// transform(attributes.begin(), attributes.end(), back_inserter(features),
|
transform(attributes.begin(), attributes.end(), back_inserter(features),
|
||||||
// [](const pair<string, string>& item) { return item.first; });
|
[](const pair<string, string>& item) { return item.first; });
|
||||||
// // Discretize Dataset
|
// Discretize Dataset
|
||||||
// auto [Xd, maxes] = discretize(X, y, features);
|
auto [Xd, maxes] = discretize(X, y, features);
|
||||||
// maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
||||||
// map<string, vector<int>> states;
|
map<string, vector<int>> states;
|
||||||
// for (auto feature : features) {
|
for (auto feature : features) {
|
||||||
// states[feature] = vector<int>(maxes[feature]);
|
states[feature] = vector<int>(maxes[feature]);
|
||||||
// }
|
}
|
||||||
// states[className] = vector<int>(maxes[className]);
|
states[className] = vector<int>(maxes[className]);
|
||||||
// auto clf = platform::Models::instance()->create(model_name);
|
auto clf = platform::Models::instance()->create(model_name);
|
||||||
// clf->fit(Xd, y, features, className, states);
|
clf->fit(Xd, y, features, className, states);
|
||||||
// if (dump_cpt) {
|
if (dump_cpt) {
|
||||||
// cout << "--- CPT Tables ---" << endl;
|
cout << "--- CPT Tables ---" << endl;
|
||||||
// clf->dump_cpt();
|
clf->dump_cpt();
|
||||||
// }
|
}
|
||||||
// auto lines = clf->show();
|
auto lines = clf->show();
|
||||||
// for (auto line : lines) {
|
for (auto line : lines) {
|
||||||
// cout << line << endl;
|
cout << line << endl;
|
||||||
// }
|
}
|
||||||
// cout << "--- Topological Order ---" << endl;
|
cout << "--- Topological Order ---" << endl;
|
||||||
// auto order = clf->topological_order();
|
auto order = clf->topological_order();
|
||||||
// for (auto name : order) {
|
for (auto name : order) {
|
||||||
// cout << name << ", ";
|
cout << name << ", ";
|
||||||
// }
|
}
|
||||||
// cout << "end." << endl;
|
cout << "end." << endl;
|
||||||
// auto score = clf->score(Xd, y);
|
auto score = clf->score(Xd, y);
|
||||||
// cout << "Score: " << score << endl;
|
cout << "Score: " << score << endl;
|
||||||
// auto graph = clf->graph();
|
auto graph = clf->graph();
|
||||||
// auto dot_file = model_name + "_" + file_name;
|
auto dot_file = model_name + "_" + file_name;
|
||||||
// ofstream file(dot_file + ".dot");
|
ofstream file(dot_file + ".dot");
|
||||||
// file << graph;
|
file << graph;
|
||||||
// file.close();
|
file.close();
|
||||||
// cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
|
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
|
||||||
// cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
|
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
|
||||||
// string stratified_string = stratified ? " Stratified" : "";
|
string stratified_string = stratified ? " Stratified" : "";
|
||||||
// cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
|
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
|
||||||
// cout << "==========================================" << endl;
|
cout << "==========================================" << endl;
|
||||||
// torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
||||||
// torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
||||||
// for (int i = 0; i < features.size(); ++i) {
|
for (int i = 0; i < features.size(); ++i) {
|
||||||
// Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||||
// }
|
}
|
||||||
// float total_score = 0, total_score_train = 0, score_train, score_test;
|
float total_score = 0, total_score_train = 0, score_train, score_test;
|
||||||
// platform::Fold* fold;
|
platform::Fold* fold;
|
||||||
// if (stratified)
|
if (stratified)
|
||||||
// fold = new platform::StratifiedKFold(nFolds, y, seed);
|
fold = new platform::StratifiedKFold(nFolds, y, seed);
|
||||||
// else
|
else
|
||||||
// fold = new platform::KFold(nFolds, y.size(), seed);
|
fold = new platform::KFold(nFolds, y.size(), seed);
|
||||||
// for (auto i = 0; i < nFolds; ++i) {
|
for (auto i = 0; i < nFolds; ++i) {
|
||||||
// auto [train, test] = fold->getFold(i);
|
auto [train, test] = fold->getFold(i);
|
||||||
// cout << "Fold: " << i + 1 << endl;
|
cout << "Fold: " << i + 1 << endl;
|
||||||
// if (tensors) {
|
if (tensors) {
|
||||||
// auto ttrain = torch::tensor(train, torch::kInt64);
|
auto ttrain = torch::tensor(train, torch::kInt64);
|
||||||
// auto ttest = torch::tensor(test, torch::kInt64);
|
auto ttest = torch::tensor(test, torch::kInt64);
|
||||||
// torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
||||||
// torch::Tensor ytraint = yt.index({ ttrain });
|
torch::Tensor ytraint = yt.index({ ttrain });
|
||||||
// torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
||||||
// torch::Tensor ytestt = yt.index({ ttest });
|
torch::Tensor ytestt = yt.index({ ttest });
|
||||||
// clf->fit(Xtraint, ytraint, features, className, states);
|
clf->fit(Xtraint, ytraint, features, className, states);
|
||||||
// auto temp = clf->predict(Xtraint);
|
auto temp = clf->predict(Xtraint);
|
||||||
// score_train = clf->score(Xtraint, ytraint);
|
score_train = clf->score(Xtraint, ytraint);
|
||||||
// score_test = clf->score(Xtestt, ytestt);
|
score_test = clf->score(Xtestt, ytestt);
|
||||||
// } else {
|
} else {
|
||||||
// auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
||||||
// auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
||||||
// clf->fit(Xtrain, ytrain, features, className, states);
|
clf->fit(Xtrain, ytrain, features, className, states);
|
||||||
// score_train = clf->score(Xtrain, ytrain);
|
score_train = clf->score(Xtrain, ytrain);
|
||||||
// score_test = clf->score(Xtest, ytest);
|
score_test = clf->score(Xtest, ytest);
|
||||||
// }
|
}
|
||||||
// if (dump_cpt) {
|
if (dump_cpt) {
|
||||||
// cout << "--- CPT Tables ---" << endl;
|
cout << "--- CPT Tables ---" << endl;
|
||||||
// clf->dump_cpt();
|
clf->dump_cpt();
|
||||||
// }
|
}
|
||||||
// total_score_train += score_train;
|
total_score_train += score_train;
|
||||||
// total_score += score_test;
|
total_score += score_test;
|
||||||
// cout << "Score Train: " << score_train << endl;
|
cout << "Score Train: " << score_train << endl;
|
||||||
// cout << "Score Test : " << score_test << endl;
|
cout << "Score Test : " << score_test << endl;
|
||||||
// cout << "-------------------------------------------------------------------------------" << endl;
|
cout << "-------------------------------------------------------------------------------" << endl;
|
||||||
// }
|
}
|
||||||
// cout << "**********************************************************************************" << endl;
|
cout << "**********************************************************************************" << endl;
|
||||||
// cout << "Average Score Train: " << total_score_train / nFolds << endl;
|
cout << "Average Score Train: " << total_score_train / nFolds << endl;
|
||||||
// cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
|
cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
|
||||||
}
|
}
|
292
src/Platform/BestResults.cc
Normal file
292
src/Platform/BestResults.cc
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
#include <filesystem>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include "BestResults.h"
|
||||||
|
#include "Result.h"
|
||||||
|
#include "Colors.h"
|
||||||
|
#include "Statistics.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
// function ftime_to_string, Code taken from
|
||||||
|
// https://stackoverflow.com/a/58237530/1389271
|
||||||
|
/// @brief Format a time point as a UTC "YYYY-MM-DD HH:MM" string.
/// @tparam TP time point type of any clock (e.g. std::filesystem::file_time_type).
/// @param tp the time point to format.
/// @return the formatted text, or an empty string when the value cannot be
///         converted to a calendar time.
template <typename TP>
std::string ftime_to_string(TP tp)
{
    using namespace std::chrono;
    // Re-base the value onto system_clock: take its offset from "now" on its own
    // clock and apply that offset to system_clock's "now".
    const auto sctp = time_point_cast<system_clock::duration>(tp - TP::clock::now()
        + system_clock::now());
    const auto tt = system_clock::to_time_t(sctp);
    const std::tm* gmt = std::gmtime(&tt);
    if (gmt == nullptr) {
        // std::gmtime reports failure with a null pointer; std::put_time must not receive it.
        return {};
    }
    std::stringstream buffer;
    buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
    return buffer.str();
}
|
||||||
|
namespace platform {
|
||||||
|
|
||||||
|
// Scan every result file selected by loadResultFiles() and keep, for each
// dataset, the entry with the highest score. Each stored value is the array
// [score, hyperparameters, result file name]. The collection is written as JSON
// to path + bestResultFile(), and that file name is returned.
// Terminates the process with exit(1) when no result file is found.
string BestResults::build()
{
    const auto resultFiles = loadResultFiles();
    if (resultFiles.empty()) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    json bests;
    for (const auto& resultFile : resultFiles) {
        auto result = Result(path, resultFile);
        auto data = result.load();
        for (auto const& entry : data.at("results")) {
            const auto dataset = entry.at("dataset").get<string>();
            // A dataset seen for the first time always wins; otherwise the score must be strictly higher.
            const bool improves = !bests.contains(dataset)
                || entry.at("score").get<double>() > bests[dataset].at(0).get<double>();
            if (!improves) {
                continue;
            }
            bests[dataset] = { entry.at("score").get<double>(), entry.at("hyperparameters"), resultFile };
        }
    }
    string bestFileName = path + bestResultFile();
    // Probe the target only to warn the user; the file is overwritten either way.
    FILE* existing = fopen(bestFileName.c_str(), "r");
    if (existing != nullptr) {
        fclose(existing);
        cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
    }
    ofstream output(bestFileName);
    output << bests;
    output.close();
    return bestFileName;
}
|
||||||
|
|
||||||
|
// Name (without directory) of the best-results file for the current score and
// model: "best_results_<score>_<model>.json".
string BestResults::bestResultFile()
{
    string fileName = "best_results_";
    fileName += score;
    fileName += "_";
    fileName += model;
    fileName += ".json";
    return fileName;
}
|
||||||
|
|
||||||
|
// Extract the model and the score name from a result file name of the form
//   results_<score>_<model>_<platform>_<date>_<time>_<n>.json
// e.g. results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
//
// Returns { model, score }. When the name lacks the two underscores that
// delimit the score field, both strings are empty. When there is no underscore
// after the model field, the model runs to the end of the name.
std::pair<std::string, std::string> getModelScore(std::string name)
{
    const auto scoreStart = name.find('_');
    if (scoreStart == std::string::npos) {
        return {};
    }
    const auto modelStart = name.find('_', scoreStart + 1);
    if (modelStart == std::string::npos) {
        return {};
    }
    const auto modelEnd = name.find('_', modelStart + 1);
    std::string score = name.substr(scoreStart + 1, modelStart - scoreStart - 1);
    std::string model = modelEnd == std::string::npos
        ? name.substr(modelStart + 1)
        : name.substr(modelStart + 1, modelEnd - modelStart - 1);
    return { model, score };
}
|
||||||
|
|
||||||
|
// List the file names found in `path` that start with "results_", contain
// ".json", and whose name encodes the current score and the current model
// (every model is accepted when model == "any").
vector<string> BestResults::loadResultFiles()
{
    vector<string> matches;
    for (const auto& entry : std::filesystem::directory_iterator(path)) {
        const auto candidate = entry.path().filename().string();
        const bool isResultFile = candidate.find("results_") == 0
            && candidate.find(".json") != string::npos;
        if (!isResultFile) {
            continue;
        }
        const auto [fileModel, fileScore] = getModelScore(candidate);
        if (!(score == fileScore)) {
            continue;
        }
        if (model == fileModel || model == "any") {
            matches.push_back(candidate);
        }
    }
    return matches;
}
|
||||||
|
|
||||||
|
// Parse the JSON document stored in fileName.
// Throws invalid_argument when the file cannot be opened.
json BestResults::loadFile(const string& fileName)
{
    ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(resultData);
}
|
||||||
|
// Return the distinct model names encoded in the names of the result files
// selected by loadResultFiles(), in the order produced by set<string>.
// Terminates the process with exit(1) when no result file is found.
vector<string> BestResults::getModels()
{
    const auto resultFiles = loadResultFiles();
    if (resultFiles.empty()) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    set<string> uniqueModels;
    for (const auto& resultFile : resultFiles) {
        // getModelScore yields { model, score }; only the model is of interest here.
        uniqueModels.insert(getModelScore(resultFile).first);
    }
    return vector<string>(uniqueModels.begin(), uniqueModels.end());
}
|
||||||
|
|
||||||
|
void BestResults::buildAll()
|
||||||
|
{
|
||||||
|
auto models = getModels();
|
||||||
|
for (const auto& model : models) {
|
||||||
|
cout << "Building best results for model: " << model << endl;
|
||||||
|
this->model = model;
|
||||||
|
build();
|
||||||
|
}
|
||||||
|
model = "any";
|
||||||
|
}
|
||||||
|
|
||||||
|
void BestResults::reportSingle()
|
||||||
|
{
|
||||||
|
string bestFileName = path + bestResultFile();
|
||||||
|
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
|
||||||
|
fclose(fileTest);
|
||||||
|
} else {
|
||||||
|
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
|
||||||
|
auto data = loadFile(bestFileName);
|
||||||
|
cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
|
||||||
|
cout << "--------------------------------------------------------" << endl;
|
||||||
|
cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
|
||||||
|
cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
|
||||||
|
auto i = 0;
|
||||||
|
bool odd = true;
|
||||||
|
for (auto const& item : data.items()) {
|
||||||
|
auto color = odd ? Colors::BLUE() : Colors::CYAN();
|
||||||
|
cout << color << setw(3) << fixed << right << i++ << " ";
|
||||||
|
cout << setw(25) << left << item.key() << " ";
|
||||||
|
cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
|
||||||
|
cout << setw(66) << item.value().at(2).get<string>() << " ";
|
||||||
|
cout << item.value().at(1) << " ";
|
||||||
|
cout << endl;
|
||||||
|
odd = !odd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Load the best-results file of every model in `models` into one JSON object
// keyed by model name, plus a "dateTable" entry holding the formatted write
// time of the OLDEST of those files.
// Side effect: the `model` member is left set to the last model processed.
// Terminates the process with exit(1) when a best-results file cannot be
// opened or when the files do not all contain the same number of datasets.
json BestResults::buildTableResults(vector<string> models)
{
    size_t numberOfDatasets = 0;
    bool first = true;
    json table;
    // Starts at the largest representable time so that any real write time is earlier.
    auto oldestWrite = filesystem::file_time_type::max();
    for (const auto& model : models) {
        this->model = model;
        string bestFileName = path + bestResultFile();
        if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
            fclose(fileTest);
        } else {
            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
            exit(1);
        }
        auto dateWrite = filesystem::last_write_time(bestFileName);
        if (dateWrite < oldestWrite) {
            oldestWrite = dateWrite;
        }
        auto data = loadFile(bestFileName);
        if (first) {
            // The first file fixes the dataset count every other model must match.
            first = false;
            numberOfDatasets = data.size();
        } else if (numberOfDatasets != data.size()) {
            cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
            exit(1);
        }
        table[model] = data;
    }
    table["dateTable"] = ftime_to_string(oldestWrite);
    return table;
}
|
||||||
|
|
||||||
|
// Print a comparison table: one row per dataset, one column per model, with the
// highest score of each row in red, followed by a totals row where the highest
// total is in red.
// `table` is expected to hold one entry per model (dataset -> [score, ...]) and
// a "dateTable" string, as produced by buildTableResults().
void BestResults::printTableResults(vector<string> models, json table)
{
    cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
    cout << "------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset ";
    for (const auto& model : models) {
        cout << setw(12) << left << model << " ";
    }
    cout << endl;
    cout << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    auto i = 0;
    bool odd = true;
    map<string, double> totals;
    for (const auto& model : models) {
        totals[model] = 0.0;
    }
    // Take the dataset list from the first requested model. Using table.begin()
    // would depend on how model names order against the "dateTable" key and
    // could select that string instead of a model entry.
    json origin;
    if (!models.empty()) {
        origin = table.at(models.front());
    }
    for (auto const& item : origin.items()) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << setw(25) << left << item.key() << " ";
        double maxValue = 0;
        // Find out the max value for this dataset
        for (const auto& model : models) {
            double value = table[model].at(item.key()).at(0).get<double>();
            if (value > maxValue) {
                maxValue = value;
            }
        }
        // Print the row with red colors on max values
        for (const auto& model : models) {
            string efectiveColor = color;
            double value = table[model].at(item.key()).at(0).get<double>();
            // Exact comparison is intended: maxValue was copied from one of these same values.
            if (value == maxValue) {
                efectiveColor = Colors::RED();
            }
            totals[model] += value;
            cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
        }
        cout << endl;
        odd = !odd;
    }
    cout << Colors::GREEN() << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    cout << Colors::GREEN() << setw(30) << " Totals...................";
    double max = 0.0;
    for (const auto& total : totals) {
        if (total.second > max) {
            max = total.second;
        }
    }
    for (const auto& model : models) {
        string efectiveColor = Colors::GREEN();
        if (totals[model] == max) {
            efectiveColor = Colors::RED();
        }
        cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
    }
    cout << endl;
}
|
||||||
|
void BestResults::reportAll()
|
||||||
|
{
|
||||||
|
auto models = getModels();
|
||||||
|
// Build the table of results
|
||||||
|
json table = buildTableResults(models);
|
||||||
|
// Print the table of results
|
||||||
|
printTableResults(models, table);
|
||||||
|
// Compute the Friedman test
|
||||||
|
if (friedman) {
|
||||||
|
vector<string> datasets;
|
||||||
|
for (const auto& dataset : table.begin().value().items()) {
|
||||||
|
datasets.push_back(dataset.key());
|
||||||
|
}
|
||||||
|
double significance = 0.05;
|
||||||
|
Statistics stats(models, datasets, table, significance);
|
||||||
|
auto result = stats.friedmanTest();
|
||||||
|
stats.postHocHolmTest(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
29
src/Platform/BestResults.h
Normal file
29
src/Platform/BestResults.h
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <vector> // vector<string> is used in the declarations below
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
    // Collects and reports the best results stored for one model or for all of them.
    class BestResults {
    public:
        // path:     folder holding the result files (NOTE(review): inferred from the name, confirm)
        // score:    name of the score being reported
        // model:    model the single-model operations work on
        // friedman: when true, reportAll() also runs the Friedman and post-hoc Holm tests
        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
        string build();
        void reportSingle();
        // Prints the table with every model and, optionally, the statistical tests.
        void reportAll();
        void buildAll();
    private:
        vector<string> getModels();
        vector<string> loadResultFiles();
        // Returns a JSON object with one entry per model plus a "dateTable" entry.
        json buildTableResults(vector<string> models);
        // Prints the table built by buildTableResults() to stdout.
        void printTableResults(vector<string> models, json table);
        string bestResultFile();
        json loadFile(const string& fileName);
        string path;
        string score;
        string model;
        bool friedman;
    };
}
#endif //BESTRESULTS_H
|
@@ -1,7 +1,7 @@
|
|||||||
#ifndef BESTRESULT_H
|
#ifndef BESTSCORE_H
|
||||||
#define BESTRESULT_H
|
#define BESTSCORE_H
|
||||||
#include <string>
|
#include <string>
|
||||||
class BestResult {
|
class BestScore {
|
||||||
public:
|
public:
|
||||||
static std::string title() { return "STree_default (linear-ovo)"; }
|
static std::string title() { return "STree_default (linear-ovo)"; }
|
||||||
static double score() { return 22.109799; }
|
static double score() { return 22.109799; }
|
@@ -4,13 +4,17 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
|
|||||||
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||||
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
|
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
|
||||||
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
|
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
|
||||||
|
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
|
||||||
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
|
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
|
||||||
add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
|
add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
|
||||||
add_executable(list list.cc platformUtils Datasets.cc)
|
add_executable(list list.cc platformUtils Datasets.cc)
|
||||||
|
add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
|
||||||
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
||||||
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
|
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
|
||||||
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
|
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
|
||||||
|
target_link_libraries(best Boost::boost stdc++fs)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp)
|
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
|
||||||
|
target_link_libraries(best Boost::boost)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
|
@@ -2,7 +2,7 @@
|
|||||||
#include <locale>
|
#include <locale>
|
||||||
#include "Datasets.h"
|
#include "Datasets.h"
|
||||||
#include "ReportBase.h"
|
#include "ReportBase.h"
|
||||||
#include "BestResult.h"
|
#include "BestScore.h"
|
||||||
|
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
@@ -94,6 +94,8 @@ namespace platform {
|
|||||||
ifstream resultData(Paths::results() + "/" + fileName);
|
ifstream resultData(Paths::results() + "/" + fileName);
|
||||||
if (resultData.is_open()) {
|
if (resultData.is_open()) {
|
||||||
bestResults = json::parse(resultData);
|
bestResults = json::parse(resultData);
|
||||||
|
} else {
|
||||||
|
existBestFile = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
@@ -101,7 +103,12 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
catch (exception) {
|
catch (exception) {
|
||||||
value = 1.0;
|
value = 1.0;
|
||||||
|
|
||||||
}
|
}
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
bool ReportBase::getExistBestFile()
|
||||||
|
{
|
||||||
|
return existBestFile;
|
||||||
|
}
|
||||||
}
|
}
|
@@ -3,22 +3,13 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "Paths.h"
|
#include "Paths.h"
|
||||||
|
#include "Symbols.h"
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
namespace platform {
|
namespace platform {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
class Symbols {
|
|
||||||
public:
|
|
||||||
inline static const string check_mark{ "\u2714" };
|
|
||||||
inline static const string exclamation{ "\u2757" };
|
|
||||||
inline static const string black_star{ "\u2605" };
|
|
||||||
inline static const string cross{ "\u2717" };
|
|
||||||
inline static const string upward_arrow{ "\u27B6" };
|
|
||||||
inline static const string down_arrow{ "\u27B4" };
|
|
||||||
inline static const string equal_best{ check_mark };
|
|
||||||
inline static const string better_best{ black_star };
|
|
||||||
};
|
|
||||||
class ReportBase {
|
class ReportBase {
|
||||||
public:
|
public:
|
||||||
explicit ReportBase(json data_, bool compare);
|
explicit ReportBase(json data_, bool compare);
|
||||||
@@ -28,6 +19,7 @@ namespace platform {
|
|||||||
json data;
|
json data;
|
||||||
string fromVector(const string& key);
|
string fromVector(const string& key);
|
||||||
string fVector(const string& title, const json& data, const int width, const int precision);
|
string fVector(const string& title, const json& data, const int width, const int precision);
|
||||||
|
bool getExistBestFile();
|
||||||
virtual void header() = 0;
|
virtual void header() = 0;
|
||||||
virtual void body() = 0;
|
virtual void body() = 0;
|
||||||
virtual void showSummary() = 0;
|
virtual void showSummary() = 0;
|
||||||
@@ -35,10 +27,11 @@ namespace platform {
|
|||||||
map<string, int> summary;
|
map<string, int> summary;
|
||||||
double margin;
|
double margin;
|
||||||
map<string, string> meaning;
|
map<string, string> meaning;
|
||||||
|
bool compare;
|
||||||
private:
|
private:
|
||||||
double bestResult(const string& dataset, const string& model);
|
double bestResult(const string& dataset, const string& model);
|
||||||
bool compare;
|
|
||||||
json bestResults;
|
json bestResults;
|
||||||
|
bool existBestFile = true;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
@@ -1,7 +1,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include "ReportConsole.h"
|
#include "ReportConsole.h"
|
||||||
#include "BestResult.h"
|
#include "BestScore.h"
|
||||||
|
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
@@ -99,11 +99,14 @@ namespace platform {
|
|||||||
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
|
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
|
||||||
showSummary();
|
showSummary();
|
||||||
auto score = data["score_name"].get<string>();
|
auto score = data["score_name"].get<string>();
|
||||||
if (score == BestResult::scoreName()) {
|
if (score == BestScore::scoreName()) {
|
||||||
stringstream oss;
|
stringstream oss;
|
||||||
oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score();
|
oss << score << " compared to " << BestScore::title() << " .: " << totalScore / BestScore::score();
|
||||||
cout << headerLine(oss.str());
|
cout << headerLine(oss.str());
|
||||||
}
|
}
|
||||||
|
if (!getExistBestFile() && compare) {
|
||||||
|
cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
|
||||||
|
}
|
||||||
cout << string(MAXL, '*') << endl << Colors::RESET();
|
cout << string(MAXL, '*') << endl << Colors::RESET();
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -18,7 +18,7 @@ namespace platform {
|
|||||||
void header() override;
|
void header() override;
|
||||||
void body() override;
|
void body() override;
|
||||||
void footer(double totalScore);
|
void footer(double totalScore);
|
||||||
void showSummary();
|
void showSummary() override;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
@@ -1,7 +1,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include "ReportExcel.h"
|
#include "ReportExcel.h"
|
||||||
#include "BestResult.h"
|
#include "BestScore.h"
|
||||||
|
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
@@ -162,11 +162,11 @@ namespace platform {
|
|||||||
strcpy(line, data["title"].get<string>().c_str());
|
strcpy(line, data["title"].get<string>().c_str());
|
||||||
lxw_doc_properties properties = {
|
lxw_doc_properties properties = {
|
||||||
.title = line,
|
.title = line,
|
||||||
.subject = "Machine learning results",
|
.subject = (char*)"Machine learning results",
|
||||||
.author = "Ricardo Montañana Gómez",
|
.author = (char*)"Ricardo Montañana Gómez",
|
||||||
.manager = "Dr. J. A. Gámez, Dr. J. M. Puerta",
|
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
|
||||||
.company = "UCLM",
|
.company = (char*)"UCLM",
|
||||||
.comments = "Created with libxlsxwriter and c++",
|
.comments = (char*)"Created with libxlsxwriter and c++",
|
||||||
};
|
};
|
||||||
workbook_set_properties(workbook, &properties);
|
workbook_set_properties(workbook, &properties);
|
||||||
}
|
}
|
||||||
@@ -322,9 +322,12 @@ namespace platform {
|
|||||||
showSummary();
|
showSummary();
|
||||||
row += 4 + summary.size();
|
row += 4 + summary.size();
|
||||||
auto score = data["score_name"].get<string>();
|
auto score = data["score_name"].get<string>();
|
||||||
if (score == BestResult::scoreName()) {
|
if (score == BestScore::scoreName()) {
|
||||||
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestResult::title() + " .:").c_str(), efectiveStyle("text"));
|
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text"));
|
||||||
writeDouble(row, 6, totalScore / BestResult::score(), "result");
|
writeDouble(row, 6, totalScore / BestScore::score(), "result");
|
||||||
|
}
|
||||||
|
if (!getExistBestFile() && compare) {
|
||||||
|
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -21,7 +21,6 @@ namespace platform {
|
|||||||
void setProperties();
|
void setProperties();
|
||||||
void createFile();
|
void createFile();
|
||||||
void closeFile();
|
void closeFile();
|
||||||
void showSummary();
|
|
||||||
lxw_workbook* workbook;
|
lxw_workbook* workbook;
|
||||||
lxw_worksheet* worksheet;
|
lxw_worksheet* worksheet;
|
||||||
map<string, lxw_format*> styles;
|
map<string, lxw_format*> styles;
|
||||||
@@ -33,6 +32,7 @@ namespace platform {
|
|||||||
const string fileName = "some_results.xlsx";
|
const string fileName = "some_results.xlsx";
|
||||||
void header() override;
|
void header() override;
|
||||||
void body() override;
|
void body() override;
|
||||||
|
void showSummary() override;
|
||||||
void footer(double totalScore, int row);
|
void footer(double totalScore, int row);
|
||||||
void createStyle(const string& name, lxw_format* style, bool odd);
|
void createStyle(const string& name, lxw_format* style, bool odd);
|
||||||
void addColor(lxw_format* style, bool odd);
|
void addColor(lxw_format* style, bool odd);
|
||||||
|
51
src/Platform/Result.cc
Normal file
51
src/Platform/Result.cc
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
#include <filesystem>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include "Result.h"
|
||||||
|
#include "Colors.h"
|
||||||
|
#include "BestScore.h"
|
||||||
|
namespace platform {
|
||||||
|
// Loads the result file path/filename and caches its summary fields.
// The score is the sum of the "score" field of every entry in "results"; when
// the score name matches BestScore::scoreName() the sum is divided by
// BestScore::score(). A result is flagged complete when it has more than one
// entry in "results".
Result::Result(const string& path, const string& filename)
    : path(path)
    , filename(filename)
{
    auto contents = load();
    date = contents["date"].get<string>();
    score = 0;
    for (const auto& entry : contents["results"]) {
        score += entry["score"].get<double>();
    }
    scoreName = contents["score_name"].get<string>();
    const bool comparable = scoreName == BestScore::scoreName();
    if (comparable) {
        score /= BestScore::score();
    }
    title = contents["title"].get<string>();
    duration = contents["duration"].get<double>();
    model = contents["model"].get<string>();
    complete = contents["results"].size() > 1;
}
|
||||||
|
|
||||||
|
// Parses the file path/filename as JSON and returns its contents.
// Throws invalid_argument when the file cannot be opened.
json Result::load() const
{
    ifstream input(path + "/" + filename);
    if (!input.is_open()) {
        throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
    }
    return json::parse(input);
}
|
||||||
|
|
||||||
|
// Formats the result as a single line of text: date, model, score name, score,
// a C(omplete)/P(artial) flag, duration and title.
string Result::to_string() const
{
    const char* completeness = complete ? "C" : "P";
    stringstream line;
    line << date << " "
        << setw(12) << left << model << " "
        << setw(11) << left << scoreName << " "
        << right << setw(11) << setprecision(7) << fixed << score << " "
        << setw(1) << " " << completeness << " "
        << setw(9) << setprecision(3) << fixed << duration << " "
        << setw(50) << left << title << " ";
    return line.str();
}
|
||||||
|
}
|
37
src/Platform/Result.h
Normal file
37
src/Platform/Result.h
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#ifndef RESULT_H
|
||||||
|
#define RESULT_H
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
namespace platform {
|
||||||
|
using namespace std;
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
class Result {
|
||||||
|
public:
|
||||||
|
Result(const string& path, const string& filename);
|
||||||
|
json load() const;
|
||||||
|
string to_string() const;
|
||||||
|
string getFilename() const { return filename; };
|
||||||
|
string getDate() const { return date; };
|
||||||
|
double getScore() const { return score; };
|
||||||
|
string getTitle() const { return title; };
|
||||||
|
double getDuration() const { return duration; };
|
||||||
|
string getModel() const { return model; };
|
||||||
|
string getScoreName() const { return scoreName; };
|
||||||
|
bool isComplete() const { return complete; };
|
||||||
|
private:
|
||||||
|
string path;
|
||||||
|
string filename;
|
||||||
|
string date;
|
||||||
|
double score;
|
||||||
|
string title;
|
||||||
|
double duration;
|
||||||
|
string model;
|
||||||
|
string scoreName;
|
||||||
|
bool complete;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@@ -3,37 +3,9 @@
|
|||||||
#include "Results.h"
|
#include "Results.h"
|
||||||
#include "ReportConsole.h"
|
#include "ReportConsole.h"
|
||||||
#include "ReportExcel.h"
|
#include "ReportExcel.h"
|
||||||
#include "BestResult.h"
|
#include "BestScore.h"
|
||||||
#include "Colors.h"
|
#include "Colors.h"
|
||||||
namespace platform {
|
namespace platform {
|
||||||
Result::Result(const string& path, const string& filename)
|
|
||||||
: path(path)
|
|
||||||
, filename(filename)
|
|
||||||
{
|
|
||||||
auto data = load();
|
|
||||||
date = data["date"];
|
|
||||||
score = 0;
|
|
||||||
for (const auto& result : data["results"]) {
|
|
||||||
score += result["score"].get<double>();
|
|
||||||
}
|
|
||||||
scoreName = data["score_name"];
|
|
||||||
if (scoreName == BestResult::scoreName()) {
|
|
||||||
score /= BestResult::score();
|
|
||||||
}
|
|
||||||
title = data["title"];
|
|
||||||
duration = data["duration"];
|
|
||||||
model = data["model"];
|
|
||||||
complete = data["results"].size() > 1;
|
|
||||||
}
|
|
||||||
json Result::load() const
|
|
||||||
{
|
|
||||||
ifstream resultData(path + "/" + filename);
|
|
||||||
if (resultData.is_open()) {
|
|
||||||
json data = json::parse(resultData);
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
|
|
||||||
}
|
|
||||||
void Results::load()
|
void Results::load()
|
||||||
{
|
{
|
||||||
using std::filesystem::directory_iterator;
|
using std::filesystem::directory_iterator;
|
||||||
@@ -48,19 +20,9 @@ namespace platform {
|
|||||||
files.push_back(result);
|
files.push_back(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (max == 0) {
|
||||||
string Result::to_string() const
|
max = files.size();
|
||||||
{
|
}
|
||||||
stringstream oss;
|
|
||||||
oss << date << " ";
|
|
||||||
oss << setw(12) << left << model << " ";
|
|
||||||
oss << setw(11) << left << scoreName << " ";
|
|
||||||
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
|
|
||||||
auto completeString = isComplete() ? "C" : "P";
|
|
||||||
oss << setw(1) << " " << completeString << " ";
|
|
||||||
oss << setw(9) << setprecision(3) << fixed << duration << " ";
|
|
||||||
oss << setw(50) << left << title << " ";
|
|
||||||
return oss.str();
|
|
||||||
}
|
}
|
||||||
void Results::show() const
|
void Results::show() const
|
||||||
{
|
{
|
||||||
@@ -164,7 +126,7 @@ namespace platform {
|
|||||||
if (indexList) {
|
if (indexList) {
|
||||||
// The value is about the files list
|
// The value is about the files list
|
||||||
index = idx;
|
index = idx;
|
||||||
if (index >= 0 && index < files.size()) {
|
if (index >= 0 && index < max) {
|
||||||
report(index, false);
|
report(index, false);
|
||||||
indexList = false;
|
indexList = false;
|
||||||
continue;
|
continue;
|
||||||
@@ -300,7 +262,7 @@ namespace platform {
|
|||||||
if (openExcel) {
|
if (openExcel) {
|
||||||
workbook_close(workbook);
|
workbook_close(workbook);
|
||||||
}
|
}
|
||||||
cout << "Done!" << endl;
|
cout << Colors::RESET() << "Done!" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@@ -5,34 +5,11 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
|
#include "Result.h"
|
||||||
namespace platform {
|
namespace platform {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
class Result {
|
|
||||||
public:
|
|
||||||
Result(const string& path, const string& filename);
|
|
||||||
json load() const;
|
|
||||||
string to_string() const;
|
|
||||||
string getFilename() const { return filename; };
|
|
||||||
string getDate() const { return date; };
|
|
||||||
double getScore() const { return score; };
|
|
||||||
string getTitle() const { return title; };
|
|
||||||
double getDuration() const { return duration; };
|
|
||||||
string getModel() const { return model; };
|
|
||||||
string getScoreName() const { return scoreName; };
|
|
||||||
bool isComplete() const { return complete; };
|
|
||||||
private:
|
|
||||||
string path;
|
|
||||||
string filename;
|
|
||||||
string date;
|
|
||||||
double score;
|
|
||||||
string title;
|
|
||||||
double duration;
|
|
||||||
string model;
|
|
||||||
string scoreName;
|
|
||||||
bool complete;
|
|
||||||
};
|
|
||||||
class Results {
|
class Results {
|
||||||
public:
|
public:
|
||||||
Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
|
Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
|
||||||
|
215
src/Platform/Statistics.cc
Normal file
215
src/Platform/Statistics.cc
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
#include "Statistics.h"
|
||||||
|
#include "Colors.h"
|
||||||
|
#include "Symbols.h"
|
||||||
|
#include <boost/math/distributions/chi_squared.hpp>
|
||||||
|
#include <boost/math/distributions/normal.hpp>
|
||||||
|
|
||||||
|
namespace platform {
|
||||||
|
|
||||||
|
// Stores copies of the model names, dataset names, results table and
// significance level, and caches the number of models and datasets.
Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance)
    : models(models)
    , datasets(datasets)
    , data(data)
    , significance(significance)
{
    nModels = this->models.size();
    nDatasets = this->datasets.size();
}
|
||||||
|
|
||||||
|
void Statistics::fit()
|
||||||
|
{
|
||||||
|
if (nModels < 3 || nDatasets < 3) {
|
||||||
|
cerr << "nModels: " << nModels << endl;
|
||||||
|
cerr << "nDatasets: " << nDatasets << endl;
|
||||||
|
throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
|
||||||
|
}
|
||||||
|
computeRanks();
|
||||||
|
// Set the control model as the one with the lowest average rank
|
||||||
|
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
|
||||||
|
computeWTL();
|
||||||
|
fitted = true;
|
||||||
|
}
|
||||||
|
// Ranks the (name, value) pairs from the highest value (rank 1) to the lowest.
// Entries with the same value share the average of the ranks they span.
// ranksOrder is sorted in place, in descending order of value.
std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{
    using Entry = std::pair<std::string, double>;
    std::sort(ranksOrder.begin(), ranksOrder.end(), [](const Entry& lhs, const Entry& rhs) {
        return lhs.second > rhs.second;
    });
    const int count = static_cast<int>(ranksOrder.size());
    // First pass: every entry gets the rank of its position
    std::map<std::string, float> ranks;
    for (int position = 0; position < count; ++position) {
        ranks[ranksOrder[position].first] = position + 1.0;
    }
    // Second pass: walk the groups of equal values and average their ranks
    for (int first = 0, last = 0; first < count; first = last) {
        int total = static_cast<int>(ranks[ranksOrder[first].first]);
        for (last = first + 1; last < count && ranksOrder[first].second == ranksOrder[last].second; ++last) {
            total += static_cast<int>(ranks[ranksOrder[last].first]);
        }
        const int groupSize = last - first;
        if (groupSize == 1) {
            continue;
        }
        const float sharedRank = static_cast<float>(total) / groupSize;
        for (int member = first; member < last; ++member) {
            ranks[ranksOrder[member].first] = sharedRank;
        }
    }
    return ranks;
}
|
||||||
|
void Statistics::computeRanks()
|
||||||
|
{
|
||||||
|
map<string, float> ranksLine;
|
||||||
|
for (const auto& dataset : datasets) {
|
||||||
|
vector<pair<string, double>> ranksOrder;
|
||||||
|
for (const auto& model : models) {
|
||||||
|
double value = data[model].at(dataset).at(0).get<double>();
|
||||||
|
ranksOrder.push_back({ model, value });
|
||||||
|
}
|
||||||
|
// Assign the ranks
|
||||||
|
ranksLine = assignRanks(ranksOrder);
|
||||||
|
if (ranks.size() == 0) {
|
||||||
|
ranks = ranksLine;
|
||||||
|
} else {
|
||||||
|
for (const auto& rank : ranksLine) {
|
||||||
|
ranks[rank.first] += rank.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Average the ranks
|
||||||
|
for (const auto& rank : ranks) {
|
||||||
|
ranks[rank.first] /= nDatasets;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void Statistics::computeWTL()
|
||||||
|
{
|
||||||
|
// Compute the WTL matrix
|
||||||
|
for (int i = 0; i < nModels; ++i) {
|
||||||
|
wtl[i] = { 0, 0, 0 };
|
||||||
|
}
|
||||||
|
json origin = data.begin().value();
|
||||||
|
for (auto const& item : origin.items()) {
|
||||||
|
auto controlModel = models.at(controlIdx);
|
||||||
|
double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
|
||||||
|
for (int i = 0; i < nModels; ++i) {
|
||||||
|
if (i == controlIdx) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double value = data[models[i]].at(item.key()).at(0).get<double>();
|
||||||
|
if (value < controlValue) {
|
||||||
|
wtl[i].win++;
|
||||||
|
} else if (value == controlValue) {
|
||||||
|
wtl[i].tie++;
|
||||||
|
} else {
|
||||||
|
wtl[i].loss++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Statistics::postHocHolmTest(bool friedmanResult)
|
||||||
|
{
|
||||||
|
if (!fitted) {
|
||||||
|
fit();
|
||||||
|
}
|
||||||
|
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||||
|
// Post-hoc Holm test
|
||||||
|
// Calculate the p-value for the models paired with the control model
|
||||||
|
map<int, double> stats; // p-value of each model paired with the control model
|
||||||
|
boost::math::normal dist(0.0, 1.0);
|
||||||
|
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
|
||||||
|
for (int i = 0; i < nModels; i++) {
|
||||||
|
if (i == controlIdx) {
|
||||||
|
stats[i] = 0.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
|
||||||
|
double p_value = (long double)2 * (1 - cdf(dist, z));
|
||||||
|
stats[i] = p_value;
|
||||||
|
}
|
||||||
|
// Sort the models by p-value
|
||||||
|
vector<pair<int, double>> statsOrder;
|
||||||
|
for (const auto& stat : stats) {
|
||||||
|
statsOrder.push_back({ stat.first, stat.second });
|
||||||
|
}
|
||||||
|
sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
|
||||||
|
return a.second < b.second;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Holm adjustment
|
||||||
|
for (int i = 0; i < statsOrder.size(); ++i) {
|
||||||
|
auto item = statsOrder.at(i);
|
||||||
|
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
|
||||||
|
double p_value = min((double)1.0, item.second * (nModels - i));
|
||||||
|
p_value = max(before, p_value);
|
||||||
|
statsOrder[i] = { item.first, p_value };
|
||||||
|
}
|
||||||
|
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
|
||||||
|
cout << color;
|
||||||
|
cout << " *************************************************************************************************************" << endl;
|
||||||
|
cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
|
||||||
|
cout << " Control model: " << models[controlIdx] << endl;
|
||||||
|
cout << " Model p-value rank win tie loss Status" << endl;
|
||||||
|
cout << " ============ ============ ========= === === ==== =============" << endl;
|
||||||
|
// sort ranks from lowest to highest
|
||||||
|
vector<pair<string, float>> ranksOrder;
|
||||||
|
for (const auto& rank : ranks) {
|
||||||
|
ranksOrder.push_back({ rank.first, rank.second });
|
||||||
|
}
|
||||||
|
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
|
||||||
|
return a.second < b.second;
|
||||||
|
});
|
||||||
|
for (const auto& item : ranksOrder) {
|
||||||
|
if (item.first == models.at(controlIdx)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
|
||||||
|
double pvalue = 0.0;
|
||||||
|
for (const auto& stat : statsOrder) {
|
||||||
|
if (stat.first == idx) {
|
||||||
|
pvalue = stat.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
|
||||||
|
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
|
||||||
|
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
|
||||||
|
cout << " " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
|
||||||
|
cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
|
||||||
|
cout << " " << status << textStatus << endl;
|
||||||
|
}
|
||||||
|
cout << color << " *************************************************************************************************************" << endl;
|
||||||
|
cout << Colors::RESET();
|
||||||
|
}
|
||||||
|
bool Statistics::friedmanTest()
|
||||||
|
{
|
||||||
|
if (!fitted) {
|
||||||
|
fit();
|
||||||
|
}
|
||||||
|
// Friedman test
|
||||||
|
// Calculate the Friedman statistic
|
||||||
|
cout << Colors::BLUE() << endl;
|
||||||
|
cout << "***************************************************************************************************************" << endl;
|
||||||
|
cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
|
||||||
|
double degreesOfFreedom = nModels - 1.0;
|
||||||
|
double sumSquared = 0;
|
||||||
|
for (const auto& rank : ranks) {
|
||||||
|
sumSquared += pow(rank.second, 2);
|
||||||
|
}
|
||||||
|
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||||
|
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
|
||||||
|
cout << "Friedman statistic: " << friedmanQ << endl;
|
||||||
|
// Calculate the critical value
|
||||||
|
boost::math::chi_squared chiSquared(degreesOfFreedom);
|
||||||
|
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
|
||||||
|
double criticalValue = quantile(chiSquared, 1 - significance);
|
||||||
|
std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
|
||||||
|
<< " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
|
||||||
|
cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
|
||||||
|
bool result;
|
||||||
|
if (p_value < significance) {
|
||||||
|
cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
|
||||||
|
result = true;
|
||||||
|
} else {
|
||||||
|
cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} // namespace platform
|
37
src/Platform/Statistics.h
Normal file
37
src/Platform/Statistics.h
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#ifndef STATISTICS_H
|
||||||
|
#define STATISTICS_H
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
namespace platform {
|
||||||
|
// Win / tie / loss counters; the post-hoc report prints one WTL per index.
// Every counter starts at zero so a default-constructed WTL is never indeterminate.
// The struct remains an aggregate, so WTL{ w, t, l } keeps working.
struct WTL {
    int win{ 0 };
    int tie{ 0 };
    int loss{ 0 };
};
|
||||||
|
class Statistics {
|
||||||
|
public:
|
||||||
|
Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);
|
||||||
|
bool friedmanTest();
|
||||||
|
void postHocHolmTest(bool friedmanResult);
|
||||||
|
private:
|
||||||
|
void fit();
|
||||||
|
void computeRanks();
|
||||||
|
void computeWTL();
|
||||||
|
vector<string> models;
|
||||||
|
vector<string> datasets;
|
||||||
|
json data;
|
||||||
|
double significance;
|
||||||
|
bool fitted = false;
|
||||||
|
int nModels = 0;
|
||||||
|
int nDatasets = 0;
|
||||||
|
int controlIdx = 0;
|
||||||
|
map<int, WTL> wtl;
|
||||||
|
map<string, float> ranks;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // !STATISTICS_H
|
18
src/Platform/Symbols.h
Normal file
18
src/Platform/Symbols.h
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#ifndef SYMBOLS_H
|
||||||
|
#define SYMBOLS_H
|
||||||
|
#include <string>
|
||||||
|
using namespace std;
|
||||||
|
namespace platform {
|
||||||
|
// Unicode glyphs used to decorate console reports.
class Symbols {
public:
    static inline const std::string check_mark = "\u2714";
    static inline const std::string exclamation = "\u2757";
    static inline const std::string black_star = "\u2605";
    static inline const std::string cross = "\u2717";
    static inline const std::string upward_arrow = "\u27B6";
    static inline const std::string down_arrow = "\u27B4";
    // Aliases: declared after the glyphs they copy so those are initialised first.
    static inline const std::string equal_best = check_mark;
    static inline const std::string better_best = black_star;
};
|
||||||
|
}
|
||||||
|
#endif // !SYMBOLS_H
|
71
src/Platform/best.cc
Normal file
71
src/Platform/best.cc
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <argparse/argparse.hpp>
|
||||||
|
#include "Paths.h"
|
||||||
|
#include "BestResults.h"
|
||||||
|
#include "Colors.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Declares and parses the command line of the "best" tool.
// Returns the parsed ArgumentParser. If parsing fails, or if the model or the
// score is empty, the error and the usage text are written to stderr and the
// process exits with status 1.
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
    argparse::ArgumentParser program("best");
    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model (any for all models)");
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        // Only model and score need validating here; the boolean flags always
        // carry a default and are read by the caller.
        const auto model = program.get<string>("model");
        const auto score = program.get<string>("score");
        if (model.empty() || score.empty()) {
            throw runtime_error("Model and score name must be supplied");
        }
    }
    catch (const exception& err) {
        cerr << err.what() << endl;
        cerr << program;
        exit(1);
    }
    return program;
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
auto program = manageArguments(argc, argv);
|
||||||
|
auto model = program.get<string>("model");
|
||||||
|
auto score = program.get<string>("score");
|
||||||
|
auto build = program.get<bool>("build");
|
||||||
|
auto report = program.get<bool>("report");
|
||||||
|
auto friedman = program.get<bool>("friedman");
|
||||||
|
if (friedman && model != "any") {
|
||||||
|
cerr << "Friedman test can only be used with all models" << endl;
|
||||||
|
cerr << program;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (!report && !build) {
|
||||||
|
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
|
||||||
|
cerr << program;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
|
||||||
|
if (build) {
|
||||||
|
if (model == "any") {
|
||||||
|
results.buildAll();
|
||||||
|
} else {
|
||||||
|
string fileName = results.build();
|
||||||
|
cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (report) {
|
||||||
|
if (model == "any") {
|
||||||
|
results.reportAll();
|
||||||
|
} else {
|
||||||
|
results.reportSingle();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
@@ -8,7 +8,6 @@
|
|||||||
#include "ArffFiles.h"
|
#include "ArffFiles.h"
|
||||||
#include "CPPFImdlp.h"
|
#include "CPPFImdlp.h"
|
||||||
using namespace std;
|
using namespace std;
|
||||||
const string PATH = "../../data/";
|
|
||||||
|
|
||||||
bool file_exists(const std::string& name);
|
bool file_exists(const std::string& name);
|
||||||
vector<string> split(const string& text, char delimiter);
|
vector<string> split(const string& text, char delimiter);
|
||||||
|
Reference in New Issue
Block a user