Add Python Classifiers

Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
This commit is contained in:
Ricardo Montañana Gómez 2023-11-17 11:11:05 +01:00
commit 28f3d87e32
116 changed files with 1981 additions and 1359 deletions

28
.vscode/launch.json vendored
View File

@ -5,7 +5,7 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "sample", "name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample", "program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [ "args": [
"-d", "-d",
"iris", "iris",
@ -14,7 +14,7 @@
"-s", "-s",
"271", "271",
"-p", "-p",
"/Users/rmontanana/Code/discretizbench/datasets/", "/home/rmontanana/Code/discretizbench/datasets/",
], ],
//"cwd": "${workspaceFolder}/build/sample/", //"cwd": "${workspaceFolder}/build/sample/",
}, },
@ -22,24 +22,24 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "experiment", "name": "experiment",
"program": "${workspaceFolder}/build/src/Platform/b_main", "program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [ "args": [
"-m", "-m",
"TAN", "STree",
"--stratified", "--stratified",
"-d", "-d",
"zoo", "iris",
"--discretize" //"--discretize"
// "--hyperparameters", // "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}" // "{\"repeatSparent\": true, \"maxModels\": 12}"
], ],
"cwd": "/Users/rmontanana/Code/odtebench", "cwd": "/home/rmontanana/Code/discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "best", "name": "best",
"program": "${workspaceFolder}/build/src/Platform/b_best", "program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [ "args": [
"-m", "-m",
"BoostAODE", "BoostAODE",
@ -47,24 +47,24 @@
"accuracy", "accuracy",
"--build", "--build",
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "/home/rmontanana/Code/discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "manage", "name": "manage",
"program": "${workspaceFolder}/build/src/Platform/b_manage", "program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [ "args": [
"-n", "-n",
"20" "20"
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "/home/rmontanana/Code/discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "list", "name": "list",
"program": "${workspaceFolder}/build/src/Platform/b_list", "program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [], "args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench", //"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "/home/rmontanana/Code/covbench", "cwd": "/home/rmontanana/Code/covbench",
@ -73,7 +73,7 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "test", "name": "test",
"program": "${workspaceFolder}/build/tests/unit_tests", "program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [ "args": [
"-c=\"Metrics Test\"", "-c=\"Metrics Test\"",
// "-s", // "-s",
@ -84,7 +84,7 @@
"name": "Build & debug active file", "name": "Build & debug active file",
"type": "cppdbg", "type": "cppdbg",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/build/bayesnet", "program": "${workspaceFolder}/build_debug/bayesnet",
"args": [], "args": [],
"stopAtEntry": false, "stopAtEntry": false,
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",

View File

@ -24,6 +24,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options # Options
# ------- # -------
@ -35,17 +36,21 @@ option(CODE_COVERAGE "Collect coverage from test library" OFF)
set(Boost_USE_STATIC_LIBS OFF) set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON) set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF) set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.66.0 REQUIRED) find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
if(Boost_FOUND) if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}") message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS})
endif() endif()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule) include(AddGitSubmodule)
if (CODE_COVERAGE) if (CODE_COVERAGE)
enable_testing() enable_testing()
include(CodeCoverage) include(CodeCoverage)
@ -76,6 +81,7 @@ add_subdirectory(config)
add_subdirectory(lib/Files) add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet) add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform) add_subdirectory(src/Platform)
add_subdirectory(src/PyClassifiers)
add_subdirectory(sample) add_subdirectory(sample)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h) file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)

View File

@ -4,11 +4,9 @@
#include <map> #include <map>
#include <iostream> #include <iostream>
using namespace std;
ArffFiles::ArffFiles() = default; ArffFiles::ArffFiles() = default;
vector<string> ArffFiles::getLines() const std::vector<std::string> ArffFiles::getLines() const
{ {
return lines; return lines;
} }
@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const
return lines.size(); return lines.size();
} }
vector<pair<string, string>> ArffFiles::getAttributes() const std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{ {
return attributes; return attributes;
} }
string ArffFiles::getClassName() const std::string ArffFiles::getClassName() const
{ {
return className; return className;
} }
string ArffFiles::getClassType() const std::string ArffFiles::getClassType() const
{ {
return classType; return classType;
} }
vector<vector<float>>& ArffFiles::getX() std::vector<std::vector<float>>& ArffFiles::getX()
{ {
return X; return X;
} }
vector<int>& ArffFiles::getY() std::vector<int>& ArffFiles::getY()
{ {
return y; return y;
} }
void ArffFiles::loadCommon(string fileName) void ArffFiles::loadCommon(std::string fileName)
{ {
ifstream file(fileName); std::ifstream file(fileName);
if (!file.is_open()) { if (!file.is_open()) {
throw invalid_argument("Unable to open file"); throw std::invalid_argument("Unable to open file");
} }
string line; std::string line;
string keyword; std::string keyword;
string attribute; std::string attribute;
string type; std::string type;
string type_w; std::string type_w;
while (getline(file, line)) { while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue; continue;
} }
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
stringstream ss(line); std::stringstream ss(line);
ss >> keyword >> attribute; ss >> keyword >> attribute;
type = ""; type = "";
while (ss >> type_w) while (ss >> type_w)
@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName)
} }
file.close(); file.close();
if (attributes.empty()) if (attributes.empty())
throw invalid_argument("No attributes found"); throw std::invalid_argument("No attributes found");
} }
void ArffFiles::load(const string& fileName, bool classLast) void ArffFiles::load(const std::string& fileName, bool classLast)
{ {
int labelIndex; int labelIndex;
loadCommon(fileName); loadCommon(fileName);
if (classLast) { if (classLast) {
className = get<0>(attributes.back()); className = std::get<0>(attributes.back());
classType = get<1>(attributes.back()); classType = std::get<1>(attributes.back());
attributes.pop_back(); attributes.pop_back();
labelIndex = static_cast<int>(attributes.size()); labelIndex = static_cast<int>(attributes.size());
} else { } else {
className = get<0>(attributes.front()); className = std::get<0>(attributes.front());
classType = get<1>(attributes.front()); classType = std::get<1>(attributes.front());
attributes.erase(attributes.begin()); attributes.erase(attributes.begin());
labelIndex = 0; labelIndex = 0;
} }
generateDataset(labelIndex); generateDataset(labelIndex);
} }
void ArffFiles::load(const string& fileName, const string& name) void ArffFiles::load(const std::string& fileName, const std::string& name)
{ {
int labelIndex; int labelIndex;
loadCommon(fileName); loadCommon(fileName);
bool found = false; bool found = false;
for (int i = 0; i < attributes.size(); ++i) { for (int i = 0; i < attributes.size(); ++i) {
if (attributes[i].first == name) { if (attributes[i].first == name) {
className = get<0>(attributes[i]); className = std::get<0>(attributes[i]);
classType = get<1>(attributes[i]); classType = std::get<1>(attributes[i]);
attributes.erase(attributes.begin() + i); attributes.erase(attributes.begin() + i);
labelIndex = i; labelIndex = i;
found = true; found = true;
@ -110,19 +108,19 @@ void ArffFiles::load(const string& fileName, const string& name)
} }
} }
if (!found) { if (!found) {
throw invalid_argument("Class name not found"); throw std::invalid_argument("Class name not found");
} }
generateDataset(labelIndex); generateDataset(labelIndex);
} }
void ArffFiles::generateDataset(int labelIndex) void ArffFiles::generateDataset(int labelIndex)
{ {
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size())); X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
auto yy = vector<string>(lines.size(), ""); auto yy = std::vector<std::string>(lines.size(), "");
auto removeLines = vector<int>(); // Lines with missing values auto removeLines = std::vector<int>(); // Lines with missing values
for (size_t i = 0; i < lines.size(); i++) { for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]); std::stringstream ss(lines[i]);
string value; std::string value;
int pos = 0; int pos = 0;
int xIndex = 0; int xIndex = 0;
while (getline(ss, value, ',')) { while (getline(ss, value, ',')) {
@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex)
y = factorize(yy); y = factorize(yy);
} }
string ArffFiles::trim(const string& source) std::string ArffFiles::trim(const std::string& source)
{ {
string s(source); std::string s(source);
s.erase(0, s.find_first_not_of(" '\n\r\t")); s.erase(0, s.find_first_not_of(" '\n\r\t"));
s.erase(s.find_last_not_of(" '\n\r\t") + 1); s.erase(s.find_last_not_of(" '\n\r\t") + 1);
return s; return s;
} }
vector<int> ArffFiles::factorize(const vector<string>& labels_t) std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{ {
vector<int> yy; std::vector<int> yy;
yy.reserve(labels_t.size()); yy.reserve(labels_t.size());
map<string, int> labelMap; std::map<std::string, int> labelMap;
int i = 0; int i = 0;
for (const string& label : labels_t) { for (const std::string& label : labels_t) {
if (labelMap.find(label) == labelMap.end()) { if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++; labelMap[label] = i++;
} }

View File

@ -4,31 +4,29 @@
#include <string> #include <string>
#include <vector> #include <vector>
using namespace std;
class ArffFiles { class ArffFiles {
private: private:
vector<string> lines; std::vector<std::string> lines;
vector<pair<string, string>> attributes; std::vector<std::pair<std::string, std::string>> attributes;
string className; std::string className;
string classType; std::string classType;
vector<vector<float>> X; std::vector<std::vector<float>> X;
vector<int> y; std::vector<int> y;
void generateDataset(int); void generateDataset(int);
void loadCommon(string); void loadCommon(std::string);
public: public:
ArffFiles(); ArffFiles();
void load(const string&, bool = true); void load(const std::string&, bool = true);
void load(const string&, const string&); void load(const std::string&, const std::string&);
vector<string> getLines() const; std::vector<std::string> getLines() const;
unsigned long int getSize() const; unsigned long int getSize() const;
string getClassName() const; std::string getClassName() const;
string getClassType() const; std::string getClassType() const;
static string trim(const string&); static std::string trim(const std::string&);
vector<vector<float>>& getX(); std::vector<std::vector<float>>& getX();
vector<int>& getY(); std::vector<int>& getY();
vector<pair<string, string>> getAttributes() const; std::vector<std::pair<std::string, std::string>> getAttributes() const;
static vector<int> factorize(const vector<string>& labels_t); static std::vector<int> factorize(const std::vector<std::string>& labels_t);
}; };
#endif #endif

View File

@ -1,6 +1,6 @@
#include <iostream> #include <iostream>
#include <torch/torch.h> #include <torch/torch.h>
#include <string> #include <string>
#include <map> #include <map>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@ -12,14 +12,12 @@
#include "modelRegister.h" #include "modelRegister.h"
#include <fstream> #include <fstream>
using namespace std; const std::string PATH = "../../data/";
const string PATH = "../../data/"; pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
{ {
vector<mdlp::labels_t>Xd; std::vector<mdlp::labels_t>Xd;
map<string, int> maxes; map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp(); auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) { for (int i = 0; i < X.size(); i++) {
@ -31,7 +29,7 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
return { Xd, maxes }; return { Xd, maxes };
} }
bool file_exists(const std::string& name) bool file_exists(const std::string& name)
{ {
if (FILE* file = fopen(name.c_str(), "r")) { if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file); fclose(file);
@ -40,12 +38,12 @@ bool file_exists(const std::string& name)
return false; return false;
} }
} }
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y) pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(std::vector<int> indices, std::vector<std::vector<int>> X, std::vector<int> y)
{ {
vector<vector<int>> Xr; // nxm std::vector<std::vector<int>> Xr; // nxm
vector<int> yr; std::vector<int> yr;
for (int col = 0; col < X.size(); ++col) { for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>()); Xr.push_back(std::vector<int>());
} }
for (auto index : indices) { for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) { for (int col = 0; col < X.size(); ++col) {
@ -58,7 +56,7 @@ pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vect
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
map<string, bool> datasets = { map<std::string, bool> datasets = {
{"diabetes", true}, {"diabetes", true},
{"ecoli", true}, {"ecoli", true},
{"glass", true}, {"glass", true},
@ -68,13 +66,13 @@ int main(int argc, char** argv)
{"liver-disorders", true}, {"liver-disorders", true},
{"mfeat-factors", true}, {"mfeat-factors", true},
}; };
auto valid_datasets = vector<string>(); auto valid_datasets = std::vector<std::string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<string, bool>& pair) { return pair.first; }); [](const pair<std::string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample"); argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset") program.add_argument("-d", "--dataset")
.help("Dataset file name") .help("Dataset file name")
.action([valid_datasets](const std::string& value) { .action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value; return value;
} }
@ -83,23 +81,23 @@ int main(int argc, char** argv)
); );
program.add_argument("-p", "--path") program.add_argument("-p", "--path")
.help(" folder where the data files are located, default") .help(" folder where the data files are located, default")
.default_value(string{ PATH } .default_value(std::string{ PATH }
); );
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) { .action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames(); static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) { if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value; return value;
} }
throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
} }
); );
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
try { try {
auto k = stoi(value); auto k = stoi(value);
if (k < 2) { if (k < 2) {
@ -115,13 +113,13 @@ int main(int argc, char** argv)
}}); }});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt; bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name; std::string model_name, file_name, path, complete_file_name;
int nFolds, seed; int nFolds, seed;
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
file_name = program.get<string>("dataset"); file_name = program.get<std::string>("dataset");
path = program.get<string>("path"); path = program.get<std::string>("path");
model_name = program.get<string>("model"); model_name = program.get<std::string>("model");
complete_file_name = path + file_name + ".arff"; complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified"); stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors"); tensors = program.get<bool>("tensors");
@ -134,7 +132,7 @@ int main(int argc, char** argv)
} }
} }
catch (const exception& err) { catch (const exception& err) {
cerr << err.what() << endl; cerr << err.what() << std::endl;
cerr << program; cerr << program;
exit(1); exit(1);
} }
@ -145,50 +143,50 @@ int main(int argc, char** argv)
auto handler = ArffFiles(); auto handler = ArffFiles();
handler.load(complete_file_name, class_last); handler.load(complete_file_name, class_last);
// Get Dataset X, y // Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX(); std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY(); mdlp::labels_t& y = handler.getY();
// Get className & Features // Get className & Features
auto className = handler.getClassName(); auto className = handler.getClassName();
vector<string> features; std::vector<std::string> features;
auto attributes = handler.getAttributes(); auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<string, string>& item) { return item.first; }); [](const pair<std::string, std::string>& item) { return item.first; });
// Discretize Dataset // Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features); auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1; maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<string, vector<int>> states; map<std::string, std::vector<int>> states;
for (auto feature : features) { for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]); states[feature] = std::vector<int>(maxes[feature]);
} }
states[className] = vector<int>(maxes[className]); states[className] = std::vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name); auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states); clf->fit(Xd, y, features, className, states);
if (dump_cpt) { if (dump_cpt) {
cout << "--- CPT Tables ---" << endl; std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt(); clf->dump_cpt();
} }
auto lines = clf->show(); auto lines = clf->show();
for (auto line : lines) { for (auto line : lines) {
cout << line << endl; std::cout << line << std::endl;
} }
cout << "--- Topological Order ---" << endl; std::cout << "--- Topological Order ---" << std::endl;
auto order = clf->topological_order(); auto order = clf->topological_order();
for (auto name : order) { for (auto name : order) {
cout << name << ", "; std::cout << name << ", ";
} }
cout << "end." << endl; std::cout << "end." << std::endl;
auto score = clf->score(Xd, y); auto score = clf->score(Xd, y);
cout << "Score: " << score << endl; std::cout << "Score: " << score << std::endl;
auto graph = clf->graph(); auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name; auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot"); ofstream file(dot_file + ".dot");
file << graph; file << graph;
file.close(); file.close();
cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
string stratified_string = stratified ? " Stratified" : ""; std::string stratified_string = stratified ? " Stratified" : "";
cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
cout << "==========================================" << endl; std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32); torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32); torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
@ -202,7 +200,7 @@ int main(int argc, char** argv)
fold = new platform::KFold(nFolds, y.size(), seed); fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) { for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i); auto [train, test] = fold->getFold(i);
cout << "Fold: " << i + 1 << endl; std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) { if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64); auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64); auto ttest = torch::tensor(test, torch::kInt64);
@ -222,16 +220,16 @@ int main(int argc, char** argv)
score_test = clf->score(Xtest, ytest); score_test = clf->score(Xtest, ytest);
} }
if (dump_cpt) { if (dump_cpt) {
cout << "--- CPT Tables ---" << endl; std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt(); clf->dump_cpt();
} }
total_score_train += score_train; total_score_train += score_train;
total_score += score_test; total_score += score_test;
cout << "Score Train: " << score_train << endl; std::cout << "Score Train: " << score_train << std::endl;
cout << "Score Test : " << score_test << endl; std::cout << "Score Test : " << score_test << std::endl;
cout << "-------------------------------------------------------------------------------" << endl; std::cout << "-------------------------------------------------------------------------------" << std::endl;
} }
cout << "**********************************************************************************" << endl; std::cout << "**********************************************************************************" << std::endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl; std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
cout << "Average Score Test : " << total_score / nFolds << endl;return 0; std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
} }

View File

@ -9,9 +9,9 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODE>(i)); models.push_back(std::make_unique<SPODE>(i));
} }
n_models = models.size(); n_models = models.size();
significanceModels = vector<double>(n_models, 1.0); significanceModels = std::vector<double>(n_models, 1.0);
} }
vector<string> AODE::graph(const string& title) const std::vector<std::string> AODE::graph(const std::string& title) const
{ {
return Ensemble::graph(title); return Ensemble::graph(title);
} }

View File

@ -9,7 +9,7 @@ namespace bayesnet {
public: public:
AODE(); AODE();
virtual ~AODE() {}; virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") const override; std::vector<std::string> graph(const std::string& title = "AODE") const override;
}; };
} }
#endif #endif

View File

@ -1,17 +1,15 @@
#include "AODELd.h" #include "AODELd.h"
#include "Models.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
@ -26,7 +24,7 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODELd>(i)); models.push_back(std::make_unique<SPODELd>(i));
} }
n_models = models.size(); n_models = models.size();
significanceModels = vector<double>(n_models, 1.0); significanceModels = std::vector<double>(n_models, 1.0);
} }
void AODELd::trainModel(const torch::Tensor& weights) void AODELd::trainModel(const torch::Tensor& weights)
{ {
@ -34,7 +32,7 @@ namespace bayesnet {
model->fit(Xf, y, features, className, states); model->fit(Xf, y, features, className, states);
} }
} }
vector<string> AODELd::graph(const string& name) const std::vector<std::string> AODELd::graph(const std::string& name) const
{ {
return Ensemble::graph(name); return Ensemble::graph(name);
} }

View File

@ -5,17 +5,16 @@
#include "SPODELd.h" #include "SPODELd.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
class AODELd : public Ensemble, public Proposal { class AODELd : public Ensemble, public Proposal {
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
AODELd(); AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) override; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
virtual ~AODELd() = default; virtual ~AODELd() = default;
vector<string> graph(const string& name = "AODELd") const override; std::vector<std::string> graph(const std::string& name = "AODELd") const override;
static inline string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !AODELD_H #endif // !AODELD_H

View File

@ -4,31 +4,30 @@
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <vector> #include <vector>
namespace bayesnet { namespace bayesnet {
using namespace std;
enum status_t { NORMAL, WARNING, ERROR }; enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier { class BaseClassifier {
protected: protected:
virtual void trainModel(const torch::Tensor& weights) = 0; virtual void trainModel(const torch::Tensor& weights) = 0;
public: public:
// X is nxm vector, y is nx1 vector // X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor // X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0; std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
status_t virtual getStatus() const = 0; status_t virtual getStatus() const = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0; float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0; int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0; int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0; int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0; std::vector<std::string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0; std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
const string inline getVersion() const { return "0.2.0"; }; virtual std::string getVersion() = 0;
vector<string> virtual topological_order() = 0; std::vector<std::string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0; void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
}; };

View File

@ -2,15 +2,15 @@
#include "Mst.h" #include "Mst.h"
namespace bayesnet { namespace bayesnet {
//samples is n+1xm tensor used to fit the model //samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates) Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: samples(samples) : samples(samples)
, features(features) , features(features)
, className(className) , className(className)
, classNumStates(classNumStates) , classNumStates(classNumStates)
{ {
} }
//samples is nxm vector used to fit the model //samples is nxm std::vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates) Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features) : features(features)
, className(className) , className(className)
, classNumStates(classNumStates) , classNumStates(classNumStates)
@ -21,7 +21,7 @@ namespace bayesnet {
} }
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
} }
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{ {
// Return the K Best features // Return the K Best features
auto n = samples.size(0) - 1; auto n = samples.size(0) - 1;
@ -56,15 +56,15 @@ namespace bayesnet {
} }
return featuresKBest; return featuresKBest;
} }
vector<double> Metrics::getScoresKBest() const std::vector<double> Metrics::getScoresKBest() const
{ {
return scoresKBest; return scoresKBest;
} }
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{ {
auto result = vector<double>(); auto result = std::vector<double>();
auto source = vector<string>(features); auto source = std::vector<std::string>(features);
source.push_back(className); source.push_back(className);
auto combinations = doCombinations(source); auto combinations = doCombinations(source);
// Compute class prior // Compute class prior
@ -100,7 +100,7 @@ namespace bayesnet {
return matrix; return matrix;
} }
// To use in Python // To use in Python
vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_) std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
{ {
const torch::Tensor weights = torch::tensor(weights_); const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights); auto matrix = conditionalEdge(weights);
@ -121,7 +121,7 @@ namespace bayesnet {
{ {
int numSamples = firstFeature.sizes()[0]; int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount(weights); torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts; std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
double totalWeight = 0; double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) { for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>(); jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
@ -155,7 +155,7 @@ namespace bayesnet {
and the indices of the weights as nodes of this square matrix using and the indices of the weights as nodes of this square matrix using
Kruskal algorithm Kruskal algorithm
*/ */
vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root) std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
{ {
auto mst = MST(features, weights, root); auto mst = MST(features, weights, root);
return mst.maximumSpanningTree(); return mst.maximumSpanningTree();

View File

@ -4,23 +4,21 @@
#include <vector> #include <vector>
#include <string> #include <string>
namespace bayesnet { namespace bayesnet {
using namespace std;
using namespace torch;
class Metrics { class Metrics {
private: private:
int classNumStates = 0; int classNumStates = 0;
vector<double> scoresKBest; std::vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features std::vector<int> featuresKBest; // sorted indices of the features
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
protected: protected:
Tensor samples; // n+1xm tensor used to fit the model where samples[-1] is the y vector torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
string className; std::string className;
double entropy(const Tensor& feature, const Tensor& weights); double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
vector<string> features; std::vector<std::string> features;
template <class T> template <class T>
vector<pair<T, T>> doCombinations(const vector<T>& source) std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{ {
vector<pair<T, T>> result; std::vector<std::pair<T, T>> result;
for (int i = 0; i < source.size(); ++i) { for (int i = 0; i < source.size(); ++i) {
T temp = source[i]; T temp = source[i];
for (int j = i + 1; j < source.size(); ++j) { for (int j = i + 1; j < source.size(); ++j) {
@ -30,7 +28,7 @@ namespace bayesnet {
return result; return result;
} }
template <class T> template <class T>
T pop_first(vector<T>& v) T pop_first(std::vector<T>& v)
{ {
T temp = v[0]; T temp = v[0];
v.erase(v.begin()); v.erase(v.begin());
@ -38,14 +36,14 @@ namespace bayesnet {
} }
public: public:
Metrics() = default; Metrics() = default;
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates); Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates); Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
vector<double> getScoresKBest() const; std::vector<double> getScoresKBest() const;
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
Tensor conditionalEdge(const torch::Tensor& weights); torch::Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root); std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
}; };
} }
#endif #endif

View File

@ -46,7 +46,7 @@ namespace bayesnet {
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid // Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" }; const std::vector<std::string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };
checkHyperparameters(validKeys, hyperparameters); checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) { if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"]; repeatSparent = hyperparameters["repeatSparent"];
@ -65,38 +65,38 @@ namespace bayesnet {
} }
if (hyperparameters.contains("select_features")) { if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"]; auto selectedAlgorithm = hyperparameters["select_features"];
vector<string> algos = { "IWSS", "FCBF", "CFS" }; std::vector<std::string> algos = { "IWSS", "FCBF", "CFS" };
selectFeatures = true; selectFeatures = true;
algorithm = selectedAlgorithm; algorithm = selectedAlgorithm;
if (find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
} }
} }
} }
unordered_set<int> BoostAODE::initializeModels() std::unordered_set<int> BoostAODE::initializeModels()
{ {
unordered_set<int> featuresUsed; std::unordered_set<int> featuresUsed;
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
int maxFeatures = 0; int maxFeatures = 0;
if (algorithm == "CFS") { if (algorithm == "CFS") {
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
} else if (algorithm == "IWSS") { } else if (algorithm == "IWSS") {
if (threshold < 0 || threshold >0.5) { if (threshold < 0 || threshold >0.5) {
throw invalid_argument("Invalid threshold value for IWSS [0, 0.5]"); throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
} }
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} else if (algorithm == "FCBF") { } else if (algorithm == "FCBF") {
if (threshold < 1e-7 || threshold > 1) { if (threshold < 1e-7 || threshold > 1) {
throw invalid_argument("Invalid threshold value [1e-7, 1]"); throw std::invalid_argument("Invalid threshold value [1e-7, 1]");
} }
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} }
featureSelector->fit(); featureSelector->fit();
auto cfsFeatures = featureSelector->getFeatures(); auto cfsFeatures = featureSelector->getFeatures();
for (const int& feature : cfsFeatures) { for (const int& feature : cfsFeatures) {
// cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; // std::cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << std::endl;
featuresUsed.insert(feature); featuresUsed.insert(feature);
unique_ptr<Classifier> model = std::make_unique<SPODE>(feature); std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_);
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(1.0); significanceModels.push_back(1.0);
@ -107,7 +107,7 @@ namespace bayesnet {
} }
void BoostAODE::trainModel(const torch::Tensor& weights) void BoostAODE::trainModel(const torch::Tensor& weights)
{ {
unordered_set<int> featuresUsed; std::unordered_set<int> featuresUsed;
int tolerance = 5; // number of times the accuracy can be lower than the threshold int tolerance = 5; // number of times the accuracy can be lower than the threshold
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels();
@ -115,13 +115,12 @@ namespace bayesnet {
} }
if (maxModels == 0) if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n; maxModels = .1 * n > 10 ? .1 * n : n;
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false; bool exitCondition = false;
// Variables to control the accuracy finish condition // Variables to control the accuracy finish condition
double priorAccuracy = 0.0; double priorAccuracy = 0.0;
double delta = 1.0; double delta = 1.0;
double threshold = 1e-4; double threshold = 1e-4;
int count = 0; // number of times the accuracy is lower than the threshold int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict fitted = true; // to enable predict
// Step 0: Set the finish condition // Step 0: Set the finish condition
@ -132,12 +131,12 @@ namespace bayesnet {
while (!exitCondition) { while (!exitCondition) {
// Step 1: Build ranking with mutual information // Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
unique_ptr<Classifier> model; std::unique_ptr<Classifier> model;
auto feature = featureSelection[0]; auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool used = true; bool used = true;
for (const auto& feat : featureSelection) { for (const auto& feat : featureSelection) {
if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
continue; continue;
} }
used = false; used = false;
@ -190,7 +189,7 @@ namespace bayesnet {
status = WARNING; status = WARNING;
} }
} }
vector<string> BoostAODE::graph(const string& title) const std::vector<std::string> BoostAODE::graph(const std::string& title) const
{ {
return Ensemble::graph(title); return Ensemble::graph(title);
} }

View File

@ -9,7 +9,7 @@ namespace bayesnet {
public: public:
BoostAODE(); BoostAODE();
virtual ~BoostAODE() {}; virtual ~BoostAODE() {};
vector<string> graph(const string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(nlohmann::json& hyperparameters) override;
protected: protected:
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
@ -17,14 +17,14 @@ namespace bayesnet {
private: private:
torch::Tensor dataset_; torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test; torch::Tensor X_train, y_train, X_test, y_test;
unordered_set<int> initializeModels(); std::unordered_set<int> initializeModels();
// Hyperparameters // Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0; int maxModels = 0;
bool ascending = false; //Process KBest features ascending or descending order bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection bool selectFeatures = false; // if true, use feature selection
string algorithm = ""; // Selected feature selection algorithm std::string algorithm = ""; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr; FeatureSelect* featureSelector = nullptr;
double threshold = -1; double threshold = -1;
}; };

View File

@ -13,7 +13,7 @@ namespace bayesnet {
selectedScores.push_back(suLabels[feature]); selectedScores.push_back(suLabels[feature]);
selectedFeatures.erase(selectedFeatures.begin()); selectedFeatures.erase(selectedFeatures.begin());
while (continueCondition) { while (continueCondition) {
double merit = numeric_limits<double>::lowest(); double merit = std::numeric_limits<double>::lowest();
int bestFeature = -1; int bestFeature = -1;
for (auto feature : featureOrder) { for (auto feature : featureOrder) {
selectedFeatures.push_back(feature); selectedFeatures.push_back(feature);
@ -36,7 +36,7 @@ namespace bayesnet {
} }
fitted = true; fitted = true;
} }
bool CFS::computeContinueCondition(const vector<int>& featureOrder) bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
{ {
if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) { if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
return false; return false;
@ -49,11 +49,11 @@ namespace bayesnet {
subsets show no improvement over the current best subset." subsets show no improvement over the current best subset."
as stated in Mark A.Hall Thesis as stated in Mark A.Hall Thesis
*/ */
double item_ant = numeric_limits<double>::lowest(); double item_ant = std::numeric_limits<double>::lowest();
int num = 0; int num = 0;
vector<double> lastFive(selectedScores.end() - 5, selectedScores.end()); std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
for (auto item : lastFive) { for (auto item : lastFive) {
if (item_ant == numeric_limits<double>::lowest()) { if (item_ant == std::numeric_limits<double>::lowest()) {
item_ant = item; item_ant = item;
} }
if (item > item_ant) { if (item > item_ant) {

View File

@ -3,19 +3,18 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
#include "FeatureSelect.h" #include "FeatureSelect.h"
using namespace std;
namespace bayesnet { namespace bayesnet {
class CFS : public FeatureSelect { class CFS : public FeatureSelect {
public: public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
CFS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : CFS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights) FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights)
{ {
} }
virtual ~CFS() {}; virtual ~CFS() {};
void fit() override; void fit() override;
private: private:
bool computeContinueCondition(const vector<int>& featureOrder); bool computeContinueCondition(const std::vector<int>& featureOrder);
}; };
} }
#endif #endif

View File

@ -3,6 +3,9 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)

View File

@ -2,10 +2,8 @@
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
this->features = features; this->features = features;
this->className = className; this->className = className;
@ -21,7 +19,7 @@ namespace bayesnet {
fitted = true; fitted = true;
return *this; return *this;
} }
void Classifier::buildDataset(Tensor& ytmp) void Classifier::buildDataset(torch::Tensor& ytmp)
{ {
try { try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1); auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
@ -29,8 +27,8 @@ namespace bayesnet {
} }
catch (const std::exception& e) { catch (const std::exception& e) {
std::cerr << e.what() << '\n'; std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n"; std::cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n"; std::cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1); exit(1);
} }
} }
@ -39,7 +37,7 @@ namespace bayesnet {
model.fit(dataset, weights, features, className, states); model.fit(dataset, weights, features, className, states);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{ {
dataset = X; dataset = X;
buildDataset(y); buildDataset(y);
@ -47,24 +45,24 @@ namespace bayesnet {
return build(features, className, states, weights); return build(features, className, states, weights);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{ {
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32); dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32)); dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
} }
auto ytmp = torch::tensor(y, kInt32); auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp); buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{ {
this->dataset = dataset; this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
this->dataset = dataset; this->dataset = dataset;
return build(features, className, states, weights); return build(features, className, states, weights);
@ -72,57 +70,57 @@ namespace bayesnet {
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {
if (torch::is_floating_point(dataset)) { if (torch::is_floating_point(dataset)) {
throw invalid_argument("dataset (X, y) must be of type Integer"); throw std::invalid_argument("dataset (X, y) must be of type Integer");
} }
if (n != features.size()) { if (n != features.size()) {
throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features"); throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
} }
if (states.find(className) == states.end()) { if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states"); throw std::invalid_argument("className not found in states");
} }
for (auto feature : features) { for (auto feature : features) {
if (states.find(feature) == states.end()) { if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states"); throw std::invalid_argument("feature [" + feature + "] not found in states");
} }
} }
} }
Tensor Classifier::predict(Tensor& X) torch::Tensor Classifier::predict(torch::Tensor& X)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw std::logic_error("Classifier has not been fitted");
} }
return model.predict(X); return model.predict(X);
} }
vector<int> Classifier::predict(vector<vector<int>>& X) std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw std::logic_error("Classifier has not been fitted");
} }
auto m_ = X[0].size(); auto m_ = X[0].size();
auto n_ = X.size(); auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0)); std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) { for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end()); Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
} }
auto yp = model.predict(Xd); auto yp = model.predict(Xd);
return yp; return yp;
} }
float Classifier::score(Tensor& X, Tensor& y) float Classifier::score(torch::Tensor& X, torch::Tensor& y)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw std::logic_error("Classifier has not been fitted");
} }
Tensor y_pred = predict(X); torch::Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0); return (y_pred == y).sum().item<float>() / y.size(0);
} }
float Classifier::score(vector<vector<int>>& X, vector<int>& y) float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Classifier has not been fitted"); throw std::logic_error("Classifier has not been fitted");
} }
return model.score(X, y); return model.score(X, y);
} }
vector<string> Classifier::show() const std::vector<std::string> Classifier::show() const
{ {
return model.show(); return model.show();
} }
@ -147,7 +145,7 @@ namespace bayesnet {
{ {
return fitted ? model.getStates() : 0; return fitted ? model.getStates() : 0;
} }
vector<string> Classifier::topological_order() std::vector<std::string> Classifier::topological_order()
{ {
return model.topological_sort(); return model.topological_sort();
} }
@ -155,18 +153,18 @@ namespace bayesnet {
{ {
model.dump_cpt(); model.dump_cpt();
} }
void Classifier::checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters) void Classifier::checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters)
{ {
for (const auto& item : hyperparameters.items()) { for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) { if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw invalid_argument("Hyperparameter " + item.key() + " is not valid"); throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
} }
} }
} }
void Classifier::setHyperparameters(nlohmann::json& hyperparameters) void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid, default is no hyperparameters // Check if hyperparameters are valid, default is no hyperparameters
const vector<string> validKeys = { }; const std::vector<std::string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters); checkHyperparameters(validKeys, hyperparameters);
} }
} }

View File

@ -4,46 +4,45 @@
#include "BaseClassifier.h" #include "BaseClassifier.h"
#include "Network.h" #include "Network.h"
#include "BayesMetrics.h" #include "BayesMetrics.h"
using namespace std;
using namespace torch;
namespace bayesnet { namespace bayesnet {
class Classifier : public BaseClassifier { class Classifier : public BaseClassifier {
private: private:
Classifier& build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights); Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
protected: protected:
bool fitted; bool fitted;
int m, n; // m: number of samples, n: number of features int m, n; // m: number of samples, n: number of features
Network model; Network model;
Metrics metrics; Metrics metrics;
vector<string> features; std::vector<std::string> features;
string className; std::string className;
map<string, vector<int>> states; std::map<std::string, std::vector<int>> states;
Tensor dataset; // (n+1)xm tensor torch::Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL; status_t status = NORMAL;
void checkFitParameters(); void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters); void checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y); void buildDataset(torch::Tensor& y);
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
void addNodes(); void addNodes();
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
int getNumberOfEdges() const override; int getNumberOfEdges() const override;
int getNumberOfStates() const override; int getNumberOfStates() const override;
Tensor predict(Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; } status_t getStatus() const override { return status; }
vector<int> predict(vector<vector<int>>& X) override; std::string getVersion() override { return "0.2.0"; };
float score(Tensor& X, Tensor& y) override; std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(torch::Tensor& X, torch::Tensor& y) override;
vector<string> show() const override; float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
vector<string> topological_order() override; std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
void dump_cpt() const override; void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(nlohmann::json& hyperparameters) override;
}; };

View File

@ -1,7 +1,6 @@
#include "Ensemble.h" #include "Ensemble.h"
namespace bayesnet { namespace bayesnet {
using namespace torch;
Ensemble::Ensemble() : Classifier(Network()), n_models(0) {} Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
@ -9,20 +8,20 @@ namespace bayesnet {
{ {
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
// fit with vectors // fit with std::vectors
models[i]->fit(dataset, features, className, states); models[i]->fit(dataset, features, className, states);
} }
} }
vector<int> Ensemble::voting(Tensor& y_pred) std::vector<int> Ensemble::voting(torch::Tensor& y_pred)
{ {
auto y_pred_ = y_pred.accessor<int, 2>(); auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final; std::vector<int> y_pred_final;
int numClasses = states.at(className).size(); int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample // y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
// votes[c] accumulates, for class value c, the significance of every model that predicted c for this sample // votes[c] accumulates, for class value c, the significance of every model that predicted c for this sample
// e.g. votes[0] is the total significance of the models whose prediction for sample i is class 0 // e.g. votes[0] is the total significance of the models whose prediction for sample i is class 0
vector<double> votes(numClasses, 0.0); std::vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) { for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels.at(j); votes[y_pred_[i][j]] += significanceModels.at(j);
} }
@ -32,18 +31,18 @@ namespace bayesnet {
} }
return y_pred_final; return y_pred_final;
} }
Tensor Ensemble::predict(Tensor& X) torch::Tensor Ensemble::predict(torch::Tensor& X)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Ensemble has not been fitted"); throw std::logic_error("Ensemble has not been fitted");
} }
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32); torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
auto threads{ vector<thread>() }; auto threads{ std::vector<std::thread>() };
mutex mtx; std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(thread([&, i]() { threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict(X); auto ypredict = models[i]->predict(X);
lock_guard<mutex> lock(mtx); std::lock_guard<std::mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict); y_pred.index_put_({ "...", i }, ypredict);
})); }));
} }
@ -52,27 +51,27 @@ namespace bayesnet {
} }
return torch::tensor(voting(y_pred)); return torch::tensor(voting(y_pred));
} }
vector<int> Ensemble::predict(vector<vector<int>>& X) std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Ensemble has not been fitted"); throw std::logic_error("Ensemble has not been fitted");
} }
long m_ = X[0].size(); long m_ = X[0].size();
long n_ = X.size(); long n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0)); std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) { for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end()); Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
} }
Tensor y_pred = torch::zeros({ m_, n_models }, kInt32); torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32);
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32)); y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32));
} }
return voting(y_pred); return voting(y_pred);
} }
float Ensemble::score(Tensor& X, Tensor& y) float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Ensemble has not been fitted"); throw std::logic_error("Ensemble has not been fitted");
} }
auto y_pred = predict(X); auto y_pred = predict(X);
int correct = 0; int correct = 0;
@ -83,10 +82,10 @@ namespace bayesnet {
} }
return (double)correct / y_pred.size(0); return (double)correct / y_pred.size(0);
} }
float Ensemble::score(vector<vector<int>>& X, vector<int>& y) float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("Ensemble has not been fitted"); throw std::logic_error("Ensemble has not been fitted");
} }
auto y_pred = predict(X); auto y_pred = predict(X);
int correct = 0; int correct = 0;
@ -97,20 +96,20 @@ namespace bayesnet {
} }
return (double)correct / y_pred.size(); return (double)correct / y_pred.size();
} }
vector<string> Ensemble::show() const std::vector<std::string> Ensemble::show() const
{ {
auto result = vector<string>(); auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->show(); auto res = models[i]->show();
result.insert(result.end(), res.begin(), res.end()); result.insert(result.end(), res.begin(), res.end());
} }
return result; return result;
} }
vector<string> Ensemble::graph(const string& title) const std::vector<std::string> Ensemble::graph(const std::string& title) const
{ {
auto result = vector<string>(); auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->graph(title + "_" + to_string(i)); auto res = models[i]->graph(title + "_" + std::to_string(i));
result.insert(result.end(), res.begin(), res.end()); result.insert(result.end(), res.begin(), res.end());
} }
return result; return result;

View File

@ -4,34 +4,32 @@
#include "Classifier.h" #include "Classifier.h"
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
using namespace std;
using namespace torch;
namespace bayesnet { namespace bayesnet {
class Ensemble : public Classifier { class Ensemble : public Classifier {
private: private:
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& build(std::vector<std::string>& features, std::string className, std::map<std::string, std::vector<int>>& states);
protected: protected:
unsigned n_models; unsigned n_models;
vector<unique_ptr<Classifier>> models; std::vector<std::unique_ptr<Classifier>> models;
vector<double> significanceModels; std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
vector<int> voting(Tensor& y_pred); std::vector<int> voting(torch::Tensor& y_pred);
public: public:
Ensemble(); Ensemble();
virtual ~Ensemble() = default; virtual ~Ensemble() = default;
Tensor predict(Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override; std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(torch::Tensor& X, torch::Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
int getNumberOfEdges() const override; int getNumberOfEdges() const override;
int getNumberOfStates() const override; int getNumberOfStates() const override;
vector<string> show() const override; std::vector<std::string> show() const override;
vector<string> graph(const string& title) const override; std::vector<std::string> graph(const std::string& title) const override;
vector<string> topological_order() override std::vector<std::string> topological_order() override
{ {
return vector<string>(); return std::vector<std::string>();
} }
void dump_cpt() const override void dump_cpt() const override
{ {

View File

@ -2,7 +2,7 @@
#include "FCBF.h" #include "FCBF.h"
namespace bayesnet { namespace bayesnet {
FCBF::FCBF(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : FCBF::FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{ {
if (threshold < 1e-7) { if (threshold < 1e-7) {

View File

@ -3,12 +3,11 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
#include "FeatureSelect.h" #include "FeatureSelect.h"
using namespace std;
namespace bayesnet { namespace bayesnet {
class FCBF : public FeatureSelect { class FCBF : public FeatureSelect {
public: public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
FCBF(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~FCBF() {}; virtual ~FCBF() {};
void fit() override; void fit() override;
private: private:

View File

@ -2,7 +2,7 @@
#include <limits> #include <limits>
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights) Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
{ {
@ -42,7 +42,7 @@ namespace bayesnet {
try { try {
return suFeatures.at({ firstFeature, secondFeature }); return suFeatures.at({ firstFeature, secondFeature });
} }
catch (const out_of_range& e) { catch (const std::out_of_range& e) {
double result = symmetricalUncertainty(firstFeature, secondFeature); double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result; suFeatures[{firstFeature, secondFeature}] = result;
return result; return result;
@ -62,17 +62,17 @@ namespace bayesnet {
} }
return rcf / sqrt(n + (n * n - n) * rff); return rcf / sqrt(n + (n * n - n) * rff);
} }
vector<int> FeatureSelect::getFeatures() const std::vector<int> FeatureSelect::getFeatures() const
{ {
if (!fitted) { if (!fitted) {
throw runtime_error("FeatureSelect not fitted"); throw std::runtime_error("FeatureSelect not fitted");
} }
return selectedFeatures; return selectedFeatures;
} }
vector<double> FeatureSelect::getScores() const std::vector<double> FeatureSelect::getScores() const
{ {
if (!fitted) { if (!fitted) {
throw runtime_error("FeatureSelect not fitted"); throw std::runtime_error("FeatureSelect not fitted");
} }
return selectedScores; return selectedScores;
} }

View File

@ -3,16 +3,15 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
#include "BayesMetrics.h" #include "BayesMetrics.h"
using namespace std;
namespace bayesnet { namespace bayesnet {
class FeatureSelect : public Metrics { class FeatureSelect : public Metrics {
public: public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
FeatureSelect(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights);
virtual ~FeatureSelect() {}; virtual ~FeatureSelect() {};
virtual void fit() = 0; virtual void fit() = 0;
vector<int> getFeatures() const; std::vector<int> getFeatures() const;
vector<double> getScores() const; std::vector<double> getScores() const;
protected: protected:
void initialize(); void initialize();
void computeSuLabels(); void computeSuLabels();
@ -21,10 +20,10 @@ namespace bayesnet {
double computeMeritCFS(); double computeMeritCFS();
const torch::Tensor& weights; const torch::Tensor& weights;
int maxFeatures; int maxFeatures;
vector<int> selectedFeatures; std::vector<int> selectedFeatures;
vector<double> selectedScores; std::vector<double> selectedScores;
vector<double> suLabels; std::vector<double> suLabels;
map<pair<int, int>, double> suFeatures; std::map<std::pair<int, int>, double> suFeatures;
bool fitted = false; bool fitted = false;
}; };
} }

View File

@ -2,7 +2,7 @@
#include <limits> #include <limits>
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
IWSS::IWSS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : IWSS::IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{ {
if (threshold < 0 || threshold > .5) { if (threshold < 0 || threshold > .5) {

View File

@ -3,12 +3,11 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
#include "FeatureSelect.h" #include "FeatureSelect.h"
using namespace std;
namespace bayesnet { namespace bayesnet {
class IWSS : public FeatureSelect { class IWSS : public FeatureSelect {
public: public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
IWSS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~IWSS() {}; virtual ~IWSS() {};
void fit() override; void fit() override;
private: private:

View File

@ -1,13 +1,11 @@
#include "KDB.h" #include "KDB.h"
namespace bayesnet { namespace bayesnet {
using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::setHyperparameters(nlohmann::json& hyperparameters) void KDB::setHyperparameters(nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid // Check if hyperparameters are valid
const vector<string> validKeys = { "k", "theta" }; const std::vector<std::string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters); checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) { if (hyperparameters.contains("k")) {
k = hyperparameters["k"]; k = hyperparameters["k"];
@ -40,16 +38,16 @@ namespace bayesnet {
// 1. For each feature Xi, compute mutual information, I(X;C), // 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class. // where C is the class.
addNodes(); addNodes();
const Tensor& y = dataset.index({ -1, "..." }); const torch::Tensor& y = dataset.index({ -1, "..." });
vector<double> mi; std::vector<double> mi;
for (auto i = 0; i < features.size(); i++) { for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = dataset.index({ i, "..." }); torch::Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights)); mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
} }
// 2. Compute class conditional mutual information I(Xi;Xj|C), for each pair of features Xi, Xj // 2. Compute class conditional mutual information I(Xi;Xj|C), for each pair of features Xi, Xj
auto conditionalEdgeWeights = metrics.conditionalEdge(weights); auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty. // 3. Let the used variable list, S, be empty.
vector<int> S; std::vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single // 4. Let the DAG network being constructed, BN, begin with a single
// class node, C. // class node, C.
// 5. Repeat until S includes all domain features // 5. Repeat until S includes all domain features
@ -67,9 +65,9 @@ namespace bayesnet {
S.push_back(idx); S.push_back(idx);
} }
} }
void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights) void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights)
{ {
auto n_edges = min(k, static_cast<int>(S.size())); auto n_edges = std::min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights); auto cond_w = clone(weights);
bool exit_cond = k == 0; bool exit_cond = k == 0;
int num = 0; int num = 0;
@ -81,7 +79,7 @@ namespace bayesnet {
model.addEdge(features[max_minfo], features[idx]); model.addEdge(features[max_minfo], features[idx]);
num++; num++;
} }
catch (const invalid_argument& e) { catch (const std::invalid_argument& e) {
// Loops are not allowed // Loops are not allowed
} }
} }
@ -91,11 +89,11 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0; exit_cond = num == n_edges || candidates.size(0) == 0;
} }
} }
vector<string> KDB::graph(const string& title) const std::vector<std::string> KDB::graph(const std::string& title) const
{ {
string header{ title }; std::string header{ title };
if (title == "KDB") { if (title == "KDB") {
header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")"; header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")";
} }
return model.graph(header); return model.graph(header);
} }

View File

@ -4,20 +4,18 @@
#include "Classifier.h" #include "Classifier.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
using namespace torch;
class KDB : public Classifier { class KDB : public Classifier {
private: private:
int k; int k;
float theta; float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights); void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
protected: protected:
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {}; virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(nlohmann::json& hyperparameters) override;
vector<string> graph(const string& name = "KDB") const override; std::vector<std::string> graph(const std::string& name = "KDB") const override;
}; };
} }
#endif #endif

View File

@ -1,16 +1,15 @@
#include "KDBLd.h" #include "KDBLd.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
@ -18,12 +17,12 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }
Tensor KDBLd::predict(Tensor& X) torch::Tensor KDBLd::predict(torch::Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return KDB::predict(Xt); return KDB::predict(Xt);
} }
vector<string> KDBLd::graph(const string& name) const std::vector<std::string> KDBLd::graph(const std::string& name) const
{ {
return KDB::graph(name); return KDB::graph(name);
} }

View File

@ -4,16 +4,15 @@
#include "Proposal.h" #include "Proposal.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
class KDBLd : public KDB, public Proposal { class KDBLd : public KDB, public Proposal {
private: private:
public: public:
explicit KDBLd(int k); explicit KDBLd(int k);
virtual ~KDBLd() = default; virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") const override; std::vector<std::string> graph(const std::string& name = "KDB") const override;
Tensor predict(Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !KDBLD_H #endif // !KDBLD_H

View File

@ -7,8 +7,7 @@
*/ */
namespace bayesnet { namespace bayesnet {
using namespace std; Graph::Graph(int V) : V(V), parent(std::vector<int>(V))
Graph::Graph(int V) : V(V), parent(vector<int>(V))
{ {
for (int i = 0; i < V; i++) for (int i = 0; i < V; i++)
parent[i] = i; parent[i] = i;
@ -41,35 +40,35 @@ namespace bayesnet {
uSt = find_set(G[i].second.first); uSt = find_set(G[i].second.first);
vEd = find_set(G[i].second.second); vEd = find_set(G[i].second.second);
if (uSt != vEd) { if (uSt != vEd) {
T.push_back(G[i]); // add to mst vector T.push_back(G[i]); // add to mst std::vector
union_set(uSt, vEd); union_set(uSt, vEd);
} }
} }
} }
void Graph::display_mst() void Graph::display_mst()
{ {
cout << "Edge :" << " Weight" << endl; std::cout << "Edge :" << " Weight" << std::endl;
for (int i = 0; i < T.size(); i++) { for (int i = 0; i < T.size(); i++) {
cout << T[i].second.first << " - " << T[i].second.second << " : " std::cout << T[i].second.first << " - " << T[i].second.second << " : "
<< T[i].first; << T[i].first;
cout << endl; std::cout << std::endl;
} }
} }
void insertElement(list<int>& variables, int variable) void insertElement(std::list<int>& variables, int variable)
{ {
if (find(variables.begin(), variables.end(), variable) == variables.end()) { if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
variables.push_front(variable); variables.push_front(variable);
} }
} }
vector<pair<int, int>> reorder(vector<pair<float, pair<int, int>>> T, int root_original) std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{ {
// Create the edges of a DAG from the MST // Create the edges of a DAG from the MST
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted // replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
auto result = vector<pair<int, int>>(); auto result = std::vector<std::pair<int, int>>();
auto visited = vector<int>(); auto visited = std::vector<int>();
auto nextVariables = list<int>(); auto nextVariables = std::list<int>();
nextVariables.push_front(root_original); nextVariables.push_front(root_original);
while (nextVariables.size() > 0) { while (nextVariables.size() > 0) {
int root = nextVariables.front(); int root = nextVariables.front();
@ -104,8 +103,8 @@ namespace bayesnet {
return result; return result;
} }
MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {} MST::MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree() std::vector<std::pair<int, int>> MST::maximumSpanningTree()
{ {
auto num_features = features.size(); auto num_features = features.size();
Graph g(num_features); Graph g(num_features);

View File

@ -4,24 +4,22 @@
#include <vector> #include <vector>
#include <string> #include <string>
namespace bayesnet { namespace bayesnet {
using namespace std;
using namespace torch;
// Holds a weights tensor, a list of feature names and a root index, and exposes
// maximumSpanningTree(), which returns a vector of (int, int) pairs.
// NOTE(review): presumably each pair is an edge between feature indices — the
// full definition is not visible here, confirm against Mst.cc.
class MST { class MST {
private: private:
// Tensor passed to the constructor; how it is indexed is not visible in this declaration.
Tensor weights; torch::Tensor weights;
// Feature names; their count is used as the number of graph nodes in maximumSpanningTree().
vector<string> features; std::vector<std::string> features;
// Root index; defaults to 0 when the default constructor is used.
int root = 0; int root = 0;
public: public:
MST() = default; MST() = default;
MST(const vector<string>& features, const Tensor& weights, const int root); MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree(); std::vector<std::pair<int, int>> maximumSpanningTree();
}; };
class Graph { class Graph {
private: private:
int V; // number of nodes in graph int V; // number of nodes in graph
vector <pair<float, pair<int, int>>> G; // vector for graph std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
vector <pair<float, pair<int, int>>> T; // vector for mst std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
vector<int> parent; std::vector<int> parent;
public: public:
explicit Graph(int V); explicit Graph(int V);
void addEdge(int u, int v, float wt); void addEdge(int u, int v, float wt);
@ -29,7 +27,7 @@ namespace bayesnet {
void union_set(int u, int v); void union_set(int u, int v);
void kruskal_algorithm(); void kruskal_algorithm();
void display_mst(); void display_mst();
vector <pair<float, pair<int, int>>> get_mst() { return T; } std::vector <std::pair<float, std::pair<int, int>>> get_mst() { return T; }
}; };
} }
#endif #endif

View File

@ -3,18 +3,18 @@
#include "Network.h" #include "Network.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
// Default constructor: empty feature list, empty class name, zero class states,
// not fitted, Laplace smoothing 0. maxThreads is not listed and keeps its
// in-class default.
// NOTE(review): the initializer order here differs from the member declaration
// order in the class; members are initialized in declaration order regardless.
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {} Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
// Same initial state as the default constructor, except that maxThreads is set to maxT.
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {} Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
// Copy constructor: copies the smoothing value, feature names, class name, class
// state count, maxThreads and fitted flag from `other`, then copy-constructs a new
// Node for every entry of other.nodes under the same key.
// NOTE(review): `other` is taken by non-const reference (getmaxThreads() is not
// declared const), so const Networks and temporaries cannot be copied.
// NOTE(review): `samples` is not copied here.
// NOTE(review): Node keeps raw Node* parents/children; if Node's copy constructor
// is the implicit one, the copied nodes still point at `other`'s nodes — confirm.
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted) getmaxThreads()), fitted(other.fitted)
{ {
for (const auto& pair : other.nodes) { for (const auto& node : other.nodes) {
nodes[pair.first] = std::make_unique<Node>(*pair.second); nodes[node.first] = std::make_unique<Node>(*node.second);
} }
} }
void Network::initialize() void Network::initialize()
{ {
features = vector<string>(); features = std::vector<std::string>();
className = ""; className = "";
classNumStates = 0; classNumStates = 0;
fitted = false; fitted = false;
@ -29,10 +29,10 @@ namespace bayesnet {
{ {
return samples; return samples;
} }
void Network::addNode(const string& name) void Network::addNode(const std::string& name)
{ {
if (name == "") { if (name == "") {
throw invalid_argument("Node name cannot be empty"); throw std::invalid_argument("Node name cannot be empty");
} }
if (nodes.find(name) != nodes.end()) { if (nodes.find(name) != nodes.end()) {
return; return;
@ -42,7 +42,7 @@ namespace bayesnet {
} }
nodes[name] = std::make_unique<Node>(name); nodes[name] = std::make_unique<Node>(name);
} }
// Returns a copy of the stored feature-name list.
vector<string> Network::getFeatures() const std::vector<std::string> Network::getFeatures() const
{ {
return features; return features;
} }
@ -58,11 +58,11 @@ namespace bayesnet {
} }
return result; return result;
} }
// Returns a copy of the stored class name.
string Network::getClassName() const std::string Network::getClassName() const
{ {
return className; return className;
} }
bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack) bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
{ {
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{ {
@ -78,78 +78,78 @@ namespace bayesnet {
recStack.erase(nodeId); // remove node from recursion stack before function ends recStack.erase(nodeId); // remove node from recursion stack before function ends
return false; return false;
} }
// Adds a directed edge parent -> child between two existing nodes.
// Throws std::invalid_argument if either node does not exist, or if the new edge
// would create a cycle (in which case the edge is removed again before throwing).
void Network::addEdge(const string& parent, const string& child) void Network::addEdge(const std::string& parent, const std::string& child)
{ {
if (nodes.find(parent) == nodes.end()) { if (nodes.find(parent) == nodes.end()) {
throw invalid_argument("Parent node " + parent + " does not exist"); throw std::invalid_argument("Parent node " + parent + " does not exist");
} }
if (nodes.find(child) == nodes.end()) { if (nodes.find(child) == nodes.end()) {
throw invalid_argument("Child node " + child + " does not exist"); throw std::invalid_argument("Child node " + child + " does not exist");
} }
// Temporarily add edge to check for cycles // Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get()); nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get()); nodes[child]->addParent(nodes[parent].get());
// Fresh bookkeeping sets for the cycle search, which starts at the child node
unordered_set<string> visited; std::unordered_set<std::string> visited;
unordered_set<string> recStack; std::unordered_set<std::string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{ {
// remove problematic edge // remove problematic edge
nodes[parent]->removeChild(nodes[child].get()); nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get()); nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph."); throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
} }
} }
// Returns a mutable reference to the internal name -> Node map; callers can modify
// the network's nodes through it.
map<string, std::unique_ptr<Node>>& Network::getNodes() std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
{ {
return nodes; return nodes;
} }
// Validates the arguments of fit() against each other and against the network's
// own `features` list. Throws std::invalid_argument on the first failed check:
//   1. weights has n_samples elements along dimension 0
//   2. n_samples equals n_samples_y
//   3. n_features equals featureNames.size()
//   4. n_features equals features.size() - 1
//   5. className is present in features
//   6. every entry of featureNames is present in features and is a key of states
// NOTE(review): features.size() - 1 is unsigned arithmetic; with an empty
// `features` it wraps around instead of yielding -1.
// NOTE(review): `find` is called unqualified; without a using-directive it
// presumably resolves through argument-dependent lookup — std::find would be explicit.
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights) void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
if (weights.size(0) != n_samples) { if (weights.size(0) != n_samples) {
throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit"); throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
} }
if (n_samples != n_samples_y) { if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")"); throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
} }
if (n_features != featureNames.size()) { if (n_features != featureNames.size()) {
throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")"); throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
} }
// The comparison uses size() - 1, i.e. `features` is expected to hold one more entry than X has features
if (n_features != features.size() - 1) { if (n_features != features.size() - 1) {
throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")"); throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
} }
if (find(features.begin(), features.end(), className) == features.end()) { if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features"); throw std::invalid_argument("className not found in Network::features");
} }
for (auto& feature : featureNames) { for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) { if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features"); throw std::invalid_argument("Feature " + feature + " not found in Network::features");
} }
if (states.find(feature) == states.end()) { if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states"); throw std::invalid_argument("Feature " + feature + " not found in states");
} }
} }
} }
// For every name in `features`, sets the matching node's number of states to the
// size of that name's entry in `states`; then caches the class node's state count
// in classNumStates.
// The .at() lookups throw std::out_of_range if a name is missing from `nodes` or
// from `states`.
// NOTE(review): `for_each` is called unqualified; std::for_each would be explicit.
void Network::setStates(const map<string, vector<int>>& states) void Network::setStates(const std::map<std::string, std::vector<int>>& states)
{ {
// Set states to every Node in the network // Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const string& feature) { for_each(features.begin(), features.end(), [this, &states](const std::string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size()); nodes.at(feature)->setNumStates(states.at(feature).size());
}); });
classNumStates = nodes.at(className)->getNumStates(); classNumStates = nodes.at(className)->getNumStates();
} }
// Fits the network from a feature tensor X and a label tensor y.
// X comes in nxm, where n is the number of features and m the number of samples.
// Validates the inputs with checkFitData (which throws std::invalid_argument on a
// mismatch), stores className, builds `samples` and delegates to completeFit.
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states) void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{ {
// X.size(1) is passed as the sample count, X.size(0) as the feature count
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className; this->className = className;
// View y as a column, transpose it to a single row, and append that row below X along dim 0
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0); samples = torch::cat({ X , ytmp }, 0);
// NOTE(review): row_feature is computed and never used; this loop looks like leftover code
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." }); auto row_feature = X.index({ i, "..." });
} }
completeFit(states, weights); completeFit(states, weights);
} }
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states) void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{ {
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className; this->className = className;
@ -157,7 +157,7 @@ namespace bayesnet {
completeFit(states, weights); completeFit(states, weights);
} }
// input_data comes in nxm, where n is the number of features and m the number of samples // input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<double>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states) void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{ {
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@ -170,11 +170,11 @@ namespace bayesnet {
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights); completeFit(states, weights);
} }
void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights) void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
setStates(states); setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
vector<thread> threads; std::vector<std::thread> threads;
for (auto& node : nodes) { for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() { threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights); node.second->computeCPT(samples, features, laplaceSmoothing, weights);
@ -188,12 +188,12 @@ namespace bayesnet {
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
torch::Tensor result; torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) { for (int i = 0; i < samples.size(1); ++i) {
const Tensor sample = samples.index({ "...", i }); const torch::Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample); auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64); auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); // result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
@ -204,32 +204,32 @@ namespace bayesnet {
return result.argmax(1); return result.argmax(1);
} }
// Return mxn tensor of probabilities.
// Thin wrapper: forwards to predict_tensor with proba = true.
Tensor Network::predict_proba(const Tensor& samples) torch::Tensor Network::predict_proba(const torch::Tensor& samples)
{ {
return predict_tensor(samples, true); return predict_tensor(samples, true);
} }
// Return mx1 tensor of predictions (not probabilities).
// Thin wrapper: forwards to predict_tensor with proba = false.
Tensor Network::predict(const Tensor& samples) torch::Tensor Network::predict(const torch::Tensor& samples)
{ {
return predict_tensor(samples, false); return predict_tensor(samples, false);
} }
// Return mx1 vector of predictions // Return mx1 std::vector of predictions
// tsamples is nxm vector of samples // tsamples is nxm std::vector of samples
vector<int> Network::predict(const vector<vector<int>>& tsamples) std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
vector<int> predictions; std::vector<int> predictions;
vector<int> sample; std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]); sample.push_back(tsamples[col][row]);
} }
vector<double> classProbabilities = predict_sample(sample); std::vector<double> classProbabilities = predict_sample(sample);
// Find the class with the maximum posterior probability // Find the class with the maximum posterior probability
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem); int predictedClass = distance(classProbabilities.begin(), maxElem);
@ -237,14 +237,14 @@ namespace bayesnet {
} }
return predictions; return predictions;
} }
// Return mxn vector of probabilities // Return mxn std::vector of probabilities
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples) std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{ {
if (!fitted) { if (!fitted) {
throw logic_error("You must call fit() before calling predict_proba()"); throw std::logic_error("You must call fit() before calling predict_proba()");
} }
vector<vector<double>> predictions; std::vector<std::vector<double>> predictions;
vector<int> sample; std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
@ -254,9 +254,9 @@ namespace bayesnet {
} }
return predictions; return predictions;
} }
double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels) double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
{ {
vector<int> y_pred = predict(tsamples); std::vector<int> y_pred = predict(tsamples);
int correct = 0; int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) { for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) { if (y_pred[i] == labels[i]) {
@ -265,35 +265,35 @@ namespace bayesnet {
} }
return (double)correct / y_pred.size(); return (double)correct / y_pred.size();
} }
// Returns the vector of probabilities produced by exactInference for one sample
// given as a vector of ints.
// Throws std::invalid_argument if sample.size() differs from features.size() - 1.
// The i-th sample value becomes the evidence for features[i].
// NOTE(review): this presumably relies on the class name being the last entry of
// `features` — confirm against the code that fills `features`.
// NOTE(review): features.size() - 1 is unsigned and wraps around if `features` is empty.
vector<double> Network::predict_sample(const vector<int>& sample) std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{ {
// Ensure the sample size is equal to the number of features // Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) { if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) + throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size() - 1) + ")"); ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
} }
map<string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) { for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i]; evidence[features[i]] = sample[i];
} }
return exactInference(evidence); return exactInference(evidence);
} }
// Tensor overload: returns the vector of probabilities produced by exactInference
// for one sample given as a tensor indexed along dimension 0.
// Throws std::invalid_argument if sample.size(0) differs from features.size() - 1.
// Each element is read with item<int>() and becomes the evidence for features[i].
// NOTE(review): features.size() - 1 is unsigned and wraps around if `features` is empty.
vector<double> Network::predict_sample(const Tensor& sample) std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{ {
// Ensure the sample size is equal to the number of features // Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) { if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) + throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")"); ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
} }
map<string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) { for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>(); evidence[features[i]] = sample[i].item<int>();
} }
return exactInference(evidence); return exactInference(evidence);
} }
double Network::computeFactor(map<string, int>& completeEvidence) double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{ {
double result = 1.0; double result = 1.0;
for (auto& node : getNodes()) { for (auto& node : getNodes()) {
@ -301,17 +301,17 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<double> Network::exactInference(map<string, int>& evidence) std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{ {
vector<double> result(classNumStates, 0.0); std::vector<double> result(classNumStates, 0.0);
vector<thread> threads; std::vector<std::thread> threads;
mutex mtx; std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) { for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() { threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = map<string, int>(evidence); auto completeEvidence = std::map<std::string, int>(evidence);
completeEvidence[getClassName()] = i; completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence); double factor = computeFactor(completeEvidence);
lock_guard<mutex> lock(mtx); std::lock_guard<std::mutex> lock(mtx);
result[i] = factor; result[i] = factor;
}); });
} }
@ -323,12 +323,12 @@ namespace bayesnet {
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result; return result;
} }
vector<string> Network::show() const std::vector<std::string> Network::show() const
{ {
vector<string> result; std::vector<std::string> result;
// Draw the network // Draw the network
for (auto& node : nodes) { for (auto& node : nodes) {
string line = node.first + " -> "; std::string line = node.first + " -> ";
for (auto child : node.second->getChildren()) { for (auto child : node.second->getChildren()) {
line += child->getName() + ", "; line += child->getName() + ", ";
} }
@ -336,12 +336,12 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<string> Network::graph(const string& title) const std::vector<std::string> Network::graph(const std::string& title) const
{ {
auto output = vector<string>(); auto output = std::vector<std::string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet "; auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n"; auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
string header = prefix + title + suffix; std::string header = prefix + title + suffix;
output.push_back(header); output.push_back(header);
for (auto& node : nodes) { for (auto& node : nodes) {
auto result = node.second->graph(className); auto result = node.second->graph(className);
@ -350,9 +350,9 @@ namespace bayesnet {
output.push_back("}\n"); output.push_back("}\n");
return output; return output;
} }
vector<pair<string, string>> Network::getEdges() const std::vector<std::pair<std::string, std::string>> Network::getEdges() const
{ {
auto edges = vector<pair<string, string>>(); auto edges = std::vector<std::pair<std::string, std::string>>();
for (const auto& node : nodes) { for (const auto& node : nodes) {
auto head = node.first; auto head = node.first;
for (const auto& child : node.second->getChildren()) { for (const auto& child : node.second->getChildren()) {
@ -366,7 +366,7 @@ namespace bayesnet {
{ {
return getEdges().size(); return getEdges().size();
} }
vector<string> Network::topological_sort() std::vector<std::string> Network::topological_sort()
{ {
/* Check if all the fathers of every node are before the node */ /* Check if all the fathers of every node are before the node */
auto result = features; auto result = features;
@ -393,10 +393,10 @@ namespace bayesnet {
ending = false; ending = false;
} }
} else { } else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result"); throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
} }
} else { } else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result"); throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
} }
} }
} }
@ -406,8 +406,8 @@ namespace bayesnet {
// Debug helper: for every node, prints its name, number of states and the sizes of
// its CPT tensor on one line, followed by the CPT tensor itself, to standard output.
void Network::dump_cpt() const void Network::dump_cpt() const
{ {
for (auto& node : nodes) { for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl; std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
cout << node.second->getCPT() << endl; std::cout << node.second->getCPT() << std::endl;
} }
} }
} }

View File

@ -7,22 +7,22 @@
namespace bayesnet { namespace bayesnet {
class Network { class Network {
private: private:
map<string, unique_ptr<Node>> nodes; std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted; bool fitted;
float maxThreads = 0.95; float maxThreads = 0.95;
int classNumStates; int classNumStates;
vector<string> features; // Including classname std::vector<std::string> features; // Including classname
string className; std::string className;
double laplaceSmoothing; double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&); bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&); std::vector<double> predict_sample(const std::vector<int>&);
vector<double> predict_sample(const torch::Tensor&); std::vector<double> predict_sample(const torch::Tensor&);
vector<double> exactInference(map<string, int>&); std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(map<string, int>&); double computeFactor(std::map<std::string, int>&);
void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights); void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
// NOTE(review): the first two parameter names here (n_features, n_samples) are in
// the opposite order to the definition in Network.cc (n_samples, n_features).
// Callers pass the sample count first, so the definition's naming looks like the
// intended one. Harmless to the compiler, misleading to readers.
void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights); void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const map<string, vector<int>>&); void setStates(const std::map<std::string, std::vector<int>>&);
public: public:
Network(); Network();
explicit Network(float); explicit Network(float);
@ -30,33 +30,33 @@ namespace bayesnet {
~Network() = default; ~Network() = default;
torch::Tensor& getSamples(); torch::Tensor& getSamples();
float getmaxThreads(); float getmaxThreads();
void addNode(const string&); void addNode(const std::string&);
void addEdge(const string&, const string&); void addEdge(const std::string&, const std::string&);
map<string, std::unique_ptr<Node>>& getNodes(); std::map<std::string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures() const; std::vector<std::string> getFeatures() const;
int getStates() const; int getStates() const;
vector<pair<string, string>> getEdges() const; std::vector<std::pair<std::string, std::string>> getEdges() const;
int getNumEdges() const; int getNumEdges() const;
int getClassNumStates() const; int getClassNumStates() const;
string getClassName() const; std::string getClassName() const;
/* /*
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
*/ */
void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<double>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states); void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states); void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states); void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&); double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
vector<string> topological_sort(); std::vector<std::string> topological_sort();
vector<string> show() const; std::vector<std::string> show() const;
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize(); void initialize();
void dump_cpt() const; void dump_cpt() const;
inline string version() { return "0.2.0"; } inline std::string version() { return "0.2.0"; }
}; };
} }
#endif #endif

View File

@ -3,7 +3,7 @@
namespace bayesnet { namespace bayesnet {
Node::Node(const std::string& name) Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>()) : name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
{ {
} }
void Node::clear() void Node::clear()
@ -14,7 +14,7 @@ namespace bayesnet {
dimensions.clear(); dimensions.clear();
numStates = 0; numStates = 0;
} }
string Node::getName() const std::string Node::getName() const
{ {
return name; return name;
} }
@ -34,11 +34,11 @@ namespace bayesnet {
{ {
children.push_back(child); children.push_back(child);
} }
vector<Node*>& Node::getParents() std::vector<Node*>& Node::getParents()
{ {
return parents; return parents;
} }
vector<Node*>& Node::getChildren() std::vector<Node*>& Node::getChildren()
{ {
return children; return children;
} }
@ -63,28 +63,28 @@ namespace bayesnet {
*/ */
unsigned Node::minFill() unsigned Node::minFill()
{ {
unordered_set<string> neighbors; std::unordered_set<std::string> neighbors;
for (auto child : children) { for (auto child : children) {
neighbors.emplace(child->getName()); neighbors.emplace(child->getName());
} }
for (auto parent : parents) { for (auto parent : parents) {
neighbors.emplace(parent->getName()); neighbors.emplace(parent->getName());
} }
auto source = vector<string>(neighbors.begin(), neighbors.end()); auto source = std::vector<std::string>(neighbors.begin(), neighbors.end());
return combinations(source).size(); return combinations(source).size();
} }
vector<pair<string, string>> Node::combinations(const vector<string>& source) std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
{ {
vector<pair<string, string>> result; std::vector<std::pair<std::string, std::string>> result;
for (int i = 0; i < source.size(); ++i) { for (int i = 0; i < source.size(); ++i) {
string temp = source[i]; std::string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) { for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] }); result.push_back({ temp, source[j] });
} }
} }
return result; return result;
} }
void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights) void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{ {
dimensions.clear(); dimensions.clear();
// Get dimensions of the CPT // Get dimensions of the CPT
@ -96,7 +96,7 @@ namespace bayesnet {
// Fill table with counts // Fill table with counts
auto pos = find(features.begin(), features.end(), name); auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) { if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset"); throw std::logic_error("Feature " + name + " not found in dataset");
} }
int name_index = pos - features.begin(); int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
@ -105,7 +105,7 @@ namespace bayesnet {
for (auto parent : parents) { for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName()); pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) { if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset"); throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
} }
int parent_index = pos - features.begin(); int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample })); coordinates.push_back(dataset.index({ parent_index, n_sample }));
@ -116,17 +116,17 @@ namespace bayesnet {
// Normalize the counts // Normalize the counts
cpTable = cpTable / cpTable.sum(0); cpTable = cpTable / cpTable.sum(0);
} }
float Node::getFactorValue(map<string, int>& evidence) float Node::getFactorValue(std::map<std::string, int>& evidence)
{ {
c10::List<c10::optional<at::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h) // following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name])); coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>(); return cpTable.index({ coordinates }).item<float>();
} }
vector<string> Node::graph(const string& className) std::vector<std::string> Node::graph(const std::string& className)
{ {
auto output = vector<string>(); auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : ""; auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n"); output.push_back(name + " [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); }); transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });

View File

@ -5,33 +5,32 @@
#include <vector> #include <vector>
#include <string> #include <string>
namespace bayesnet { namespace bayesnet {
using namespace std;
class Node { class Node {
private: private:
string name; std::string name;
vector<Node*> parents; std::vector<Node*> parents;
vector<Node*> children; std::vector<Node*> children;
int numStates; // number of states of the variable int numStates; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ... torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
vector<int64_t> dimensions; // dimensions of the cpTable std::vector<int64_t> dimensions; // dimensions of the cpTable
vector<pair<string, string>> combinations(const vector<string>&); std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
public: public:
explicit Node(const string&); explicit Node(const std::string&);
void clear(); void clear();
void addParent(Node*); void addParent(Node*);
void addChild(Node*); void addChild(Node*);
void removeParent(Node*); void removeParent(Node*);
void removeChild(Node*); void removeChild(Node*);
string getName() const; std::string getName() const;
vector<Node*>& getParents(); std::vector<Node*>& getParents();
vector<Node*>& getChildren(); std::vector<Node*>& getChildren();
torch::Tensor& getCPT(); torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights); void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const; int getNumStates() const;
void setNumStates(int); void setNumStates(int);
unsigned minFill(); unsigned minFill();
vector<string> graph(const string& clasName); // Returns a vector of strings representing the graph in graphviz format std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
float getFactorValue(map<string, int>&); float getFactorValue(std::map<std::string, int>&);
}; };
} }
#endif #endif

View File

@ -2,7 +2,7 @@
#include "ArffFiles.h" #include "ArffFiles.h"
namespace bayesnet { namespace bayesnet {
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {} Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal() Proposal::~Proposal()
{ {
for (auto& [key, value] : discretizers) { for (auto& [key, value] : discretizers) {
@ -18,14 +18,14 @@ namespace bayesnet {
throw std::invalid_argument("y must be an integer tensor"); throw std::invalid_argument("y must be an integer tensor");
} }
} }
map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model) map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
{ {
// order of local discretization is important. no good 0, 1, 2... // order of local discretization is important. no good 0, 1, 2...
// although we rediscretize features after the local discretization of every feature // although we rediscretize features after the local discretization of every feature
auto order = model.topological_sort(); auto order = model.topological_sort();
auto& nodes = model.getNodes(); auto& nodes = model.getNodes();
map<string, vector<int>> states = oldStates; map<std::string, std::vector<int>> states = oldStates;
vector<int> indicesToReDiscretize; std::vector<int> indicesToReDiscretize;
bool upgrade = false; // Flag to check if we need to upgrade the model bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) { for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents(); auto nodeParents = nodes[feature]->getParents();
@ -33,16 +33,16 @@ namespace bayesnet {
upgrade = true; upgrade = true;
int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin(); int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
indicesToReDiscretize.push_back(index); // We need to re-discretize this feature indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
vector<string> parents; std::vector<std::string> parents;
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); }); transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
// Remove class as parent as it will be added later // Remove class as parent as it will be added later
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end()); parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents // Get the indices of the parents
vector<int> indices; std::vector<int> indices;
indices.push_back(-1); // Add class index indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); }); transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y) // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents(Xf.size(1)); std::vector<std::string> yJoinParents(Xf.size(1));
for (auto idx : indices) { for (auto idx : indices) {
for (int i = 0; i < Xf.size(1); ++i) { for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>()); yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
@ -51,16 +51,16 @@ namespace bayesnet {
auto arff = ArffFiles(); auto arff = ArffFiles();
auto yxv = arff.factorize(yJoinParents); auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>(); auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1)); auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv); discretizers[feature]->fit(xvf, yxv);
} }
if (upgrade) { if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers // Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) { for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>(); auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt))); pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1); auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0); iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node //Update new states of the feature/node
states[pFeatures[index]] = xStates; states[pFeatures[index]] = xStates;
@ -70,28 +70,28 @@ namespace bayesnet {
} }
return states; return states;
} }
map<string, vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y) map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{ {
// Discretize the continuous input data and build pDataset (Classifier::dataset) // Discretize the continuous input data and build pDataset (Classifier::dataset)
int m = Xf.size(1); int m = Xf.size(1);
int n = Xf.size(0); int n = Xf.size(0);
map<string, vector<int>> states; map<std::string, std::vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, kInt32); pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row) // discretize input data by feature(row)
for (auto i = 0; i < pFeatures.size(); ++i) { for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp(); auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>(); auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv); discretizer->fit(Xt, yv);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt))); pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1); auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0); iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates; states[pFeatures[i]] = xStates;
discretizers[pFeatures[i]] = discretizer; discretizers[pFeatures[i]] = discretizer;
} }
int n_classes = torch::max(y).item<int>() + 1; int n_classes = torch::max(y).item<int>() + 1;
auto yStates = vector<int>(n_classes); auto yStates = std::vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0); iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates; states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y); pDataset.index_put_({ n, "..." }, y);
@ -101,7 +101,7 @@ namespace bayesnet {
{ {
auto Xtd = torch::zeros_like(X, torch::kInt32); auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) { for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1)); auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt); auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32)); Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
} }

View File

@ -10,20 +10,20 @@
namespace bayesnet { namespace bayesnet {
class Proposal { class Proposal {
public: public:
Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_); Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
virtual ~Proposal(); virtual ~Proposal();
protected: protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y); void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X); torch::Tensor prepareX(torch::Tensor& X);
map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model); map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
map<string, vector<int>> fit_local_discretization(const torch::Tensor& y); map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers; map<std::string, mdlp::CPPFImdlp*> discretizers;
private: private:
torch::Tensor& pDataset; // (n+1)xm tensor torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures; std::vector<std::string>& pFeatures;
string& pClassName; std::string& pClassName;
}; };
} }

View File

@ -17,7 +17,7 @@ namespace bayesnet {
} }
} }
} }
vector<string> SPODE::graph(const string& name) const std::vector<std::string> SPODE::graph(const std::string& name) const
{ {
return model.graph(name); return model.graph(name);
} }

View File

@ -11,7 +11,7 @@ namespace bayesnet {
public: public:
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() {}; virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODE") const override;
}; };
} }
#endif #endif

View File

@ -1,16 +1,15 @@
#include "SPODELd.h" #include "SPODELd.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
@ -18,7 +17,7 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }
SPODELd& SPODELd::fit(torch::Tensor& dataset, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
if (!torch::is_floating_point(dataset)) { if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor"); throw std::runtime_error("Dataset must be a floating point tensor");
@ -27,7 +26,7 @@ namespace bayesnet {
y = dataset.index({ -1, "..." }).clone(); y = dataset.index({ -1, "..." }).clone();
features = features_; features = features_;
className = className_; className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
@ -36,12 +35,12 @@ namespace bayesnet {
return *this; return *this;
} }
Tensor SPODELd::predict(Tensor& X) torch::Tensor SPODELd::predict(torch::Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return SPODE::predict(Xt); return SPODE::predict(Xt);
} }
vector<string> SPODELd::graph(const string& name) const std::vector<std::string> SPODELd::graph(const std::string& name) const
{ {
return SPODE::graph(name); return SPODE::graph(name);
} }

View File

@ -4,16 +4,15 @@
#include "Proposal.h" #include "Proposal.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
class SPODELd : public SPODE, public Proposal { class SPODELd : public SPODE, public Proposal {
public: public:
explicit SPODELd(int root); explicit SPODELd(int root);
virtual ~SPODELd() = default; virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODE") const override;
Tensor predict(Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !SPODELD_H #endif // !SPODELD_H

View File

@ -1,8 +1,6 @@
#include "TAN.h" #include "TAN.h"
namespace bayesnet { namespace bayesnet {
using namespace torch;
TAN::TAN() : Classifier(Network()) {} TAN::TAN() : Classifier(Network()) {}
void TAN::buildModel(const torch::Tensor& weights) void TAN::buildModel(const torch::Tensor& weights)
@ -11,10 +9,10 @@ namespace bayesnet {
addNodes(); addNodes();
// 1. Compute mutual information between each feature and the class and set the root node // 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class // as the highest mutual information with the class
auto mi = vector <pair<int, float >>(); auto mi = std::vector <std::pair<int, float >>();
Tensor class_dataset = dataset.index({ -1, "..." }); torch::Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) { for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = dataset.index({ i, "..." }); torch::Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights); auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value }); mi.push_back({ i, mi_value });
} }
@ -34,7 +32,7 @@ namespace bayesnet {
model.addEdge(className, feature); model.addEdge(className, feature);
} }
} }
vector<string> TAN::graph(const string& title) const std::vector<std::string> TAN::graph(const std::string& title) const
{ {
return model.graph(title); return model.graph(title);
} }

View File

@ -2,7 +2,6 @@
#define TAN_H #define TAN_H
#include "Classifier.h" #include "Classifier.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
class TAN : public Classifier { class TAN : public Classifier {
private: private:
protected: protected:
@ -10,7 +9,7 @@ namespace bayesnet {
public: public:
TAN(); TAN();
virtual ~TAN() {}; virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TAN") const override;
}; };
} }
#endif #endif

View File

@ -1,16 +1,15 @@
#include "TANLd.h" #include "TANLd.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
className = className_; className = className_;
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
@ -19,12 +18,12 @@ namespace bayesnet {
return *this; return *this;
} }
Tensor TANLd::predict(Tensor& X) torch::Tensor TANLd::predict(torch::Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return TAN::predict(Xt); return TAN::predict(Xt);
} }
vector<string> TANLd::graph(const string& name) const std::vector<std::string> TANLd::graph(const std::string& name) const
{ {
return TAN::graph(name); return TAN::graph(name);
} }

View File

@ -4,16 +4,15 @@
#include "Proposal.h" #include "Proposal.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
class TANLd : public TAN, public Proposal { class TANLd : public TAN, public Proposal {
private: private:
public: public:
TANLd(); TANLd();
virtual ~TANLd() = default; virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TAN") const override;
Tensor predict(Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !TANLD_H #endif // !TANLD_H

View File

@ -1,25 +1,23 @@
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
using namespace std;
using namespace torch;
// Return the indices in descending order // Return the indices in descending order
vector<int> argsort(vector<double>& nums) std::vector<int> argsort(std::vector<double>& nums)
{ {
int n = nums.size(); int n = nums.size();
vector<int> indices(n); std::vector<int> indices(n);
iota(indices.begin(), indices.end(), 0); iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices; return indices;
} }
vector<vector<int>> tensorToVector(Tensor& tensor) std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor)
{ {
// convert mxn tensor to nxm vector // convert mxn tensor to nxm std::vector
vector<vector<int>> result; std::vector<std::vector<int>> result;
// Iterate over cols // Iterate over cols
for (int i = 0; i < tensor.size(1); ++i) { for (int i = 0; i < tensor.size(1); ++i) {
auto col_tensor = tensor.index({ "...", i }); auto col_tensor = tensor.index({ "...", i });
auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0)); auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
result.push_back(col); result.push_back(col);
} }
return result; return result;

View File

@ -3,9 +3,7 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
namespace bayesnet { namespace bayesnet {
using namespace std; std::vector<int> argsort(std::vector<double>& nums);
using namespace torch; std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor);
vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor);
} }
#endif //BAYESNET_UTILS_H #endif //BAYESNET_UTILS_H

View File

@ -13,26 +13,25 @@
namespace fs = std::filesystem; namespace fs = std::filesystem;
// function ftime_to_string, Code taken from // function ftime_to_std::string, Code taken from
// https://stackoverflow.com/a/58237530/1389271 // https://stackoverflow.com/a/58237530/1389271
template <typename TP> template <typename TP>
std::string ftime_to_string(TP tp) std::string ftime_to_string(TP tp)
{ {
using namespace std::chrono; auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(tp - TP::clock::now()
auto sctp = time_point_cast<system_clock::duration>(tp - TP::clock::now() + std::chrono::system_clock::now());
+ system_clock::now()); auto tt = std::chrono::system_clock::to_time_t(sctp);
auto tt = system_clock::to_time_t(sctp);
std::tm* gmt = std::gmtime(&tt); std::tm* gmt = std::gmtime(&tt);
std::stringstream buffer; std::stringstream buffer;
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
return buffer.str(); return buffer.str();
} }
namespace platform { namespace platform {
string BestResults::build() std::string BestResults::build()
{ {
auto files = loadResultFiles(); auto files = loadResultFiles();
if (files.size() == 0) { if (files.size() == 0) {
cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1); exit(1);
} }
json bests; json bests;
@ -42,7 +41,7 @@ namespace platform {
for (auto const& item : data.at("results")) { for (auto const& item : data.at("results")) {
bool update = false; bool update = false;
// Check if results file contains only one dataset // Check if results file contains only one dataset
auto datasetName = item.at("dataset").get<string>(); auto datasetName = item.at("dataset").get<std::string>();
if (bests.contains(datasetName)) { if (bests.contains(datasetName)) {
if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) { if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) {
update = true; update = true;
@ -55,39 +54,39 @@ namespace platform {
} }
} }
} }
string bestFileName = path + bestResultFile(); std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl; std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl;
} }
ofstream file(bestFileName); std::ofstream file(bestFileName);
file << bests; file << bests;
file.close(); file.close();
return bestFileName; return bestFileName;
} }
string BestResults::bestResultFile() std::string BestResults::bestResultFile()
{ {
return "best_results_" + score + "_" + model + ".json"; return "best_results_" + score + "_" + model + ".json";
} }
// Extracts the model and score from a result file name such as
//   results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
// The score is the text between the first and second underscore, the model is
// the text between the second and third underscore.
//
// Returns { model, score }.
// When the name has fewer than two underscores both strings are empty; when the
// third underscore is missing the model extends to the end of the name.
std::pair<std::string, std::string> getModelScore(std::string name)
{
    const auto first = name.find('_');
    if (first == std::string::npos) {
        return { std::string(), std::string() };
    }
    const auto second = name.find('_', first + 1);
    if (second == std::string::npos) {
        // Without this guard the npos arithmetic below would wrap around and
        // return unrelated pieces of the name.
        return { std::string(), std::string() };
    }
    const auto third = name.find('_', second + 1);
    std::string score = name.substr(first + 1, second - first - 1);
    std::string model = third == std::string::npos
        ? name.substr(second + 1)
        : name.substr(second + 1, third - second - 1);
    return { model, score };
}
vector<string> BestResults::loadResultFiles() std::vector<std::string> BestResults::loadResultFiles()
{ {
vector<string> files; std::vector<std::string> files;
using std::filesystem::directory_iterator; using std::filesystem::directory_iterator;
string fileModel, fileScore; std::string fileModel, fileScore;
for (const auto& file : directory_iterator(path)) { for (const auto& file : directory_iterator(path)) {
auto fileName = file.path().filename().string(); auto fileName = file.path().filename().string();
if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) { if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) {
tie(fileModel, fileScore) = getModelScore(fileName); tie(fileModel, fileScore) = getModelScore(fileName);
if (score == fileScore && (model == fileModel || model == "any")) { if (score == fileScore && (model == fileModel || model == "any")) {
files.push_back(fileName); files.push_back(fileName);
@ -96,37 +95,37 @@ namespace platform {
} }
return files; return files;
} }
// Opens fileName and parses its contents as JSON.
// Throws std::invalid_argument when the file cannot be opened.
json BestResults::loadFile(const std::string& fileName)
{
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(resultData);
}
vector<string> BestResults::getModels() std::vector<std::string> BestResults::getModels()
{ {
set<string> models; std::set<std::string> models;
vector<string> result; std::vector<std::string> result;
auto files = loadResultFiles(); auto files = loadResultFiles();
if (files.size() == 0) { if (files.size() == 0) {
cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1); exit(1);
} }
string fileModel, fileScore; std::string fileModel, fileScore;
for (const auto& file : files) { for (const auto& file : files) {
// extract the model from the file name // extract the model from the file name
tie(fileModel, fileScore) = getModelScore(file); tie(fileModel, fileScore) = getModelScore(file);
// add the model to the vector of models // add the model to the std::vector of models
models.insert(fileModel); models.insert(fileModel);
} }
result = vector<string>(models.begin(), models.end()); result = std::vector<std::string>(models.begin(), models.end());
return result; return result;
} }
vector<string> BestResults::getDatasets(json table) std::vector<std::string> BestResults::getDatasets(json table)
{ {
vector<string> datasets; std::vector<std::string> datasets;
for (const auto& dataset : table.items()) { for (const auto& dataset : table.items()) {
datasets.push_back(dataset.key()); datasets.push_back(dataset.key());
} }
@ -136,7 +135,7 @@ namespace platform {
{ {
auto models = getModels(); auto models = getModels();
for (const auto& model : models) { for (const auto& model : models) {
cout << "Building best results for model: " << model << endl; std::cout << "Building best results for model: " << model << std::endl;
this->model = model; this->model = model;
build(); build();
} }
@ -144,62 +143,62 @@ namespace platform {
} }
void BestResults::listFile() void BestResults::listFile()
{ {
string bestFileName = path + bestResultFile(); std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
} else { } else {
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1); exit(1);
} }
auto temp = ConfigLocale(); auto temp = ConfigLocale();
auto date = ftime_to_string(filesystem::last_write_time(bestFileName)); auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName); auto data = loadFile(bestFileName);
auto datasets = getDatasets(data); auto datasets = getDatasets(data);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
int maxFileName = 0; int maxFileName = 0;
int maxHyper = 15; int maxHyper = 15;
for (auto const& item : data.items()) { for (auto const& item : data.items()) {
maxHyper = max(maxHyper, (int)item.value().at(1).dump().size()); maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size());
maxFileName = max(maxFileName, (int)item.value().at(2).get<string>().size()); maxFileName = std::max(maxFileName, (int)item.value().at(2).get<std::string>().size());
} }
stringstream oss; std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << model << " as of " << date << endl; oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl;
cout << oss.str(); std::cout << oss.str();
cout << string(oss.str().size() - 8, '-') << endl; std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << "Dataset" << "Score " << setw(maxFileName) << "File" << " Hyperparameters" << endl; std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl;
cout << "=== " << string(maxDatasetName, '=') << " =========== " << string(maxFileName, '=') << " " << string(maxHyper, '=') << endl; std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl;
auto i = 0; auto i = 0;
bool odd = true; bool odd = true;
double total = 0; double total = 0;
for (auto const& item : data.items()) { for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN(); auto color = odd ? Colors::BLUE() : Colors::CYAN();
double value = item.value().at(0).get<double>(); double value = item.value().at(0).get<double>();
cout << color << setw(3) << fixed << right << i++ << " "; std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
cout << setw(maxDatasetName) << left << item.key() << " "; std::cout << std::setw(maxDatasetName) << std::left << item.key() << " ";
cout << setw(11) << setprecision(9) << fixed << value << " "; std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " ";
cout << setw(maxFileName) << item.value().at(2).get<string>() << " "; std::cout << std::setw(maxFileName) << item.value().at(2).get<std::string>() << " ";
cout << item.value().at(1) << " "; std::cout << item.value().at(1) << " ";
cout << endl; std::cout << std::endl;
total += value; total += value;
odd = !odd; odd = !odd;
} }
cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ===========" << endl; std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl;
cout << setw(5 + maxDatasetName) << "Total.................. " << setw(11) << setprecision(8) << fixed << total << endl; std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl;
} }
json BestResults::buildTableResults(vector<string> models) json BestResults::buildTableResults(std::vector<std::string> models)
{ {
json table; json table;
auto maxDate = filesystem::file_time_type::max(); auto maxDate = std::filesystem::file_time_type::max();
for (const auto& model : models) { for (const auto& model : models) {
this->model = model; this->model = model;
string bestFileName = path + bestResultFile(); std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
} else { } else {
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1); exit(1);
} }
auto dateWrite = filesystem::last_write_time(bestFileName); auto dateWrite = std::filesystem::last_write_time(bestFileName);
if (dateWrite < maxDate) { if (dateWrite < maxDate) {
maxDate = dateWrite; maxDate = dateWrite;
} }
@ -209,25 +208,25 @@ namespace platform {
table["dateTable"] = ftime_to_string(maxDate); table["dateTable"] = ftime_to_string(maxDate);
return table; return table;
} }
void BestResults::printTableResults(vector<string> models, json table) void BestResults::printTableResults(std::vector<std::string> models, json table)
{ {
stringstream oss; std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl; oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
cout << oss.str(); std::cout << oss.str();
cout << string(oss.str().size() - 8, '-') << endl; std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset"); std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
for (const auto& model : models) { for (const auto& model : models) {
cout << setw(maxModelName) << left << model << " "; std::cout << std::setw(maxModelName) << std::left << model << " ";
} }
cout << endl; std::cout << std::endl;
cout << "=== " << string(maxDatasetName, '=') << " "; std::cout << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) { for (const auto& model : models) {
cout << string(maxModelName, '=') << " "; std::cout << std::string(maxModelName, '=') << " ";
} }
cout << endl; std::cout << std::endl;
auto i = 0; auto i = 0;
bool odd = true; bool odd = true;
map<string, double> totals; std::map<std::string, double> totals;
int nDatasets = table.begin().value().size(); int nDatasets = table.begin().value().size();
for (const auto& model : models) { for (const auto& model : models) {
totals[model] = 0.0; totals[model] = 0.0;
@ -235,8 +234,8 @@ namespace platform {
auto datasets = getDatasets(table.begin().value()); auto datasets = getDatasets(table.begin().value());
for (auto const& dataset : datasets) { for (auto const& dataset : datasets) {
auto color = odd ? Colors::BLUE() : Colors::CYAN(); auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " "; std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
cout << setw(maxDatasetName) << left << dataset << " "; std::cout << std::setw(maxDatasetName) << std::left << dataset << " ";
double maxValue = 0; double maxValue = 0;
// Find out the max value for this dataset // Find out the max value for this dataset
for (const auto& model : models) { for (const auto& model : models) {
@ -247,23 +246,23 @@ namespace platform {
} }
// Print the row with red colors on max values // Print the row with red colors on max values
for (const auto& model : models) { for (const auto& model : models) {
string efectiveColor = color; std::string efectiveColor = color;
double value = table[model].at(dataset).at(0).get<double>(); double value = table[model].at(dataset).at(0).get<double>();
if (value == maxValue) { if (value == maxValue) {
efectiveColor = Colors::RED(); efectiveColor = Colors::RED();
} }
totals[model] += value; totals[model] += value;
cout << efectiveColor << setw(maxModelName) << setprecision(maxModelName - 2) << fixed << value << " "; std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
} }
cout << endl; std::cout << std::endl;
odd = !odd; odd = !odd;
} }
cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " "; std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) { for (const auto& model : models) {
cout << string(maxModelName, '=') << " "; std::cout << std::string(maxModelName, '=') << " ";
} }
cout << endl; std::cout << std::endl;
cout << Colors::GREEN() << setw(5 + maxDatasetName) << " Totals..................."; std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals...................";
double max = 0.0; double max = 0.0;
for (const auto& total : totals) { for (const auto& total : totals) {
if (total.second > max) { if (total.second > max) {
@ -271,13 +270,13 @@ namespace platform {
} }
} }
for (const auto& model : models) { for (const auto& model : models) {
string efectiveColor = Colors::GREEN(); std::string efectiveColor = Colors::GREEN();
if (totals[model] == max) { if (totals[model] == max) {
efectiveColor = Colors::RED(); efectiveColor = Colors::RED();
} }
cout << efectiveColor << right << setw(maxModelName) << setprecision(maxModelName - 4) << fixed << totals[model] << " "; std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
} }
cout << endl; std::cout << std::endl;
} }
void BestResults::reportSingle(bool excel) void BestResults::reportSingle(bool excel)
{ {
@ -286,7 +285,7 @@ namespace platform {
auto models = getModels(); auto models = getModels();
// Build the table of results // Build the table of results
json table = buildTableResults(models); json table = buildTableResults(models);
vector<string> datasets = getDatasets(table.begin().value()); std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets); BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile()); excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName()); messageExcelFile(excel.getFileName());
@ -297,15 +296,15 @@ namespace platform {
auto models = getModels(); auto models = getModels();
// Build the table of results // Build the table of results
json table = buildTableResults(models); json table = buildTableResults(models);
vector<string> datasets = getDatasets(table.begin().value()); std::vector<std::string> datasets = getDatasets(table.begin().value());
maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxModelName = max(12, maxModelName); maxModelName = std::max(12, maxModelName);
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = max(25, maxDatasetName); maxDatasetName = std::max(25, maxDatasetName);
// Print the table of results // Print the table of results
printTableResults(models, table); printTableResults(models, table);
// Compute the Friedman test // Compute the Friedman test
map<string, map<string, float>> ranksModels; std::map<std::string, std::map<std::string, float>> ranksModels;
if (friedman) { if (friedman) {
Statistics stats(models, datasets, table, significance); Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest(); auto result = stats.friedmanTest();
@ -319,7 +318,7 @@ namespace platform {
int idx = -1; int idx = -1;
double min = 2000; double min = 2000;
// Find out the control model // Find out the control model
auto totals = vector<double>(models.size(), 0.0); auto totals = std::vector<double>(models.size(), 0.0);
for (const auto& dataset : datasets) { for (const auto& dataset : datasets) {
for (int i = 0; i < models.size(); ++i) { for (int i = 0; i < models.size(); ++i) {
totals[i] += ranksModels[dataset][models[i]]; totals[i] += ranksModels[dataset][models[i]];
@ -337,8 +336,8 @@ namespace platform {
messageExcelFile(excel.getFileName()); messageExcelFile(excel.getFileName());
} }
} }
void BestResults::messageExcelFile(const string& fileName) void BestResults::messageExcelFile(const std::string& fileName)
{ {
cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << endl; std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
} }
} }

View File

@ -2,32 +2,31 @@
#define BESTRESULTS_H #define BESTRESULTS_H
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
class BestResults { class BestResults {
public: public:
explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05) explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05)
: path(path), score(score), model(model), friedman(friedman), significance(significance) : path(path), score(score), model(model), friedman(friedman), significance(significance)
{ {
} }
string build(); std::string build();
void reportSingle(bool excel); void reportSingle(bool excel);
void reportAll(bool excel); void reportAll(bool excel);
void buildAll(); void buildAll();
private: private:
vector<string> getModels(); std::vector<std::string> getModels();
vector<string> getDatasets(json table); std::vector<std::string> getDatasets(json table);
vector<string> loadResultFiles(); std::vector<std::string> loadResultFiles();
void messageExcelFile(const string& fileName); void messageExcelFile(const std::string& fileName);
json buildTableResults(vector<string> models); json buildTableResults(std::vector<std::string> models);
void printTableResults(vector<string> models, json table); void printTableResults(std::vector<std::string> models, json table);
string bestResultFile(); std::string bestResultFile();
json loadFile(const string& fileName); json loadFile(const std::string& fileName);
void listFile(); void listFile();
string path; std::string path;
string score; std::string score;
string model; std::string model;
bool friedman; bool friedman;
double significance; double significance;
int maxModelName = 0; int maxModelName = 0;

View File

@ -7,20 +7,20 @@
#include "ReportExcel.h" #include "ReportExcel.h"
namespace platform { namespace platform {
// Opens fileName and parses its contents as JSON.
// Throws std::invalid_argument when the file cannot be opened.
json loadResultData(const std::string& fileName)
{
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    json data = json::parse(resultData);
    return data;
}
string getColumnName(int colNum) std::string getColumnName(int colNum)
{ {
string columnName = ""; std::string columnName = "";
if (colNum == 0) if (colNum == 0)
return "A"; return "A";
while (colNum > 0) { while (colNum > 0) {
@ -30,15 +30,15 @@ namespace platform {
} }
return columnName; return columnName;
} }
// Creates the workbook for the best-results report of `score`, sets its
// properties and formats, and widens datasetNameSize when a dataset name is
// longer than the current value.
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
    workbook = workbook_new((Paths::excel() + fileName).c_str());
    setProperties("Best Results");
    // Length of the longest dataset name. Computed with a loop so that an empty
    // list gives 0 instead of dereferencing the end iterator of max_element.
    size_t longestDataset = 0;
    for (const auto& dataset : datasets) {
        longestDataset = std::max(longestDataset, dataset.size());
    }
    int maxDatasetName = static_cast<int>(longestDataset);
    datasetNameSize = std::max(datasetNameSize, maxDatasetName);
    createFormats();
}
void BestResultsExcel::reportAll(const vector<string>& models, const json& table, const map<string, map<string, float>>& ranks, bool friedman, double significance) void BestResultsExcel::reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance)
{ {
this->table = table; this->table = table;
this->models = models; this->models = models;
@ -46,23 +46,23 @@ namespace platform {
this->friedman = friedman; this->friedman = friedman;
this->significance = significance; this->significance = significance;
worksheet = workbook_add_worksheet(workbook, "Best Results"); worksheet = workbook_add_worksheet(workbook, "Best Results");
int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
modelNameSize = max(modelNameSize, maxModelName); modelNameSize = std::max(modelNameSize, maxModelName);
formatColumns(); formatColumns();
build(); build();
} }
void BestResultsExcel::reportSingle(const string& model, const string& fileName) void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName)
{ {
worksheet = workbook_add_worksheet(workbook, "Report"); worksheet = workbook_add_worksheet(workbook, "Report");
if (FILE* fileTest = fopen(fileName.c_str(), "r")) { if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
} else { } else {
cerr << "File " << fileName << " doesn't exist." << endl; std::cerr << "File " << fileName << " doesn't exist." << std::endl;
exit(1); exit(1);
} }
json data = loadResultData(fileName); json data = loadResultData(fileName);
string title = "Best results for " + model; std::string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
// Body header // Body header
row = 3; row = 3;
@ -73,30 +73,30 @@ namespace platform {
writeString(row, 3, "File", "bodyHeader"); writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader"); writeString(row, 4, "Hyperparameters", "bodyHeader");
auto i = 0; auto i = 0;
string hyperparameters; std::string hyperparameters;
int hypSize = 22; int hypSize = 22;
map<string, string> files; // map of files imported and their tabs std::map<std::string, std::string> files; // map of files imported and their tabs
for (auto const& item : data.items()) { for (auto const& item : data.items()) {
row++; row++;
writeInt(row, 0, i++, "ints"); writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text"); writeString(row, 1, item.key().c_str(), "text");
writeDouble(row, 2, item.value().at(0).get<double>(), "result"); writeDouble(row, 2, item.value().at(0).get<double>(), "result");
auto fileName = item.value().at(2).get<string>(); auto fileName = item.value().at(2).get<std::string>();
string hyperlink = ""; std::string hyperlink = "";
try { try {
hyperlink = files.at(fileName); hyperlink = files.at(fileName);
} }
catch (const out_of_range& oor) { catch (const std::out_of_range& oor) {
auto tabName = "table_" + to_string(i); auto tabName = "table_" + std::to_string(i);
auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str()); auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
json data = loadResultData(Paths::results() + fileName); json data = loadResultData(Paths::results() + fileName);
auto report = ReportExcel(data, false, workbook, worksheetNew); auto report = ReportExcel(data, false, workbook, worksheetNew);
report.show(); report.show();
hyperlink = "#table_" + to_string(i); hyperlink = "#table_" + std::to_string(i);
files[fileName] = hyperlink; files[fileName] = hyperlink;
} }
hyperlink += "!H" + to_string(i + 6); hyperlink += "!H" + std::to_string(i + 6);
string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")"; std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")";
worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text")); worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text"));
hyperparameters = item.value().at(1).dump(); hyperparameters = item.value().at(1).dump();
if (hyperparameters.size() > hypSize) { if (hyperparameters.size() > hypSize) {
@ -107,13 +107,13 @@ namespace platform {
row++; row++;
// Set Totals // Set Totals
writeString(row, 1, "Total", "bodyHeader"); writeString(row, 1, "Total", "bodyHeader");
stringstream oss; std::stringstream oss;
auto colName = getColumnName(2); auto colName = getColumnName(2);
oss << "=sum(" << colName << "5:" << colName << row << ")"; oss << "=sum(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]); worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
// Set format // Set format
worksheet_freeze_panes(worksheet, 4, 2); worksheet_freeze_panes(worksheet, 4, 2);
vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 }; std::vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
for (int i = 0; i < columns_sizes.size(); ++i) { for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
} }
@ -125,7 +125,7 @@ namespace platform {
void BestResultsExcel::formatColumns() void BestResultsExcel::formatColumns()
{ {
worksheet_freeze_panes(worksheet, 4, 2); worksheet_freeze_panes(worksheet, 4, 2);
vector<int> columns_sizes = { 5, datasetNameSize }; std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) { for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize); columns_sizes.push_back(modelNameSize);
} }
@ -133,7 +133,7 @@ namespace platform {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
} }
} }
void BestResultsExcel::addConditionalFormat(string formula) void BestResultsExcel::addConditionalFormat(std::string formula)
{ {
// Add conditional format for max/min values in scores/ranks sheets // Add conditional format for max/min values in scores/ranks sheets
lxw_format* custom_format = workbook_add_format(workbook); lxw_format* custom_format = workbook_add_format(workbook);
@ -142,8 +142,8 @@ namespace platform {
// Create a conditional format object. A static object would also work. // Create a conditional format object. A static object would also work.
lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format)); lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format));
conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA; conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA;
string col = getColumnName(models.size() + 1); std::string col = getColumnName(models.size() + 1);
stringstream oss; std::stringstream oss;
oss << "=C5=" << formula << "($C5:$" << col << "5)"; oss << "=C5=" << formula << "($C5:$" << col << "5)";
auto formulaValue = oss.str(); auto formulaValue = oss.str();
conditional_format->value_string = formulaValue.c_str(); conditional_format->value_string = formulaValue.c_str();
@ -170,14 +170,14 @@ namespace platform {
doFriedman(); doFriedman();
} }
} }
string BestResultsExcel::getFileName() std::string BestResultsExcel::getFileName()
{ {
return Paths::excel() + fileName; return Paths::excel() + fileName;
} }
void BestResultsExcel::header(bool ranks) void BestResultsExcel::header(bool ranks)
{ {
row = 0; row = 0;
string message = ranks ? "Ranks for score " + score : "Best results for " + score; std::string message = ranks ? "Ranks for score " + score : "Best results for " + score;
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]);
// Body header // Body header
row = 3; row = 3;
@ -210,7 +210,7 @@ namespace platform {
writeString(row, 1, "Total", "bodyHeader"); writeString(row, 1, "Total", "bodyHeader");
int col = 1; int col = 1;
for (const auto& model : models) { for (const auto& model : models) {
stringstream oss; std::stringstream oss;
auto colName = getColumnName(col + 1); auto colName = getColumnName(col + 1);
oss << "=SUM(" << colName << "5:" << colName << row << ")"; oss << "=SUM(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
@ -221,7 +221,7 @@ namespace platform {
int col = 1; int col = 1;
for (const auto& model : models) { for (const auto& model : models) {
auto colName = getColumnName(col + 1); auto colName = getColumnName(col + 1);
stringstream oss; std::stringstream oss;
oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size(); oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size();
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
} }
@ -230,7 +230,7 @@ namespace platform {
void BestResultsExcel::doFriedman() void BestResultsExcel::doFriedman()
{ {
worksheet = workbook_add_worksheet(workbook, "Friedman"); worksheet = workbook_add_worksheet(workbook, "Friedman");
vector<int> columns_sizes = { 5, datasetNameSize }; std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) { for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize); columns_sizes.push_back(modelNameSize);
} }
@ -262,7 +262,7 @@ namespace platform {
row += 2; row += 2;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]); worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]);
row += 2; row += 2;
string controlModel = "Control Model: " + holmResult.model; std::string controlModel = "Control Model: " + holmResult.model;
worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]); worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]);
row++; row++;
writeString(row, 1, "Model", "bodyHeader"); writeString(row, 1, "Model", "bodyHeader");

View File

@ -5,18 +5,17 @@
#include <map> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
class BestResultsExcel : ExcelFile { class BestResultsExcel : ExcelFile {
public: public:
BestResultsExcel(const string& score, const vector<string>& datasets); BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
~BestResultsExcel(); ~BestResultsExcel();
void reportAll(const vector<string>& models, const json& table, const map<string, map<string, float>>& ranks, bool friedman, double significance); void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
void reportSingle(const string& model, const string& fileName); void reportSingle(const std::string& model, const std::string& fileName);
string getFileName(); std::string getFileName();
private: private:
void build(); void build();
void header(bool ranks); void header(bool ranks);
@ -24,13 +23,13 @@ namespace platform {
void footer(bool ranks); void footer(bool ranks);
void formatColumns(); void formatColumns();
void doFriedman(); void doFriedman();
void addConditionalFormat(string formula); void addConditionalFormat(std::string formula);
const string fileName = "BestResults.xlsx"; const std::string fileName = "BestResults.xlsx";
string score; std::string score;
vector<string> models; std::vector<std::string> models;
vector<string> datasets; std::vector<std::string> datasets;
json table; json table;
map<string, map<string, float>> ranksModels; std::map<std::string, std::map<std::string, float>> ranksModels;
bool friedman; bool friedman;
double significance; double significance;
int modelNameSize = 12; // Min size of the column int modelNameSize = 12; // Min size of the column

View File

@ -7,14 +7,14 @@
namespace platform { namespace platform {
class BestScore { class BestScore {
public: public:
static pair<string, double> getScore(const std::string& metric) static std::pair<std::string, double> getScore(const std::string& metric)
{ {
static map<pair<string, string>, pair<string, double>> data = { static std::map<std::pair<std::string, std::string>, std::pair<std::string, double>> data = {
{{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, {{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
{{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, {{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
}; };
auto env = platform::DotEnv(); auto env = platform::DotEnv();
string experiment = env.get("experiment"); std::string experiment = env.get("experiment");
try { try {
return data[{experiment, metric}]; return data[{experiment, metric}];
} }

View File

@ -2,22 +2,20 @@
#define LOCALE_H #define LOCALE_H
#include <locale> #include <locale>
#include <iostream> #include <iostream>
#include <sstream>
#include <string> #include <string>
using namespace std;
namespace platform { namespace platform {
struct separation : numpunct<char> { struct separation : std::numpunct<char> {
char do_decimal_point() const { return ','; } char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; } char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; } std::string do_grouping() const { return "\03"; }
}; };
class ConfigLocale { class ConfigLocale {
public: public:
explicit ConfigLocale() explicit ConfigLocale()
{ {
locale mylocale(cout.getloc(), new separation); std::locale mylocale(std::cout.getloc(), new separation);
locale::global(mylocale); std::locale::global(mylocale);
cout.imbue(mylocale); std::cout.imbue(mylocale);
} }
}; };
} }

View File

@ -1,17 +1,19 @@
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc) add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp) target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@ -6,16 +6,16 @@
#include "Utils.h" #include "Utils.h"
namespace platform { namespace platform {
void CommandParser::messageError(const string& message) void CommandParser::messageError(const std::string& message)
{ {
cout << Colors::RED() << message << Colors::RESET() << endl; std::cout << Colors::RED() << message << Colors::RESET() << std::endl;
} }
pair<char, int> CommandParser::parse(const string& color, const vector<tuple<string, char, bool>>& options, const char defaultCommand, const int maxIndex) std::pair<char, int> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex)
{ {
bool finished = false; bool finished = false;
while (!finished) { while (!finished) {
stringstream oss; std::stringstream oss;
string line; std::string line;
oss << color << "Choose option ("; oss << color << "Choose option (";
bool first = true; bool first = true;
for (auto& option : options) { for (auto& option : options) {
@ -24,12 +24,12 @@ namespace platform {
} else { } else {
oss << ", "; oss << ", ";
} }
oss << get<char>(option) << "=" << get<string>(option); oss << std::get<char>(option) << "=" << std::get<std::string>(option);
} }
oss << "): "; oss << "): ";
cout << oss.str(); std::cout << oss.str();
getline(cin, line); getline(std::cin, line);
cout << Colors::RESET(); std::cout << Colors::RESET();
line = trim(line); line = trim(line);
if (line.size() == 0) if (line.size() == 0)
continue; continue;
@ -45,15 +45,15 @@ namespace platform {
} }
bool found = false; bool found = false;
for (auto& option : options) { for (auto& option : options) {
if (line[0] == get<char>(option)) { if (line[0] == std::get<char>(option)) {
found = true; found = true;
// it's a match // it's a match
line.erase(line.begin()); line.erase(line.begin());
line = trim(line); line = trim(line);
if (get<bool>(option)) { if (std::get<bool>(option)) {
// The option requires a value // The option requires a value
if (line.size() == 0) { if (line.size() == 0) {
messageError("Option " + get<string>(option) + " requires a value"); messageError("Option " + std::get<std::string>(option) + " requires a value");
break; break;
} }
try { try {
@ -69,11 +69,11 @@ namespace platform {
} }
} else { } else {
if (line.size() > 0) { if (line.size() > 0) {
messageError("option " + get<string>(option) + " doesn't accept values"); messageError("option " + std::get<std::string>(option) + " doesn't accept values");
break; break;
} }
} }
command = get<char>(option); command = std::get<char>(option);
finished = true; finished = true;
break; break;
} }

View File

@ -3,17 +3,16 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <tuple> #include <tuple>
using namespace std;
namespace platform { namespace platform {
class CommandParser { class CommandParser {
public: public:
CommandParser() = default; CommandParser() = default;
pair<char, int> parse(const string& color, const vector<tuple<string, char, bool>>& options, const char defaultCommand, const int maxIndex); std::pair<char, int> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex);
char getCommand() const { return command; }; char getCommand() const { return command; };
int getIndex() const { return index; }; int getIndex() const { return index; };
private: private:
void messageError(const string& message); void messageError(const std::string& message);
char command; char command;
int index; int index;
}; };

View File

@ -5,20 +5,20 @@ namespace platform {
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{ {
} }
string Dataset::getName() const std::string Dataset::getName() const
{ {
return name; return name;
} }
string Dataset::getClassName() const std::string Dataset::getClassName() const
{ {
return className; return className;
} }
vector<string> Dataset::getFeatures() const std::vector<std::string> Dataset::getFeatures() const
{ {
if (loaded) { if (loaded) {
return features; return features;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNFeatures() const int Dataset::getNFeatures() const
@ -26,7 +26,7 @@ namespace platform {
if (loaded) { if (loaded) {
return n_features; return n_features;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNSamples() const int Dataset::getNSamples() const
@ -34,31 +34,31 @@ namespace platform {
if (loaded) { if (loaded) {
return n_samples; return n_samples;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Dataset::getStates() const std::map<std::string, std::vector<int>> Dataset::getStates() const
{ {
if (loaded) { if (loaded) {
return states; return states;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors() pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
{ {
if (loaded) { if (loaded) {
return { Xv, yv }; return { Xv, yv };
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized() pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{ {
if (loaded) { if (loaded) {
return { Xd, yv }; return { Xd, yv };
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors() pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
@ -67,22 +67,22 @@ namespace platform {
buildTensors(); buildTensors();
return { X, y }; return { X, y };
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
void Dataset::load_csv() void Dataset::load_csv()
{ {
ifstream file(path + "/" + name + ".csv"); ifstream file(path + "/" + name + ".csv");
if (file.is_open()) { if (file.is_open()) {
string line; std::string line;
getline(file, line); getline(file, line);
vector<string> tokens = split(line, ','); std::vector<std::string> tokens = split(line, ',');
features = vector<string>(tokens.begin(), tokens.end() - 1); features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") { if (className == "-1") {
className = tokens.back(); className = tokens.back();
} }
for (auto i = 0; i < features.size(); ++i) { for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(vector<float>()); Xv.push_back(std::vector<float>());
} }
while (getline(file, line)) { while (getline(file, line)) {
tokens = split(line, ','); tokens = split(line, ',');
@ -93,17 +93,17 @@ namespace platform {
} }
file.close(); file.close();
} else { } else {
throw invalid_argument("Unable to open dataset file."); throw std::invalid_argument("Unable to open dataset file.");
} }
} }
void Dataset::computeStates() void Dataset::computeStates()
{ {
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1); states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
auto item = states.at(features[i]); auto item = states.at(features[i]);
iota(begin(item), end(item), 0); iota(begin(item), end(item), 0);
} }
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1); states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
} }
void Dataset::load_arff() void Dataset::load_arff()
@ -118,12 +118,12 @@ namespace platform {
auto attributes = arff.getAttributes(); auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; }); transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
} }
vector<string> tokenize(string line) std::vector<std::string> tokenize(std::string line)
{ {
vector<string> tokens; std::vector<std::string> tokens;
for (auto i = 0; i < line.size(); ++i) { for (auto i = 0; i < line.size(); ++i) {
if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') { if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
string token = line.substr(0, i); std::string token = line.substr(0, i);
tokens.push_back(token); tokens.push_back(token);
line.erase(line.begin(), line.begin() + i + 1); line.erase(line.begin(), line.begin() + i + 1);
i = 0; i = 0;
@ -140,16 +140,16 @@ namespace platform {
{ {
ifstream file(path + "/" + name + "_R.dat"); ifstream file(path + "/" + name + "_R.dat");
if (file.is_open()) { if (file.is_open()) {
string line; std::string line;
getline(file, line); getline(file, line);
line = ArffFiles::trim(line); line = ArffFiles::trim(line);
vector<string> tokens = tokenize(line); std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); }); transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") { if (className == "-1") {
className = ArffFiles::trim(tokens.back()); className = ArffFiles::trim(tokens.back());
} }
for (auto i = 0; i < features.size(); ++i) { for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(vector<float>()); Xv.push_back(std::vector<float>());
} }
while (getline(file, line)) { while (getline(file, line)) {
tokens = tokenize(line); tokens = tokenize(line);
@ -162,7 +162,7 @@ namespace platform {
} }
file.close(); file.close();
} else { } else {
throw invalid_argument("Unable to open dataset file."); throw std::invalid_argument("Unable to open dataset file.");
} }
} }
void Dataset::load() void Dataset::load()
@ -201,9 +201,9 @@ namespace platform {
} }
y = torch::tensor(yv, torch::kInt32); y = torch::tensor(yv, torch::kInt32);
} }
vector<mdlp::labels_t> Dataset::discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y) std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{ {
vector<mdlp::labels_t> Xd; std::vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp(); auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) { for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y); fimdlp.fit(X[i], y);

View File

@ -7,12 +7,10 @@
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include "Utils.h" #include "Utils.h"
namespace platform { namespace platform {
using namespace std;
enum fileType_t { CSV, ARFF, RDATA }; enum fileType_t { CSV, ARFF, RDATA };
class SourceData { class SourceData {
public: public:
SourceData(string source) SourceData(std::string source)
{ {
if (source == "Surcov") { if (source == "Surcov") {
path = "datasets/"; path = "datasets/";
@ -24,10 +22,10 @@ namespace platform {
path = "data/"; path = "data/";
fileType = RDATA; fileType = RDATA;
} else { } else {
throw invalid_argument("Unknown source."); throw std::invalid_argument("Unknown source.");
} }
} }
string getPath() std::string getPath()
{ {
return path; return path;
} }
@ -36,40 +34,40 @@ namespace platform {
return fileType; return fileType;
} }
private: private:
string path; std::string path;
fileType_t fileType; fileType_t fileType;
}; };
class Dataset { class Dataset {
private: private:
string path; std::string path;
string name; std::string name;
fileType_t fileType; fileType_t fileType;
string className; std::string className;
int n_samples{ 0 }, n_features{ 0 }; int n_samples{ 0 }, n_features{ 0 };
vector<string> features; std::vector<std::string> features;
map<string, vector<int>> states; std::map<std::string, std::vector<int>> states;
bool loaded; bool loaded;
bool discretize; bool discretize;
torch::Tensor X, y; torch::Tensor X, y;
vector<vector<float>> Xv; std::vector<std::vector<float>> Xv;
vector<vector<int>> Xd; std::vector<std::vector<int>> Xd;
vector<int> yv; std::vector<int> yv;
void buildTensors(); void buildTensors();
void load_csv(); void load_csv();
void load_arff(); void load_arff();
void load_rdata(); void load_rdata();
void computeStates(); void computeStates();
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y); std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
public: public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&); explicit Dataset(const Dataset&);
string getName() const; std::string getName() const;
string getClassName() const; std::string getClassName() const;
vector<string> getFeatures() const; std::vector<string> getFeatures() const;
map<string, vector<int>> getStates() const; std::map<std::string, std::vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors(); std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(); std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors(); std::pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const; int getNFeatures() const;
int getNSamples() const; int getNSamples() const;
void load(); void load();

View File

@ -8,14 +8,14 @@ namespace platform {
path = sd.getPath(); path = sd.getPath();
ifstream catalog(path + "all.txt"); ifstream catalog(path + "all.txt");
if (catalog.is_open()) { if (catalog.is_open()) {
string line; std::string line;
while (getline(catalog, line)) { while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') { if (line.empty() || line[0] == '#') {
continue; continue;
} }
vector<string> tokens = split(line, ','); std::vector<std::string> tokens = split(line, ',');
string name = tokens[0]; std::string name = tokens[0];
string className; std::string className;
if (tokens.size() == 1) { if (tokens.size() == 1) {
className = "-1"; className = "-1";
} else { } else {
@ -25,32 +25,32 @@ namespace platform {
} }
catalog.close(); catalog.close();
} else { } else {
throw invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]"); throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
} }
} }
vector<string> Datasets::getNames() std::vector<std::string> Datasets::getNames()
{ {
vector<string> result; std::vector<std::string> result;
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result; return result;
} }
vector<string> Datasets::getFeatures(const string& name) const std::vector<std::string> Datasets::getFeatures(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures(); return datasets.at(name)->getFeatures();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Datasets::getStates(const string& name) const map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates(); return datasets.at(name)->getStates();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
void Datasets::loadDataset(const string& name) const void Datasets::loadDataset(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return; return;
@ -58,23 +58,23 @@ namespace platform {
datasets.at(name)->load(); datasets.at(name)->load();
} }
} }
string Datasets::getClassName(const string& name) const std::string Datasets::getClassName(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName(); return datasets.at(name)->getClassName();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
int Datasets::getNSamples(const string& name) const int Datasets::getNSamples(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples(); return datasets.at(name)->getNSamples();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
int Datasets::getNClasses(const string& name) int Datasets::getNClasses(const std::string& name)
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName(); auto className = datasets.at(name)->getClassName();
@ -83,46 +83,46 @@ namespace platform {
return states.at(className).size(); return states.at(className).size();
} }
auto [Xv, yv] = getVectors(name); auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1; return *std::max_element(yv.begin(), yv.end()) + 1;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
vector<int> Datasets::getClassesCounts(const string& name) const std::vector<int> Datasets::getClassesCounts(const std::string& name) const
{ {
if (datasets.at(name)->isLoaded()) { if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors(); auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1); std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) { for (auto y : yv) {
counts[y]++; counts[y]++;
} }
return counts; return counts;
} else { } else {
throw invalid_argument("Dataset not loaded."); throw std::invalid_argument("Dataset not loaded.");
} }
} }
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name) pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectors(); return datasets[name]->getVectors();
} }
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name) pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectorsDiscretized(); return datasets[name]->getVectorsDiscretized();
} }
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name) pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getTensors(); return datasets[name]->getTensors();
} }
bool Datasets::isDataset(const string& name) const bool Datasets::isDataset(const std::string& name) const
{ {
return datasets.find(name) != datasets.end(); return datasets.find(name) != datasets.end();
} }

View File

@ -2,29 +2,28 @@
#define DATASETS_H #define DATASETS_H
#include "Dataset.h" #include "Dataset.h"
namespace platform { namespace platform {
using namespace std;
class Datasets { class Datasets {
private: private:
string path; std::string path;
fileType_t fileType; fileType_t fileType;
string sfileType; std::string sfileType;
map<string, unique_ptr<Dataset>> datasets; std::map<std::string, std::unique_ptr<Dataset>> datasets;
bool discretize; bool discretize;
void load(); // Loads the list of datasets void load(); // Loads the list of datasets
public: public:
explicit Datasets(bool discretize, string sfileType) : discretize(discretize), sfileType(sfileType) { load(); }; explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
vector<string> getNames(); std::vector<string> getNames();
vector<string> getFeatures(const string& name) const; std::vector<string> getFeatures(const std::string& name) const;
int getNSamples(const string& name) const; int getNSamples(const std::string& name) const;
string getClassName(const string& name) const; std::string getClassName(const std::string& name) const;
int getNClasses(const string& name); int getNClasses(const std::string& name);
vector<int> getClassesCounts(const string& name) const; std::vector<int> getClassesCounts(const std::string& name) const;
map<string, vector<int>> getStates(const string& name) const; std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name); std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name); std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name); std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
bool isDataset(const string& name) const; bool isDataset(const std::string& name) const;
void loadDataset(const string& name) const; void loadDataset(const std::string& name) const;
}; };
}; };

View File

@ -26,7 +26,7 @@ namespace platform {
{ {
return workbook; return workbook;
} }
void ExcelFile::setProperties(string title) void ExcelFile::setProperties(std::string title)
{ {
char line[title.size() + 1]; char line[title.size() + 1];
strcpy(line, title.c_str()); strcpy(line, title.c_str());
@ -40,34 +40,34 @@ namespace platform {
}; };
workbook_set_properties(workbook, &properties); workbook_set_properties(workbook, &properties);
} }
// Resolves the lxw_format registered in `styles` for the style name `style`.
//
// An empty `style` yields nullptr. Otherwise the parity-specific entry is
// preferred ("<style>_odd" when the member `row` is odd, "<style>_even" when it
// is even), falling back to the entry stored under the bare style name.
// Throws std::invalid_argument when neither key is present.
lxw_format* ExcelFile::efectiveStyle(const std::string& style)
{
    if (style.empty()) {
        return nullptr;
    }
    // Plain lookups instead of catching std::out_of_range: a missing
    // parity-specific entry is an expected case, not an exceptional one.
    const std::string suffix = row % 2 ? "_odd" : "_even";
    auto found = styles.find(style + suffix);
    if (found == styles.end()) {
        found = styles.find(style);
    }
    if (found == styles.end()) {
        throw std::invalid_argument("Style " + style + " not found");
    }
    return found->second;
}
void ExcelFile::writeString(int row, int col, const string& text, const string& style) void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style)
{ {
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style)); worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
} }
void ExcelFile::writeInt(int row, int col, const int number, const string& style) void ExcelFile::writeInt(int row, int col, const int number, const std::string& style)
{ {
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
} }
void ExcelFile::writeDouble(int row, int col, const double number, const string& style) void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style)
{ {
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
} }
@ -76,7 +76,7 @@ namespace platform {
uint32_t efectiveColor = odd ? colorEven : colorOdd; uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor)); format_set_bg_color(style, lxw_color_t(efectiveColor));
} }
void ExcelFile::createStyle(const string& name, lxw_format* style, bool odd) void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd)
{ {
addColor(style, odd); addColor(style, odd);
if (name == "textCentered") { if (name == "textCentered") {
@ -116,7 +116,7 @@ namespace platform {
{ {
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" }; auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style; lxw_format* style;
for (string name : styleNames) { for (std::string name : styleNames) {
lxw_format* style = workbook_add_format(workbook); lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook); style = workbook_add_format(workbook);
createStyle(name, style, true); createStyle(name, style, true);

View File

@ -5,14 +5,13 @@
#include <map> #include <map>
#include "xlsxwriter.h" #include "xlsxwriter.h"
using namespace std;
namespace platform { namespace platform {
// Number-punctuation facet: ',' as decimal point, '.' as thousands separator and
// digits grouped in threes (e.g. 1234567 is rendered as "1.234.567").
// `override` makes the compiler verify that each member really replaces the
// corresponding std::numpunct<char> virtual.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
class ExcelFile { class ExcelFile {
public: public:
@ -21,17 +20,17 @@ namespace platform {
ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet); ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
lxw_workbook* getWorkbook(); lxw_workbook* getWorkbook();
protected: protected:
void setProperties(string title); void setProperties(std::string title);
void writeString(int row, int col, const string& text, const string& style = ""); void writeString(int row, int col, const std::string& text, const std::string& style = "");
void writeInt(int row, int col, const int number, const string& style = ""); void writeInt(int row, int col, const int number, const std::string& style = "");
void writeDouble(int row, int col, const double number, const string& style = ""); void writeDouble(int row, int col, const double number, const std::string& style = "");
void createFormats(); void createFormats();
void createStyle(const string& name, lxw_format* style, bool odd); void createStyle(const std::string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd); void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name); lxw_format* efectiveStyle(const std::string& name);
lxw_workbook* workbook; lxw_workbook* workbook;
lxw_worksheet* worksheet; lxw_worksheet* worksheet;
map<string, lxw_format*> styles; std::map<std::string, lxw_format*> styles;
int row; int row;
int normalSize; //font size for report body int normalSize; //font size for report body
uint32_t colorTitle; uint32_t colorTitle;

View File

@ -6,7 +6,7 @@
#include "Paths.h" #include "Paths.h"
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
string get_date() std::string get_date()
{ {
time_t rawtime; time_t rawtime;
tm* timeinfo; tm* timeinfo;
@ -16,7 +16,7 @@ namespace platform {
oss << std::put_time(timeinfo, "%Y-%m-%d"); oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str(); return oss.str();
} }
string get_time() std::string get_time()
{ {
time_t rawtime; time_t rawtime;
tm* timeinfo; tm* timeinfo;
@ -27,9 +27,9 @@ namespace platform {
return oss.str(); return oss.str();
} }
// Default constructor: the experiment starts with an empty JSON object as its hyperparameters.
Experiment::Experiment() : hyperparameters(json::object()) {}
string Experiment::get_file_name() std::string Experiment::get_file_name()
{ {
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result; return result;
} }
@ -81,7 +81,7 @@ namespace platform {
} }
return result; return result;
} }
void Experiment::save(const string& path) void Experiment::save(const std::string& path)
{ {
json data = build_json(); json data = build_json();
ofstream file(path + "/" + get_file_name()); ofstream file(path + "/" + get_file_name());
@ -99,20 +99,20 @@ namespace platform {
void Experiment::show() void Experiment::show()
{ {
json data = build_json(); json data = build_json();
cout << data.dump(4) << endl; std::cout << data.dump(4) << std::endl;
} }
void Experiment::go(vector<string> filesToProcess, bool quiet) void Experiment::go(std::vector<std::string> filesToProcess, bool quiet)
{ {
cout << "*** Starting experiment: " << title << " ***" << endl; std::cout << "*** Starting experiment: " << title << " ***" << std::endl;
for (auto fileName : filesToProcess) { for (auto fileName : filesToProcess) {
cout << "- " << setw(20) << left << fileName << " " << right << flush; std::cout << "- " << setw(20) << left << fileName << " " << right << flush;
cross_validation(fileName, quiet); cross_validation(fileName, quiet);
cout << endl; std::cout << std::endl;
} }
} }
string getColor(bayesnet::status_t status) std::string getColor(bayesnet::status_t status)
{ {
switch (status) { switch (status) {
case bayesnet::NORMAL: case bayesnet::NORMAL:
@ -126,13 +126,13 @@ namespace platform {
} }
} }
void showProgress(int fold, const string& color, const string& phase) void showProgress(int fold, const std::string& color, const std::string& phase)
{ {
string prefix = phase == "a" ? "" : "\b\b\b\b"; std::string prefix = phase == "a" ? "" : "\b\b\b\b";
cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
} }
void Experiment::cross_validation(const string& fileName, bool quiet) void Experiment::cross_validation(const std::string& fileName, bool quiet)
{ {
auto datasets = platform::Datasets(discretized, Paths::datasets()); auto datasets = platform::Datasets(discretized, Paths::datasets());
// Get dataset // Get dataset
@ -142,14 +142,14 @@ namespace platform {
auto samples = datasets.getNSamples(fileName); auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName); auto className = datasets.getClassName(fileName);
if (!quiet) { if (!quiet) {
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
} }
// Prepare Result // Prepare Result
auto result = Result(); auto result = Result();
auto [values, counts] = at::_unique(y); auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters); result.setHyperparameters(hyperparameters);
// Initialize the result vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size()); int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
@ -162,7 +162,7 @@ namespace platform {
int item = 0; int item = 0;
for (auto seed : randomSeeds) { for (auto seed : randomSeeds) {
if (!quiet) if (!quiet)
cout << "(" << seed << ") doing Fold: " << flush; std::cout << "(" << seed << ") doing Fold: " << flush;
Fold* fold; Fold* fold;
if (stratified) if (stratified)
fold = new StratifiedKFold(nfolds, y, seed); fold = new StratifiedKFold(nfolds, y, seed);
@ -204,17 +204,16 @@ namespace platform {
accuracy_train[item] = accuracy_train_value; accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value; accuracy_test[item] = accuracy_test_value;
if (!quiet) if (!quiet)
cout << "\b\b\b, " << flush; std::cout << "\b\b\b, " << flush;
// Store results and times in the result vectors
result.addScoreTrain(accuracy_train_value); result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value); result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>()); result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>()); result.addTimeTest(test_time[item].item<double>());
item++; item++;
clf.reset();
} }
if (!quiet) if (!quiet)
cout << "end. " << flush; std::cout << "end. " << flush;
delete fold; delete fold;
} }
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>()); result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());

View File

@ -10,34 +10,33 @@
#include "KDB.h" #include "KDB.h"
#include "AODE.h" #include "AODE.h"
using namespace std;
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
// Minimal stopwatch: start() records the current instant and getDuration()
// returns the seconds elapsed since the last start().
// Calling getDuration() before start() measures from the clock's epoch.
class Timer {
private:
    // steady_clock is monotonic, so an elapsed time can never be negative or
    // jump when the system wall clock is adjusted (high_resolution_clock gives
    // no such guarantee).
    std::chrono::steady_clock::time_point begin;
public:
    Timer() = default;
    ~Timer() = default;
    // Marks the starting instant of the measurement.
    void start() { begin = std::chrono::steady_clock::now(); }
    // Returns the seconds (as a double) elapsed since start(); does not stop or reset the timer.
    double getDuration() const
    {
        const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - begin;
        return elapsed.count();
    }
};
class Result { class Result {
private: private:
string dataset, model_version; std::string dataset, model_version;
json hyperparameters; json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 }; int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 }; double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 }; float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
vector<double> scores_train, scores_test, times_train, times_test; std::vector<double> scores_train, scores_test, times_train, times_test;
public: public:
Result() = default; Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; } Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; } Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; } Result& setFeatures(int features) { this->features = features; return *this; }
@ -59,7 +58,7 @@ namespace platform {
Result& addTimeTest(double time) { times_test.push_back(time); return *this; } Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
const float get_score_train() const { return score_train; } const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; } float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; } const std::string& getDataset() const { return dataset; }
const json& getHyperparameters() const { return hyperparameters; } const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; } const int getSamples() const { return samples; }
const int getFeatures() const { return features; } const int getFeatures() const { return features; }
@ -75,30 +74,30 @@ namespace platform {
const float getNodes() const { return nodes; } const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; } const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; } const float getDepth() const { return depth; }
const vector<double>& getScoresTrain() const { return scores_train; } const std::vector<double>& getScoresTrain() const { return scores_train; }
const vector<double>& getScoresTest() const { return scores_test; } const std::vector<double>& getScoresTest() const { return scores_test; }
const vector<double>& getTimesTrain() const { return times_train; } const std::vector<double>& getTimesTrain() const { return times_train; }
const vector<double>& getTimesTest() const { return times_test; } const std::vector<double>& getTimesTest() const { return times_test; }
}; };
class Experiment { class Experiment {
private: private:
string title, model, platform, score_name, model_version, language_version, language; std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false }; bool discretized{ false }, stratified{ false };
vector<Result> results; std::vector<Result> results;
vector<int> randomSeeds; std::vector<int> randomSeeds;
json hyperparameters = "{}"; json hyperparameters = "{}";
int nfolds{ 0 }; int nfolds{ 0 };
float duration{ 0 }; float duration{ 0 };
json build_json(); json build_json();
public: public:
Experiment(); Experiment();
Experiment& setTitle(const string& title) { this->title = title; return *this; } Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; } Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; } Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; } Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; } Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const string& language) { this->language = language; return *this; } Experiment& setLanguage(const std::string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; } Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
@ -106,10 +105,10 @@ namespace platform {
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
string get_file_name(); std::string get_file_name();
void save(const string& path); void save(const std::string& path);
void cross_validation(const string& fileName, bool quiet); void cross_validation(const std::string& fileName, bool quiet);
void go(vector<string> filesToProcess, bool quiet); void go(std::vector<std::string> filesToProcess, bool quiet);
void show(); void show();
void report(); void report();
}; };

View File

@ -4,23 +4,23 @@
namespace platform { namespace platform {
// Stores k, n and seed and seeds both random sources used by the splitters:
// the member engine `random_seed` and the C library generator (std::srand).
// With seed == -1 the engine is seeded from std::random_device and srand from
// the current time; any other value is used as the seed for both.
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
    std::random_device rd;
    if (seed == -1) {
        random_seed = std::default_random_engine(rd());
        std::srand(time(0));
    } else {
        // Go through unsigned int first, matching the type random_device produces.
        random_seed = std::default_random_engine(static_cast<unsigned int>(seed));
        std::srand(seed);
    }
}
// Builds the index permutation used by the folds: `indices` holds 0..n-1,
// shuffled with the engine that the Fold base constructor seeded.
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector<int>(n))
{
    // Calls are std-qualified instead of relying on argument-dependent lookup.
    std::iota(indices.begin(), indices.end(), 0); // fill with 0, 1, ..., n - 1
    std::shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold) std::pair<std::vector<int>, std::vector<int>> KFold::getFold(int nFold)
{ {
if (nFold >= k || nFold < 0) { if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")"); throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
} }
int nTest = n / k; int nTest = n / k;
auto train = vector<int>(); auto train = std::vector<int>();
auto test = vector<int>(); auto test = std::vector<int>();
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) { if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]); test.push_back(indices[i]);
@ -33,10 +33,10 @@ namespace platform {
// Builds the stratified folds from a tensor of labels: copies the y.numel()
// values read through y.data_ptr<int>() into the member vector `y`, then
// calls build().
// NOTE(review): reading through data_ptr<int>() presumably requires a
// contiguous 32-bit integer tensor in host memory; confirm against callers.
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
    n = y.numel();
    const int* const first = y.data_ptr<int>();
    const int* const last = y.data_ptr<int>() + n;
    // The parameter shadows the member of the same name, hence this->y.
    this->y.assign(first, last);
    build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed) StratifiedKFold::StratifiedKFold(int k, const std::vector<int>& y, int seed)
: Fold(k, y.size(), seed) : Fold(k, y.size(), seed)
{ {
this->y = y; this->y = y;
@ -45,12 +45,12 @@ namespace platform {
} }
void StratifiedKFold::build() void StratifiedKFold::build()
{ {
stratified_indices = vector<vector<int>>(k); stratified_indices = std::vector<std::vector<int>>(k);
int fold_size = n / k; int fold_size = n / k;
// Compute class counts and indices // Compute class counts and indices
auto class_indices = map<int, vector<int>>(); auto class_indices = std::map<int, std::vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0); std::vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) { for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++; class_counts[y[i]]++;
class_indices[y[i]].push_back(i); class_indices[y[i]].push_back(i);
@ -63,8 +63,8 @@ namespace platform {
for (auto label = 0; label < class_counts.size(); ++label) { for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts.at(label) / k; auto num_samples_to_take = class_counts.at(label) / k;
if (num_samples_to_take == 0) { if (num_samples_to_take == 0) {
cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label) std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label)
<< ") is less than the number of folds (" << k << ")." << endl; << ") is less than the number of folds (" << k << ")." << std::endl;
faulty = true; faulty = true;
continue; continue;
} }
@ -74,7 +74,7 @@ namespace platform {
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ## move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it); class_indices[label].erase(class_indices[label].begin(), it);
} }
auto chosen = vector<bool>(k, false); auto chosen = std::vector<bool>(k, false);
while (remainder_samples_to_take > 0) { while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k)); int fold = (rand() % static_cast<int>(k));
if (chosen.at(fold)) { if (chosen.at(fold)) {
@ -88,13 +88,13 @@ namespace platform {
} }
} }
} }
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold) std::pair<std::vector<int>, std::vector<int>> StratifiedKFold::getFold(int nFold)
{ {
if (nFold >= k || nFold < 0) { if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")"); throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
} }
vector<int> test_indices = stratified_indices[nFold]; std::vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices; std::vector<int> train_indices;
for (int i = 0; i < k; ++i) { for (int i = 0; i < k; ++i) {
if (i == nFold) continue; if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end()); train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());

View File

@ -3,37 +3,36 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <vector> #include <vector>
#include <random> #include <random>
using namespace std;
namespace platform { namespace platform {
class Fold { class Fold {
protected: protected:
int k; int k;
int n; int n;
int seed; int seed;
default_random_engine random_seed; std::default_random_engine random_seed;
public: public:
Fold(int k, int n, int seed = -1); Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0; virtual std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default; virtual ~Fold() = default;
int getNumberOfFolds() { return k; } int getNumberOfFolds() { return k; }
}; };
class KFold : public Fold { class KFold : public Fold {
private: private:
vector<int> indices; std::vector<int> indices;
public: public:
KFold(int k, int n, int seed = -1); KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override; std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
}; };
class StratifiedKFold : public Fold { class StratifiedKFold : public Fold {
private: private:
vector<int> y; std::vector<int> y;
vector<vector<int>> stratified_indices; std::vector<std::vector<int>> stratified_indices;
void build(); void build();
bool faulty = false; // Only true if the number of samples of any class is less than the number of folds. bool faulty = false; // Only true if the number of samples of any class is less than the number of folds.
public: public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1); StratifiedKFold(int k, const std::vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1); StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override; std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
bool isFaulty() { return faulty; } bool isFaulty() { return faulty; }
}; };
} }

View File

@ -10,7 +10,7 @@
namespace platform { namespace platform {
ManageResults::ManageResults(int numFiles, const string& model, const string& score, bool complete, bool partial, bool compare) : ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial)) numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial))
{ {
indexList = true; indexList = true;
@ -23,7 +23,7 @@ namespace platform {
void ManageResults::doMenu() void ManageResults::doMenu()
{ {
if (results.empty()) { if (results.empty()) {
cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << endl; std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
return; return;
} }
results.sortDate(); results.sortDate();
@ -32,68 +32,68 @@ namespace platform {
if (openExcel) { if (openExcel) {
workbook_close(workbook); workbook_close(workbook);
} }
cout << Colors::RESET() << "Done!" << endl; std::cout << Colors::RESET() << "Done!" << std::endl;
} }
void ManageResults::list() void ManageResults::list()
{ {
auto temp = ConfigLocale(); auto temp = ConfigLocale();
string suffix = numFiles != results.size() ? " of " + to_string(results.size()) : ""; std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : "";
stringstream oss; std::stringstream oss;
oss << "Results on screen: " << numFiles << suffix; oss << "Results on screen: " << numFiles << suffix;
cout << Colors::GREEN() << oss.str() << endl; std::cout << Colors::GREEN() << oss.str() << std::endl;
cout << string(oss.str().size(), '-') << endl; std::cout << std::string(oss.str().size(), '-') << std::endl;
if (complete) { if (complete) {
cout << Colors::MAGENTA() << "Only listing complete results" << endl; std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl;
} }
if (partial) { if (partial) {
cout << Colors::MAGENTA() << "Only listing partial results" << endl; std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl;
} }
auto i = 0; auto i = 0;
int maxModel = results.maxModelSize(); int maxModel = results.maxModelSize();
cout << Colors::GREEN() << " # Date " << setw(maxModel) << left << "Model" << " Score Name Score C/P Duration Title" << endl; std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl;
cout << "=== ========== " << string(maxModel, '=') << " =========== =========== === ========= =============================================================" << endl; std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl;
bool odd = true; bool odd = true;
for (auto& result : results) { for (auto& result : results) {
auto color = odd ? Colors::BLUE() : Colors::CYAN(); auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " "; std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
cout << result.to_string(maxModel) << endl; std::cout << result.to_string(maxModel) << std::endl;
if (i == numFiles) { if (i == numFiles) {
break; break;
} }
odd = !odd; odd = !odd;
} }
} }
bool ManageResults::confirmAction(const string& intent, const string& fileName) const bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const
{ {
string color; std::string color;
if (intent == "delete") { if (intent == "delete") {
color = Colors::RED(); color = Colors::RED();
} else { } else {
color = Colors::YELLOW(); color = Colors::YELLOW();
} }
string line; std::string line;
bool finished = false; bool finished = false;
while (!finished) { while (!finished) {
cout << color << "Really want to " << intent << " " << fileName << "? (y/n): "; std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
getline(cin, line); getline(std::cin, line);
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n')); finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n'));
} }
if (tolower(line[0]) == 'y') { if (tolower(line[0]) == 'y') {
return true; return true;
} }
cout << "Not done!" << endl; std::cout << "Not done!" << std::endl;
return false; return false;
} }
void ManageResults::report(const int index, const bool excelReport) void ManageResults::report(const int index, const bool excelReport)
{ {
cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << endl; std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl;
auto data = results.at(index).load(); auto data = results.at(index).load();
if (excelReport) { if (excelReport) {
ReportExcel reporter(data, compare, workbook); ReportExcel reporter(data, compare, workbook);
reporter.show(); reporter.show();
openExcel = true; openExcel = true;
workbook = reporter.getWorkbook(); workbook = reporter.getWorkbook();
cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << endl; std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl;
} else { } else {
ReportConsole reporter(data, compare); ReportConsole reporter(data, compare);
reporter.show(); reporter.show();
@ -103,20 +103,20 @@ namespace platform {
{ {
// Show a dataset result inside a report // Show a dataset result inside a report
auto data = results.at(index).load(); auto data = results.at(index).load();
cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << endl; std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl;
ReportConsole reporter(data, compare, idx); ReportConsole reporter(data, compare, idx);
reporter.show(); reporter.show();
} }
void ManageResults::sortList() void ManageResults::sortList()
{ {
cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): "; std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
string line; std::string line;
char option; char option;
getline(cin, line); getline(std::cin, line);
if (line.size() == 0) if (line.size() == 0)
return; return;
if (line.size() > 1) { if (line.size() > 1) {
cout << "Invalid option" << endl; std::cout << "Invalid option" << std::endl;
return; return;
} }
option = line[0]; option = line[0];
@ -134,7 +134,7 @@ namespace platform {
results.sortModel(); results.sortModel();
break; break;
default: default:
cout << "Invalid option" << endl; std::cout << "Invalid option" << std::endl;
} }
} }
void ManageResults::menu() void ManageResults::menu()
@ -142,9 +142,9 @@ namespace platform {
char option; char option;
int index, subIndex; int index, subIndex;
bool finished = false; bool finished = false;
string filename; std::string filename;
// tuple<Option, digit, requires value> // tuple<Option, digit, requires value>
vector<tuple<string, char, bool>> mainOptions = { std::vector<std::tuple<std::string, char, bool>> mainOptions = {
{"quit", 'q', false}, {"quit", 'q', false},
{"list", 'l', false}, {"list", 'l', false},
{"delete", 'd', true}, {"delete", 'd', true},
@ -153,7 +153,7 @@ namespace platform {
{"report", 'r', true}, {"report", 'r', true},
{"excel", 'e', true} {"excel", 'e', true}
}; };
vector<tuple<string, char, bool>> listOptions = { std::vector<std::tuple<std::string, char, bool>> listOptions = {
{"report", 'r', true}, {"report", 'r', true},
{"list", 'l', false}, {"list", 'l', false},
{"quit", 'q', false} {"quit", 'q', false}
@ -161,9 +161,9 @@ namespace platform {
auto parser = CommandParser(); auto parser = CommandParser();
while (!finished) { while (!finished) {
if (indexList) { if (indexList) {
tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1); std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
} else { } else {
tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1); std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1);
} }
switch (option) { switch (option) {
case 'q': case 'q':
@ -177,9 +177,9 @@ namespace platform {
filename = results.at(index).getFilename(); filename = results.at(index).getFilename();
if (!confirmAction("delete", filename)) if (!confirmAction("delete", filename))
break; break;
cout << "Deleting " << filename << endl; std::cout << "Deleting " << filename << std::endl;
results.deleteResult(index); results.deleteResult(index);
cout << "File: " + filename + " deleted!" << endl; std::cout << "File: " + filename + " deleted!" << std::endl;
list(); list();
break; break;
case 'h': case 'h':
@ -187,9 +187,9 @@ namespace platform {
if (!confirmAction("hide", filename)) if (!confirmAction("hide", filename))
break; break;
filename = results.at(index).getFilename(); filename = results.at(index).getFilename();
cout << "Hiding " << filename << endl; std::cout << "Hiding " << filename << std::endl;
results.hideResult(index, Paths::hiddenResults()); results.hideResult(index, Paths::hiddenResults());
cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << endl; std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl;
list(); list();
break; break;
case 's': case 's':

View File

@ -6,12 +6,12 @@
namespace platform { namespace platform {
class ManageResults { class ManageResults {
public: public:
ManageResults(int numFiles, const string& model, const string& score, bool complete, bool partial, bool compare); ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
~ManageResults() = default; ~ManageResults() = default;
void doMenu(); void doMenu();
private: private:
void list(); void list();
bool confirmAction(const string& intent, const string& fileName) const; bool confirmAction(const std::string& intent, const std::string& fileName) const;
void report(const int index, const bool excelReport); void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx); void showIndex(const int index, const int idx);
void sortList(); void sortList();

View File

@ -1,6 +1,5 @@
#include "Models.h" #include "Models.h"
namespace platform { namespace platform {
using namespace std;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
// Storage for the singleton pointer; Models::instance() allocates it.
Models* Models::factory = nullptr;
Models* Models::instance() Models* Models::instance()
@ -10,13 +9,13 @@ namespace platform {
factory = new Models(); factory = new Models();
return factory; return factory;
} }
void Models::registerFactoryFunction(const string& name, void Models::registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction) function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{ {
// register the class factory function // register the class factory function
functionRegistry[name] = classFactoryFunction; functionRegistry[name] = classFactoryFunction;
} }
shared_ptr<bayesnet::BaseClassifier> Models::create(const string& name) shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{ {
bayesnet::BaseClassifier* instance = nullptr; bayesnet::BaseClassifier* instance = nullptr;
@ -30,23 +29,22 @@ namespace platform {
else else
return nullptr; return nullptr;
} }
vector<string> Models::getNames() std::vector<std::string> Models::getNames()
{ {
vector<string> names; std::vector<std::string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names), transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; }); [](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
return names; return names;
} }
string Models::toString() std::string Models::tostring()
{ {
string result = ""; std::string result = "";
for (const auto& pair : functionRegistry) { for (const auto& pair : functionRegistry) {
result += pair.first + ", "; result += pair.first + ", ";
} }
return "{" + result.substr(0, result.size() - 2) + "}"; return "{" + result.substr(0, result.size() - 2) + "}";
} }
Registrar::Registrar(const std::string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
Registrar::Registrar(const string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{ {
// register the class factory function // register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction); Models::instance()->registerFactoryFunction(name, classFactoryFunction);

View File

@ -11,10 +11,14 @@
#include "SPODELd.h" #include "SPODELd.h"
#include "AODELd.h" #include "AODELd.h"
#include "BoostAODE.h" #include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
namespace platform { namespace platform {
class Models { class Models {
private: private:
map<string, function<bayesnet::BaseClassifier* (void)>> functionRegistry; map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton static Models* factory; //singleton
Models() {}; Models() {};
public: public:
@ -22,16 +26,16 @@ namespace platform {
void operator=(const Models&) = delete; void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance(); static Models* instance();
shared_ptr<bayesnet::BaseClassifier> create(const string& name); shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
void registerFactoryFunction(const string& name, void registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction); function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
vector<string> getNames(); std::vector<string> getNames();
string toString(); std::string tostring();
}; };
class Registrar { class Registrar {
public: public:
Registrar(const string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction); Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
}; };
} }
#endif #endif

View File

@ -7,8 +7,8 @@
namespace platform { namespace platform {
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1) ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{ {
stringstream oss; std::stringstream oss;
oss << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%"; oss << "Better than ZeroR + " << std::setprecision(1) << fixed << margin * 100 << "%";
meaning = { meaning = {
{Symbols::equal_best, "Equal to best"}, {Symbols::equal_best, "Equal to best"},
{Symbols::better_best, "Better than best"}, {Symbols::better_best, "Better than best"},
@ -16,10 +16,10 @@ namespace platform {
{Symbols::upward_arrow, oss.str()} {Symbols::upward_arrow, oss.str()}
}; };
} }
string ReportBase::fromVector(const string& key) std::string ReportBase::fromVector(const std::string& key)
{ {
stringstream oss; std::stringstream oss;
string sep = ""; std::string sep = "";
oss << "["; oss << "[";
for (auto& item : data[key]) { for (auto& item : data[key]) {
oss << sep << item.get<double>(); oss << sep << item.get<double>();
@ -28,13 +28,13 @@ namespace platform {
oss << "]"; oss << "]";
return oss.str(); return oss.str();
} }
string ReportBase::fVector(const string& title, const json& data, const int width, const int precision) std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision)
{ {
stringstream oss; std::stringstream oss;
string sep = ""; std::string sep = "";
oss << title << "["; oss << title << "[";
for (const auto& item : data) { for (const auto& item : data) {
oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>(); oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get<double>();
sep = ", "; sep = ", ";
} }
oss << "]"; oss << "]";
@ -45,25 +45,25 @@ namespace platform {
header(); header();
body(); body();
} }
string ReportBase::compareResult(const string& dataset, double result) std::string ReportBase::compareResult(const std::string& dataset, double result)
{ {
string status = " "; std::string status = " ";
if (compare) { if (compare) {
double best = bestResult(dataset, data["model"].get<string>()); double best = bestResult(dataset, data["model"].get<std::string>());
if (result == best) { if (result == best) {
status = Symbols::equal_best; status = Symbols::equal_best;
} else if (result > best) { } else if (result > best) {
status = Symbols::better_best; status = Symbols::better_best;
} }
} else { } else {
if (data["score_name"].get<string>() == "accuracy") { if (data["score_name"].get<std::string>() == "accuracy") {
auto dt = Datasets(false, Paths::datasets()); auto dt = Datasets(false, Paths::datasets());
dt.loadDataset(dataset); dt.loadDataset(dataset);
auto numClasses = dt.getNClasses(dataset); auto numClasses = dt.getNClasses(dataset);
if (numClasses == 2) { if (numClasses == 2) {
vector<int> distribution = dt.getClassesCounts(dataset); std::vector<int> distribution = dt.getClassesCounts(dataset);
double nSamples = dt.getNSamples(dataset); double nSamples = dt.getNSamples(dataset);
vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end()); std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin); double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) { if (mark > 1) {
mark = 0.9995; mark = 0.9995;
@ -82,14 +82,14 @@ namespace platform {
} }
return status; return status;
} }
double ReportBase::bestResult(const string& dataset, const string& model) double ReportBase::bestResult(const std::string& dataset, const std::string& model)
{ {
double value = 0.0; double value = 0.0;
if (bestResults.size() == 0) { if (bestResults.size() == 0) {
// try to load the best results // try to load the best results
string score = data["score_name"]; std::string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-'); replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json"; std::string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName); ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) { if (resultData.is_open()) {
bestResults = json::parse(resultData); bestResults = json::parse(resultData);

View File

@ -8,7 +8,6 @@
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
using namespace std;
class ReportBase { class ReportBase {
public: public:
@ -17,19 +16,19 @@ namespace platform {
void show(); void show();
protected: protected:
json data; json data;
string fromVector(const string& key); std::string fromVector(const std::string& key);
string fVector(const string& title, const json& data, const int width, const int precision); std::string fVector(const std::string& title, const json& data, const int width, const int precision);
bool getExistBestFile(); bool getExistBestFile();
virtual void header() = 0; virtual void header() = 0;
virtual void body() = 0; virtual void body() = 0;
virtual void showSummary() = 0; virtual void showSummary() = 0;
string compareResult(const string& dataset, double result); std::string compareResult(const std::string& dataset, double result);
map<string, int> summary; std::map<std::string, int> summary;
double margin; double margin;
map<string, string> meaning; std::map<std::string, std::string> meaning;
bool compare; bool compare;
private: private:
double bestResult(const string& dataset, const string& model); double bestResult(const std::string& dataset, const std::string& model);
json bestResults; json bestResults;
bool existBestFile = true; bool existBestFile = true;
}; };

View File

@ -6,25 +6,30 @@
#include "CLocale.h" #include "CLocale.h"
namespace platform { namespace platform {
string ReportConsole::headerLine(const string& text, int utf = 0) std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
{ {
int n = MAXL - text.length() - 3; int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n; n = n < 0 ? 0 : n;
return "* " + text + string(n + utf, ' ') + "*\n"; return "* " + text + std::string(n + utf, ' ') + "*\n";
} }
void ReportConsole::header() void ReportConsole::header()
{ {
stringstream oss; std::stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl; std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>()); std::cout << headerLine(
cout << headerLine(data["title"].get<string>()); "Report " + data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>()
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False")); + " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size())
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>(); + " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
cout << headerLine(oss.str()); );
cout << headerLine("Score is " + data["score_name"].get<string>()); std::cout << headerLine(data["title"].get<std::string>());
cout << string(MAXL, '*') << endl; std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
cout << endl; oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>()
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>();
std::cout << headerLine(oss.str());
std::cout << headerLine("Score is " + data["score_name"].get<std::string>());
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << std::endl;
} }
void ReportConsole::body() void ReportConsole::body()
{ {
@ -32,12 +37,12 @@ namespace platform {
int maxHyper = 15; int maxHyper = 15;
int maxDataset = 7; int maxDataset = 7;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
maxHyper = max(maxHyper, (int)r["hyperparameters"].dump().size()); maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
maxDataset = max(maxDataset, (int)r["dataset"].get<string>().size()); maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
} }
cout << Colors::GREEN() << " # " << setw(maxDataset) << left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
cout << "=== " << string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << string(maxHyper, '=') << endl; std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
bool odd = true; bool odd = true;
@ -48,33 +53,33 @@ namespace platform {
continue; continue;
} }
auto color = odd ? Colors::CYAN() : Colors::BLUE(); auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color; std::cout << color;
cout << setw(3) << right << index++ << " "; std::cout << std::setw(3) << std::right << index++ << " ";
cout << setw(maxDataset) << left << r["dataset"].get<string>() << " "; std::cout << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " "; std::cout << std::setw(6) << std::right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " "; std::cout << std::setw(5) << std::right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " "; std::cout << std::setw(3) << std::right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " "; std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " "; std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " "; std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>(); std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>()); const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
cout << status; std::cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " "; std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
cout << r["hyperparameters"].dump(); std::cout << r["hyperparameters"].dump();
cout << endl; std::cout << std::endl;
cout << flush; std::cout << std::flush;
lastResult = r; lastResult = r;
totalScore += r["score"].get<double>(); totalScore += r["score"].get<double>();
odd = !odd; odd = !odd;
} }
if (data["results"].size() == 1 || selectedIndex != -1) { if (data["results"].size() == 1 || selectedIndex != -1) {
cout << string(MAXL, '*') << endl; std::cout << std::string(MAXL, '*') << std::endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl; std::cout << std::string(MAXL, '*') << std::endl;
} else { } else {
footer(totalScore); footer(totalScore);
} }
@ -82,28 +87,28 @@ namespace platform {
void ReportConsole::showSummary() void ReportConsole::showSummary()
{ {
for (const auto& item : summary) { for (const auto& item : summary) {
stringstream oss; std::stringstream oss;
oss << setw(3) << left << item.first; oss << std::setw(3) << std::left << item.first;
oss << setw(3) << right << item.second << " "; oss << std::setw(3) << std::right << item.second << " ";
oss << left << meaning.at(item.first); oss << std::left << meaning.at(item.first);
cout << headerLine(oss.str(), 2); std::cout << headerLine(oss.str(), 2);
} }
} }
void ReportConsole::footer(double totalScore) void ReportConsole::footer(double totalScore)
{ {
cout << Colors::MAGENTA() << string(MAXL, '*') << endl; std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
showSummary(); showSummary();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score); auto best = BestScore::getScore(score);
if (best.first != "") { if (best.first != "") {
stringstream oss; std::stringstream oss;
oss << score << " compared to " << best.first << " .: " << totalScore / best.second; oss << score << " compared to " << best.first << " .: " << totalScore / best.second;
cout << headerLine(oss.str()); std::cout << headerLine(oss.str());
} }
if (!getExistBestFile() && compare) { if (!getExistBestFile() && compare) {
cout << headerLine("*** Best Results File not found. Couldn't compare any result!"); std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
} }
cout << string(MAXL, '*') << endl << Colors::RESET(); std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET();
} }
} }

View File

@ -5,7 +5,6 @@
#include "Colors.h" #include "Colors.h"
namespace platform { namespace platform {
using namespace std;
const int MAXL = 133; const int MAXL = 133;
class ReportConsole : public ReportBase { class ReportConsole : public ReportBase {
public: public:
@ -13,7 +12,7 @@ namespace platform {
virtual ~ReportConsole() = default; virtual ~ReportConsole() = default;
private: private:
int selectedIndex; int selectedIndex;
string headerLine(const string& text, int utf); std::string headerLine(const std::string& text, int utf);
void header() override; void header() override;
void body() override; void body() override;
void footer(double totalScore); void footer(double totalScore);

View File

@ -14,28 +14,28 @@ namespace platform {
void ReportExcel::formatColumns() void ReportExcel::formatColumns()
{ {
worksheet_freeze_panes(worksheet, 6, 1); worksheet_freeze_panes(worksheet, 6, 1);
vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 }; std::vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) { for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
} }
} }
void ReportExcel::createWorksheet() void ReportExcel::createWorksheet()
{ {
const string name = data["model"].get<string>(); const std::string name = data["model"].get<std::string>();
string suffix = ""; std::string suffix = "";
string efectiveName; std::string efectiveName;
int num = 1; int num = 1;
// Create a sheet with the name of the model // Create a sheet with the name of the model
while (true) { while (true) {
efectiveName = name + suffix; efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) { if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num); suffix = std::to_string(++num);
} else { } else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str()); worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break; break;
} }
if (num > 100) { if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName); throw std::invalid_argument("Couldn't create sheet " + efectiveName);
} }
} }
} }
@ -48,7 +48,7 @@ namespace platform {
if (worksheet == NULL) { if (worksheet == NULL) {
createWorksheet(); createWorksheet();
} }
setProperties(data["title"].get<string>()); setProperties(data["title"].get<std::string>());
createFormats(); createFormats();
formatColumns(); formatColumns();
} }
@ -60,26 +60,26 @@ namespace platform {
void ReportExcel::header() void ReportExcel::header()
{ {
locale mylocale(cout.getloc(), new separated); std::locale mylocale(std::cout.getloc(), new separated);
locale::global(mylocale); std::locale::global(mylocale);
cout.imbue(mylocale); std::cout.imbue(mylocale);
stringstream oss; std::stringstream oss;
string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " + std::string message = data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>() + " " +
data["language"].get<string>() + " ver. " + data["language_version"].get<string>() + data["language"].get<std::string>() + " ver. " + data["language_version"].get<std::string>() +
" with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>(); " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]); worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
oss << setprecision(2) << fixed << data["duration"].get<float>() << " s"; oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
oss.str(""); oss.str("");
oss.clear(); oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h"; oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]); worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]); worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str(""); oss.str("");
oss.clear(); oss.clear();
@ -93,7 +93,7 @@ namespace platform {
void ReportExcel::body() void ReportExcel::body()
{ {
auto head = vector<string>( auto head = std::vector<std::string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time", { "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" }); "Time Std.", "Hyperparameters" });
int col = 0; int col = 0;
@ -105,9 +105,9 @@ namespace platform {
int hypSize = 22; int hypSize = 22;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
string hyperparameters; std::string hyperparameters;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<string>(), "text"); writeString(row, col, r["dataset"].get<std::string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints"); writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints"); writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints"); writeInt(row, col + 3, r["classes"].get<int>(), "ints");
@ -116,7 +116,7 @@ namespace platform {
writeDouble(row, col + 6, r["depth"].get<double>(), "floats"); writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result"); writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result"); writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>()); const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered"); writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time"); writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time"); writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
@ -133,12 +133,12 @@ namespace platform {
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL); worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result // Show totals if only one dataset is present in the result
if (data["results"].size() == 1) { if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++; row++;
col = 1; col = 1;
writeString(row, col, group, "text"); writeString(row, col, group, "text");
for (double item : lastResult[group]) { for (double item : lastResult[group]) {
string style = group.find("scores") != string::npos ? "result" : "time"; std::string style = group.find("scores") != std::string::npos ? "result" : "time";
writeDouble(row, ++col, item, style); writeDouble(row, ++col, item, style);
} }
} }
@ -167,7 +167,7 @@ namespace platform {
{ {
showSummary(); showSummary();
row += 4 + summary.size(); row += 4 + summary.size();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score); auto best = BestScore::getScore(score);
if (best.first != "") { if (best.first != "") {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text")); worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text"));

View File

@ -6,7 +6,6 @@
#include "ExcelFile.h" #include "ExcelFile.h"
#include "Colors.h" #include "Colors.h"
namespace platform { namespace platform {
using namespace std;
class ReportExcel : public ReportBase, public ExcelFile { class ReportExcel : public ReportBase, public ExcelFile {
public: public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL); explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);

View File

@ -8,7 +8,7 @@
#include "CLocale.h" #include "CLocale.h"
namespace platform { namespace platform {
Result::Result(const string& path, const string& filename) Result::Result(const std::string& path, const std::string& filename)
: path(path) : path(path)
, filename(filename) , filename(filename)
{ {
@ -31,28 +31,28 @@ namespace platform {
// Opens the file at path + "/" + filename and returns its contents parsed as JSON.
// Throws std::invalid_argument (the message contains the full path) when the file cannot be opened.
// NOTE(review): no handling of parse failures is visible here; json::parse presumably
// reports malformed input through its own exception — confirm against the nlohmann docs.
json Result::load() const json Result::load() const
{ {
ifstream resultData(path + "/" + filename); std::ifstream resultData(path + "/" + filename);
if (resultData.is_open()) { if (resultData.is_open()) {
json data = json::parse(resultData); json data = json::parse(resultData);
return data; return data;
} }
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
} }
// Renders this result as a single fixed-width text line made of, in order:
// date, model, score name, score, completion flag, duration with unit, and title.
// maxModel: field width used to pad the model name.
// Returns the formatted line (no trailing newline is appended).
string Result::to_string(int maxModel) const std::string Result::to_string(int maxModel) const
{ {
// NOTE(review): tmp is never read; presumably ConfigLocale sets up a locale for the
// lifetime of this object — confirm in CLocale.h.
auto tmp = ConfigLocale(); auto tmp = ConfigLocale();
stringstream oss; std::stringstream oss;
// Scale the duration for display: hours above 3600, minutes above 60, otherwise unchanged.
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
// Unit suffix chosen with the same thresholds as durationShow.
string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
oss << date << " "; oss << date << " ";
// Model and score name are left-aligned; the score is right-aligned with 7 decimals.
oss << setw(maxModel) << left << model << " "; oss << std::setw(maxModel) << std::left << model << " ";
oss << setw(11) << left << scoreName << " "; oss << std::setw(11) << std::left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " "; oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
// "C" when isComplete() is true, "P" otherwise.
auto completeString = isComplete() ? "C" : "P"; auto completeString = isComplete() ? "C" : "P";
oss << setw(1) << " " << completeString << " "; oss << std::setw(1) << " " << completeString << " ";
// Duration is printed with 2 decimals followed by its unit.
oss << setw(7) << setprecision(2) << fixed << durationShow << " " << durationUnit << " "; oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << setw(50) << left << title << " "; oss << std::setw(50) << std::left << title << " ";
return oss.str(); return oss.str();
} }
} }

View File

@ -5,31 +5,30 @@
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
namespace platform { namespace platform {
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
// Describes one result file, identified by a directory path and a file name,
// and exposes its summary fields through read-only accessors.
// NOTE(review): the constructor body is not visible here; the summary fields are
// presumably filled from the JSON file when the object is built — confirm in Result.cc.
class Result { class Result {
public: public:
Result(const string& path, const string& filename); Result(const std::string& path, const std::string& filename);
// Returns the parsed JSON content of the result file.
json load() const; json load() const;
// Returns a one-line textual summary; maxModel is the width used for the model column.
string to_string(int maxModel) const; std::string to_string(int maxModel) const;
// Accessors: each returns a copy of the corresponding private member.
string getFilename() const { return filename; }; std::string getFilename() const { return filename; };
string getDate() const { return date; }; std::string getDate() const { return date; };
double getScore() const { return score; }; double getScore() const { return score; };
string getTitle() const { return title; }; std::string getTitle() const { return title; };
double getDuration() const { return duration; }; double getDuration() const { return duration; };
string getModel() const { return model; }; std::string getModel() const { return model; };
string getScoreName() const { return scoreName; }; std::string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; }; bool isComplete() const { return complete; };
private: private:
// Location of the result file: load() opens path + "/" + filename.
string path; std::string path;
string filename; std::string filename;
// Summary fields returned by the accessors above.
// NOTE(review): score, duration and complete have no in-class initializer; they are
// presumably always assigned by the constructor — confirm.
string date; std::string date;
double score; double score;
string title; std::string title;
double duration; double duration;
string model; std::string model;
string scoreName; std::string scoreName;
bool complete; bool complete;
}; };
}; };

View File

@ -2,7 +2,7 @@
#include <algorithm> #include <algorithm>
namespace platform { namespace platform {
Results::Results(const string& path, const string& model, const string& score, bool complete, bool partial) : Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) :
path(path), model(model), scoreName(score), complete(complete), partial(partial) path(path), model(model), scoreName(score), complete(complete), partial(partial)
{ {
load(); load();
@ -17,7 +17,7 @@ namespace platform {
using std::filesystem::directory_iterator; using std::filesystem::directory_iterator;
for (const auto& file : directory_iterator(path)) { for (const auto& file : directory_iterator(path)) {
auto filename = file.path().filename().string(); auto filename = file.path().filename().string();
if (filename.find(".json") != string::npos && filename.find("results_") == 0) { if (filename.find(".json") != std::string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename); auto result = Result(path, filename);
bool addResult = true; bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete()) if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
@ -27,7 +27,7 @@ namespace platform {
} }
} }
} }
void Results::hideResult(int index, const string& pathHidden) void Results::hideResult(int index, const std::string& pathHidden)
{ {
auto filename = files.at(index).getFilename(); auto filename = files.at(index).getFilename();
rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str()); rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str());

View File

@ -6,32 +6,31 @@
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "Result.h" #include "Result.h"
namespace platform { namespace platform {
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
// Collection of Result objects found under a directory, with sorting, hiding and
// deletion helpers plus iterator access to the stored entries.
class Results { class Results {
public: public:
// Stores the arguments and loads the list of results.
// NOTE(review): model/score/complete/partial appear to act as filters when loading
// ("any" seems to disable the model and score filters) — confirm in Results.cc.
Results(const string& path, const string& model, const string& score, bool complete, bool partial); Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial);
// Reorder the stored results by the named criterion (ordering direction not shown here).
void sortDate(); void sortDate();
void sortScore(); void sortScore();
void sortModel(); void sortModel();
void sortDuration(); void sortDuration();
// Returns maxModel; presumably the longest model-name length among loaded results — confirm.
int maxModelSize() const { return maxModel; }; int maxModelSize() const { return maxModel; };
// Moves the file of the result at index from path into the pathHidden directory.
void hideResult(int index, const string& pathHidden); void hideResult(int index, const std::string& pathHidden);
void deleteResult(int index); void deleteResult(int index);
int size() const; int size() const;
bool empty() const; bool empty() const;
// Mutable iterator access to the underlying vector of results.
vector<Result>::iterator begin() { return files.begin(); }; std::vector<Result>::iterator begin() { return files.begin(); };
vector<Result>::iterator end() { return files.end(); }; std::vector<Result>::iterator end() { return files.end(); };
// Access by index; delegates to vector::at on the stored results.
Result& at(int index) { return files.at(index); }; Result& at(int index) { return files.at(index); };
private: private:
string path; std::string path;
string model; std::string model;
string scoreName; std::string scoreName;
bool complete; bool complete;
bool partial; bool partial;
// NOTE(review): maxModel has no in-class initializer; presumably set by load() — confirm.
int maxModel; int maxModel;
vector<Result> files; std::vector<Result> files;
void load(); // Loads the list of results void load(); // Loads the list of results
}; };
}; };

View File

@ -9,7 +9,7 @@
namespace platform { namespace platform {
Statistics::Statistics(const vector<string>& models, const vector<string>& datasets, const json& data, double significance, bool output) : Statistics::Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output) :
models(models), datasets(datasets), data(data), significance(significance), output(output) models(models), datasets(datasets), data(data), significance(significance), output(output)
{ {
nModels = models.size(); nModels = models.size();
@ -20,27 +20,27 @@ namespace platform {
// Prepares the data needed by the statistical tests: computes the ranks, selects the
// control model, computes the win/tie/loss counters and records the longest model and
// dataset name lengths. Sets fitted to true on success.
// Throws std::runtime_error when fewer than 3 models or fewer than 3 datasets are available.
void Statistics::fit() void Statistics::fit()
{ {
if (nModels < 3 || nDatasets < 3) { if (nModels < 3 || nDatasets < 3) {
// Report the offending counts on stderr before failing.
cerr << "nModels: " << nModels << endl; std::cerr << "nModels: " << nModels << std::endl;
cerr << "nDatasets: " << nDatasets << endl; std::cerr << "nDatasets: " << nDatasets << std::endl;
throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
} }
// Discard per-model ranks from any previous run before recomputing.
ranksModels.clear(); ranksModels.clear();
computeRanks(); computeRanks();
// Set the control model as the one with the lowest average rank // Set the control model as the one with the lowest average rank
// NOTE(review): controlIdx is the position of the minimum inside `ranks`, a std::map
// iterated in key order, yet it is later used as an index into `models`
// (models.at(controlIdx)). The two agree only if `models` is in the same order as the
// map keys — confirm that callers pass models sorted that way.
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
computeWTL(); computeWTL();
// Longest model / dataset name lengths, used as column widths when printing.
// NOTE(review): max_element is dereferenced directly; safe here because the guard above
// requires at least 3 models and datasets, provided nModels/nDatasets mirror the vector sizes.
maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
fitted = true; fitted = true;
} }
map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder) std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{ {
// sort the ranksOrder vector by value // sort the ranksOrder std::vector by value
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) { std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, double>& a, const std::pair<std::string, double>& b) {
return a.second > b.second; return a.second > b.second;
}); });
//Assign ranks to values and if they are the same they share the same averaged rank //Assign ranks to values and if they are the same they share the same averaged rank
map<string, float> ranks; std::map<std::string, float> ranks;
for (int i = 0; i < ranksOrder.size(); i++) { for (int i = 0; i < ranksOrder.size(); i++) {
ranks[ranksOrder[i].first] = i + 1.0; ranks[ranksOrder[i].first] = i + 1.0;
} }
@ -63,9 +63,9 @@ namespace platform {
} }
void Statistics::computeRanks() void Statistics::computeRanks()
{ {
map<string, float> ranksLine; std::map<std::string, float> ranksLine;
for (const auto& dataset : datasets) { for (const auto& dataset : datasets) {
vector<pair<string, double>> ranksOrder; std::vector<std::pair<std::string, double>> ranksOrder;
for (const auto& model : models) { for (const auto& model : models) {
double value = data[model].at(dataset).at(0).get<double>(); double value = data[model].at(dataset).at(0).get<double>();
ranksOrder.push_back({ model, value }); ranksOrder.push_back({ model, value });
@ -118,11 +118,11 @@ namespace platform {
if (!fitted) { if (!fitted) {
fit(); fit();
} }
stringstream oss; std::stringstream oss;
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
// Post-hoc Holm test // Post-hoc Holm test
// Calculate the p-value for the models paired with the control model // Calculate the p-value for the models paired with the control model
map<int, double> stats; // p-value of each model paired with the control model std::map<int, double> stats; // p-value of each model paired with the control model
boost::math::normal dist(0.0, 1.0); boost::math::normal dist(0.0, 1.0);
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
for (int i = 0; i < nModels; i++) { for (int i = 0; i < nModels; i++) {
@ -135,11 +135,11 @@ namespace platform {
stats[i] = p_value; stats[i] = p_value;
} }
// Sort the models by p-value // Sort the models by p-value
vector<pair<int, double>> statsOrder; std::vector<std::pair<int, double>> statsOrder;
for (const auto& stat : stats) { for (const auto& stat : stats) {
statsOrder.push_back({ stat.first, stat.second }); statsOrder.push_back({ stat.first, stat.second });
} }
sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) { std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair<int, double>& a, const std::pair<int, double>& b) {
return a.second < b.second; return a.second < b.second;
}); });
@ -147,29 +147,29 @@ namespace platform {
for (int i = 0; i < statsOrder.size(); ++i) { for (int i = 0; i < statsOrder.size(); ++i) {
auto item = statsOrder.at(i); auto item = statsOrder.at(i);
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
double p_value = min((double)1.0, item.second * (nModels - i)); double p_value = std::min((double)1.0, item.second * (nModels - i));
p_value = max(before, p_value); p_value = std::max(before, p_value);
statsOrder[i] = { item.first, p_value }; statsOrder[i] = { item.first, p_value };
} }
holmResult.model = models.at(controlIdx); holmResult.model = models.at(controlIdx);
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW(); auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
oss << color; oss << color;
oss << " *************************************************************************************************************" << endl; oss << " *************************************************************************************************************" << std::endl;
oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
oss << " Control model: " << models.at(controlIdx) << endl; oss << " Control model: " << models.at(controlIdx) << std::endl;
oss << " " << left << setw(maxModelName) << string("Model") << " p-value rank win tie loss Status" << endl; oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl;
oss << " " << string(maxModelName, '=') << " ============ ========= === === ==== =============" << endl; oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl;
// sort ranks from lowest to highest // sort ranks from lowest to highest
vector<pair<string, float>> ranksOrder; std::vector<std::pair<std::string, float>> ranksOrder;
for (const auto& rank : ranks) { for (const auto& rank : ranks) {
ranksOrder.push_back({ rank.first, rank.second }); ranksOrder.push_back({ rank.first, rank.second });
} }
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) { std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, float>& a, const std::pair<std::string, float>& b) {
return a.second < b.second; return a.second < b.second;
}); });
// Show the control model info. // Show the control model info.
oss << " " << Colors::BLUE() << left << setw(maxModelName) << ranksOrder.at(0).first << " "; oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " ";
oss << setw(12) << " " << setprecision(7) << fixed << " " << ranksOrder.at(0).second << endl; oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl;
for (const auto& item : ranksOrder) { for (const auto& item : ranksOrder) {
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
double pvalue = 0.0; double pvalue = 0.0;
@ -185,15 +185,15 @@ namespace platform {
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA(); auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross; auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0"; auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
oss << " " << colorStatus << left << setw(maxModelName) << item.first << " "; oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " ";
oss << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second; oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second;
oss << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss; oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss;
oss << " " << status << textStatus << endl; oss << " " << status << textStatus << std::endl;
} }
oss << color << " *************************************************************************************************************" << endl; oss << color << " *************************************************************************************************************" << std::endl;
oss << Colors::RESET(); oss << Colors::RESET();
if (output) { if (output) {
cout << oss.str(); std::cout << oss.str();
} }
} }
bool Statistics::friedmanTest() bool Statistics::friedmanTest()
@ -201,12 +201,12 @@ namespace platform {
if (!fitted) { if (!fitted) {
fit(); fit();
} }
stringstream oss; std::stringstream oss;
// Friedman test // Friedman test
// Calculate the Friedman statistic // Calculate the Friedman statistic
oss << Colors::BLUE() << endl; oss << Colors::BLUE() << std::endl;
oss << "***************************************************************************************************************" << endl; oss << "***************************************************************************************************************" << std::endl;
oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl;
double degreesOfFreedom = nModels - 1.0; double degreesOfFreedom = nModels - 1.0;
double sumSquared = 0; double sumSquared = 0;
for (const auto& rank : ranks) { for (const auto& rank : ranks) {
@ -218,21 +218,21 @@ namespace platform {
boost::math::chi_squared chiSquared(degreesOfFreedom); boost::math::chi_squared chiSquared(degreesOfFreedom);
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
double criticalValue = quantile(chiSquared, 1 - significance); double criticalValue = quantile(chiSquared, 1 - significance);
oss << "Friedman statistic: " << friedmanQ << endl; oss << "Friedman statistic: " << friedmanQ << std::endl;
oss << "Critical χ2 Value for df=" << fixed << (int)degreesOfFreedom oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom
<< " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; << " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl;
oss << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl;
bool result; bool result;
if (p_value < significance) { if (p_value < significance) {
oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl;
result = true; result = true;
} else { } else {
oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl;
result = false; result = false;
} }
oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl; oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl;
if (output) { if (output) {
cout << oss.str(); std::cout << oss.str();
} }
friedmanResult = { friedmanQ, criticalValue, p_value, result }; friedmanResult = { friedmanQ, criticalValue, p_value, result };
return result; return result;
@ -245,7 +245,7 @@ namespace platform {
{ {
return holmResult; return holmResult;
} }
// Returns a mutable reference to the ranksModels member (a map of string to a
// string->float map). The reference aliases internal state: fit() clears ranksModels
// before recomputing the ranks, so earlier contents do not survive a new fit.
map<string, map<string, float>>& Statistics::getRanks() std::map<std::string, std::map<std::string, float>>& Statistics::getRanks()
{ {
return ranksModels; return ranksModels;
} }

View File

@ -5,7 +5,6 @@
#include <map> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
@ -21,30 +20,30 @@ namespace platform {
bool reject; bool reject;
}; };
// One row of a Holm post-hoc result.
// NOTE(review): the code that fills these fields is not visible here; presumably each
// row holds a model name, its p-value, its rank, its win/tie/loss counters and whether
// H0 was rejected for it — confirm in Statistics.cc.
struct HolmLine { struct HolmLine {
string model; std::string model;
long double pvalue; long double pvalue;
double rank; double rank;
WTL wtl; WTL wtl;
bool reject; bool reject;
}; };
// Result of the post-hoc Holm test: a model name plus a list of HolmLine rows.
// postHocHolmTest assigns the control model's name to `model`.
// NOTE(review): holmLines presumably holds one row per compared model — confirm.
struct HolmResult { struct HolmResult {
string model; std::string model;
vector<HolmLine> holmLines; std::vector<HolmLine> holmLines;
}; };
class Statistics { class Statistics {
public: public:
Statistics(const vector<string>& models, const vector<string>& datasets, const json& data, double significance = 0.05, bool output = true); Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
bool friedmanTest(); bool friedmanTest();
void postHocHolmTest(bool friedmanResult); void postHocHolmTest(bool friedmanResult);
FriedmanResult& getFriedmanResult(); FriedmanResult& getFriedmanResult();
HolmResult& getHolmResult(); HolmResult& getHolmResult();
map<string, map<string, float>>& getRanks(); std::map<std::string, std::map<std::string, float>>& getRanks();
private: private:
void fit(); void fit();
void computeRanks(); void computeRanks();
void computeWTL(); void computeWTL();
const vector<string>& models; const std::vector<std::string>& models;
const vector<string>& datasets; const std::vector<std::string>& datasets;
const json& data; const json& data;
double significance; double significance;
bool output; bool output;
@ -52,13 +51,13 @@ namespace platform {
int nModels = 0; int nModels = 0;
int nDatasets = 0; int nDatasets = 0;
int controlIdx = 0; int controlIdx = 0;
map<int, WTL> wtl; std::map<int, WTL> wtl;
map<string, float> ranks; std::map<std::string, float> ranks;
int maxModelName = 0; int maxModelName = 0;
int maxDatasetName = 0; int maxDatasetName = 0;
FriedmanResult friedmanResult; FriedmanResult friedmanResult;
HolmResult holmResult; HolmResult holmResult;
map<string, map<string, float>> ranksModels; std::map<std::string, std::map<std::string, float>> ranksModels;
}; };
} }
#endif // !STATISTICS_H #endif // !STATISTICS_H

View File

@ -1,18 +1,17 @@
#ifndef SYMBOLS_H #ifndef SYMBOLS_H
#define SYMBOLS_H #define SYMBOLS_H
#include <string> #include <string>
using namespace std;
namespace platform { namespace platform {
// Named Unicode symbols used when printing reports. Every member is an
// inline static constant string, so the class is used without being instantiated.
class Symbols { class Symbols {
public: public:
// U+2714 heavy check mark.
inline static const string check_mark{ "\u2714" }; inline static const std::string check_mark{ "\u2714" };
// U+2757 heavy exclamation mark.
inline static const string exclamation{ "\u2757" }; inline static const std::string exclamation{ "\u2757" };
// U+2605 black star.
inline static const string black_star{ "\u2605" }; inline static const std::string black_star{ "\u2605" };
// U+2717 ballot X.
inline static const string cross{ "\u2717" }; inline static const std::string cross{ "\u2717" };
// U+27B6 and U+27B4: feathered arrows pointing up-right and down-right respectively.
inline static const string upward_arrow{ "\u27B6" }; inline static const std::string upward_arrow{ "\u27B6" };
inline static const string down_arrow{ "\u27B4" }; inline static const std::string down_arrow{ "\u27B4" };
// Aliases: equal_best copies check_mark and better_best copies black_star.
inline static const string equal_best{ check_mark }; inline static const std::string equal_best{ check_mark };
inline static const string better_best{ black_star }; inline static const std::string better_best{ black_star };
}; };
} }
#endif // !SYMBOLS_H #endif // !SYMBOLS_H

View File

@ -4,7 +4,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
namespace platform { namespace platform {
//static vector<string> split(const string& text, char delimiter); //static std::vector<std::string> split(const std::string& text, char delimiter);
static std::vector<std::string> split(const std::string& text, char delimiter) static std::vector<std::string> split(const std::string& text, char delimiter)
{ {
std::vector<std::string> result; std::vector<std::string> result;

View File

@ -4,7 +4,6 @@
#include "BestResults.h" #include "BestResults.h"
#include "Colors.h" #include "Colors.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv) argparse::ArgumentParser manageArguments(int argc, char** argv)
{ {
@ -15,19 +14,19 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true); program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true); program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const string& value) { program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
try { try {
auto k = stod(value); auto k = std::stod(value);
if (k < 0.01 || k > 0.15) { if (k < 0.01 || k > 0.15) {
throw runtime_error("Significance level hast to be a number in [0.01, 0.15]"); throw std::runtime_error("Significance level hast to be a number in [0.01, 0.15]");
} }
return k; return k;
} }
catch (const runtime_error& err) { catch (const std::runtime_error& err) {
throw runtime_error(err.what()); throw std::runtime_error(err.what());
} }
catch (...) { catch (...) {
throw runtime_error("Number of folds must be an decimal number"); throw std::runtime_error("Number of folds must be an decimal number");
}}); }});
return program; return program;
} }
@ -35,35 +34,35 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
auto program = manageArguments(argc, argv); auto program = manageArguments(argc, argv);
string model, score; std::string model, score;
bool build, report, friedman, excel; bool build, report, friedman, excel;
double level; double level;
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
model = program.get<string>("model"); model = program.get<std::string>("model");
score = program.get<string>("score"); score = program.get<std::string>("score");
build = program.get<bool>("build"); build = program.get<bool>("build");
report = program.get<bool>("report"); report = program.get<bool>("report");
friedman = program.get<bool>("friedman"); friedman = program.get<bool>("friedman");
excel = program.get<bool>("excel"); excel = program.get<bool>("excel");
level = program.get<double>("level"); level = program.get<double>("level");
if (model == "" || score == "") { if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied"); throw std::runtime_error("Model and score name must be supplied");
} }
if (friedman && model != "any") { if (friedman && model != "any") {
cerr << "Friedman test can only be used with all models" << endl; std::cerr << "Friedman test can only be used with all models" << std::endl;
cerr << program; std::cerr << program;
exit(1); exit(1);
} }
if (!report && !build) { if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl; std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl;
cerr << program; std::cerr << program;
exit(1); exit(1);
} }
} }
catch (const exception& err) { catch (const std::exception& err) {
cerr << err.what() << endl; std::cerr << err.what() << std::endl;
cerr << program; std::cerr << program;
exit(1); exit(1);
} }
// Generate report // Generate report
@ -72,8 +71,8 @@ int main(int argc, char** argv)
if (model == "any") { if (model == "any") {
results.buildAll(); results.buildAll();
} else { } else {
string fileName = results.build(); std::string fileName = results.build();
cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
} }
} }
if (report) { if (report) {

View File

@ -4,54 +4,53 @@
#include "Colors.h" #include "Colors.h"
#include "Datasets.h" #include "Datasets.h"
using namespace std;
const int BALANCE_LENGTH = 75; const int BALANCE_LENGTH = 75;
struct separated : numpunct<char> { struct separated : numpunct<char> {
char do_decimal_point() const { return ','; } char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; } char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; } std::string do_grouping() const { return "\03"; }
}; };
void outputBalance(const string& balance) void outputBalance(const std::string& balance)
{ {
auto temp = string(balance); auto temp = std::string(balance);
while (temp.size() > BALANCE_LENGTH - 1) { while (temp.size() > BALANCE_LENGTH - 1) {
auto part = temp.substr(0, BALANCE_LENGTH); auto part = temp.substr(0, BALANCE_LENGTH);
cout << part << endl; std::cout << part << std::endl;
cout << setw(48) << " "; std::cout << setw(48) << " ";
temp = temp.substr(BALANCE_LENGTH); temp = temp.substr(BALANCE_LENGTH);
} }
cout << temp << endl; std::cout << temp << std::endl;
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
auto data = platform::Datasets(false, platform::Paths::datasets()); auto data = platform::Datasets(false, platform::Paths::datasets());
locale mylocale(cout.getloc(), new separated); locale mylocale(std::cout.getloc(), new separated);
locale::global(mylocale); locale::global(mylocale);
cout.imbue(mylocale); std::cout.imbue(mylocale);
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl; std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl;
string balanceBars = string(BALANCE_LENGTH, '='); std::string balanceBars = std::string(BALANCE_LENGTH, '=');
cout << "============================== ====== ===== === " << balanceBars << endl; std::cout << "============================== ====== ===== === " << balanceBars << std::endl;
bool odd = true; bool odd = true;
for (const auto& dataset : data.getNames()) { for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE(); auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << dataset << " "; std::cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset); data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset); auto nSamples = data.getNSamples(dataset);
cout << setw(6) << right << nSamples << " "; std::cout << setw(6) << right << nSamples << " ";
cout << setw(5) << right << data.getFeatures(dataset).size() << " "; std::cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
cout << setw(3) << right << data.getNClasses(dataset) << " "; std::cout << setw(3) << right << data.getNClasses(dataset) << " ";
stringstream oss; std::stringstream oss;
string sep = ""; std::string sep = "";
for (auto number : data.getClassesCounts(dataset)) { for (auto number : data.getClassesCounts(dataset)) {
oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / "; sep = " / ";
} }
outputBalance(oss.str()); outputBalance(oss.str());
odd = !odd; odd = !odd;
} }
cout << Colors::RESET() << endl; std::cout << Colors::RESET() << std::endl;
return 0; return 0;
} }

View File

@ -9,7 +9,6 @@
#include "Paths.h" #include "Paths.h"
using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
argparse::ArgumentParser manageArguments() argparse::ArgumentParser manageArguments()
@ -19,13 +18,13 @@ argparse::ArgumentParser manageArguments()
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment"); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) { .action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames(); static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) { if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value; return value;
} }
throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
} }
); );
program.add_argument("--title").default_value("").help("Experiment title"); program.add_argument("--title").default_value("").help("Experiment title");
@ -33,19 +32,19 @@ argparse::ArgumentParser manageArguments()
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
try { try {
auto k = stoi(value); auto k = stoi(value);
if (k < 2) { if (k < 2) {
throw runtime_error("Number of folds must be greater than 1"); throw std::runtime_error("Number of folds must be greater than 1");
} }
return k; return k;
} }
catch (const runtime_error& err) { catch (const runtime_error& err) {
throw runtime_error(err.what()); throw std::runtime_error(err.what());
} }
catch (...) { catch (...) {
throw runtime_error("Number of folds must be an integer"); throw std::runtime_error("Number of folds must be an integer");
}}); }});
auto seed_values = env.getSeeds(); auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
@ -54,39 +53,39 @@ argparse::ArgumentParser manageArguments()
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
string file_name, model_name, title; std::string file_name, model_name, title;
json hyperparameters_json; json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet; bool discretize_dataset, stratified, saveResults, quiet;
vector<int> seeds; std::vector<int> seeds;
vector<string> filesToTest; std::vector<std::string> filesToTest;
int n_folds; int n_folds;
auto program = manageArguments(); auto program = manageArguments();
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
file_name = program.get<string>("dataset"); file_name = program.get<std::string>("dataset");
model_name = program.get<string>("model"); model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize"); discretize_dataset = program.get<bool>("discretize");
stratified = program.get<bool>("stratified"); stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet"); quiet = program.get<bool>("quiet");
n_folds = program.get<int>("folds"); n_folds = program.get<int>("folds");
seeds = program.get<vector<int>>("seeds"); seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<string>("hyperparameters"); auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters); hyperparameters_json = json::parse(hyperparameters);
title = program.get<string>("title"); title = program.get<std::string>("title");
if (title == "" && file_name == "") { if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided"); throw runtime_error("title is mandatory if dataset is not provided");
} }
saveResults = program.get<bool>("save"); saveResults = program.get<bool>("save");
} }
catch (const exception& err) { catch (const exception& err) {
cerr << err.what() << endl; cerr << err.what() << std::endl;
cerr << program; cerr << program;
exit(1); exit(1);
} }
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
if (file_name != "") { if (file_name != "") {
if (!datasets.isDataset(file_name)) { if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << endl; cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1); exit(1);
} }
if (title == "") { if (title == "") {
@ -118,6 +117,6 @@ int main(int argc, char** argv)
} }
if (!quiet) if (!quiet)
experiment.report(); experiment.report();
cout << "Done!" << endl; std::cout << "Done!" << std::endl;
return 0; return 0;
} }

View File

@ -2,7 +2,6 @@
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include "ManageResults.h" #include "ManageResults.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv) argparse::ArgumentParser manageArguments(int argc, char** argv)
{ {
@ -17,17 +16,17 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.parse_args(argc, argv); program.parse_args(argc, argv);
auto number = program.get<int>("number"); auto number = program.get<int>("number");
if (number < 0) { if (number < 0) {
throw runtime_error("Number of results must be greater than or equal to 0"); throw std::runtime_error("Number of results must be greater than or equal to 0");
} }
auto model = program.get<string>("model"); auto model = program.get<std::string>("model");
auto score = program.get<string>("score"); auto score = program.get<std::string>("score");
auto complete = program.get<bool>("complete"); auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial"); auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare"); auto compare = program.get<bool>("compare");
} }
catch (const exception& err) { catch (const std::exception& err) {
cerr << err.what() << endl; std::cerr << err.what() << std::endl;
cerr << program; std::cerr << program;
exit(1); exit(1);
} }
return program; return program;
@ -37,8 +36,8 @@ int main(int argc, char** argv)
{ {
auto program = manageArguments(argc, argv); auto program = manageArguments(argc, argv);
int number = program.get<int>("number"); int number = program.get<int>("number");
string model = program.get<string>("model"); std::string model = program.get<std::string>("model");
string score = program.get<string>("score"); std::string score = program.get<std::string>("score");
auto complete = program.get<bool>("complete"); auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial"); auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare"); auto compare = program.get<bool>("compare");

View File

@ -18,4 +18,12 @@ static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE", static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
// Registrations for the classifiers in namespace pywrap. Each Registrar pairs
// a model name with a factory lambda that returns a newly allocated instance
// typed as bayesnet::BaseClassifier*.
static platform::Registrar registrarSt("STree",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
static platform::Registrar registrarOdte("Odte",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
static platform::Registrar registrarSvc("SVC",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
#endif #endif

View File

@ -0,0 +1,9 @@
# Header search paths used by the PyWrap sources: bundled libraries, the
# BayesNet sources, and the Python and Torch headers.
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${TORCH_INCLUDE_DIRS})
# Shared library holding the Python interpreter wrapper and the Python-backed classifiers.
add_library(PyWrap SHARED PyWrap.cc STree.cc ODTE.cc SVC.cc RandomForest.cc PyClassifier.cc)
# Disabled variant of the link line below that additionally links xgboost::xgboost.
#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles)
target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)

15
src/PyClassifiers/ODTE.cc Normal file
View File

@ -0,0 +1,15 @@
#include "ODTE.h"
namespace pywrap {
// Returns whatever string the wrapped Python object's "graph" method yields.
std::string ODTE::graph()
{
    const std::string methodName = "graph";
    return callMethodString(methodName);
}
// Stores the hyperparameters for this classifier after verifying that every
// key belongs to the accepted set; checkHyperparameters throws on any other key.
void ODTE::setHyperparameters(nlohmann::json& hyperparameters)
{
    const std::vector<std::string> accepted{ "n_jobs", "n_estimators", "random_state" };
    checkHyperparameters(accepted, hyperparameters);
    // Validation passed: keep a copy for later use.
    this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

15
src/PyClassifiers/ODTE.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef ODTE_H
#define ODTE_H
#include "nlohmann/json.hpp"
#include "PyClassifier.h"
namespace pywrap {
// Classifier backed by the Python class "Odte" of module "odte", driven
// through the PyClassifier base.
class ODTE : public PyClassifier {
public:
// Binds this object to Python module "odte", class "Odte".
ODTE() : PyClassifier("odte", "Odte") {};
~ODTE() = default;
// Returns the string produced by the Python object's "graph" method.
// NOTE(review): this zero-argument overload is not an override; it hides
// PyClassifier::graph(const std::string&) const for callers using an ODTE
// directly - confirm that is intended.
std::string graph();
// Accepts only the keys "n_jobs", "n_estimators" and "random_state".
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* ODTE_H */

View File

@ -0,0 +1,99 @@
#include "PyClassifier.h"
#include <algorithm>
namespace pywrap {
namespace bp = boost::python;
namespace np = boost::python::numpy;
// Binds this classifier to a Python class: obtains the shared PyWrap wrapper
// and asks it to import `className` from `module` under this object's id.
PyClassifier::PyClassifier(const std::string& module, const std::string& className, bool sklearn)
    : module(module), className(className), sklearn(sklearn), fitted(false)
{
    pyWrap = PyWrap::GetInstance();
    // The object's own address is the key, so several instances of the same
    // module/class can coexist.
    id = reinterpret_cast<clfId_t>(this);
    pyWrap->importClass(id, this->module, this->className);
}
// Asks the wrapper to drop the Python objects registered under this object's id.
PyClassifier::~PyClassifier()
{
    this->pyWrap->clean(this->id);
}
// Exposes the buffer of a 2-D tensor as a numpy array without copying and
// returns its transpose, i.e. an array of shape (X.size(1), X.size(0)).
//
// The array is declared with element type float, so the byte strides are
// derived from sizeof(float). They were previously derived from the size of
// the tensor's type-descriptor object, which is unrelated to the element size.
//
// Assumes X holds contiguous 32-bit floats - TODO confirm at the call sites.
// No owner object is attached to the array, so X must outlive the result.
np::ndarray tensor2numpy(torch::Tensor& X)
{
    int m = X.size(0);
    int n = X.size(1);
    constexpr auto itemSize = sizeof(float);
    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(), bp::make_tuple(m, n), bp::make_tuple(itemSize * n, itemSize), bp::object());
    Xn = Xn.transpose();
    return Xn;
}
// Converts a feature tensor and a label tensor into numpy arrays that share
// the tensors' memory. X goes through tensor2numpy; y becomes a 1-D int32
// array whose length is taken from X.size(1).
//
// The stride of y is sizeof(int32_t), matching the declared element type
// (previously derived from the size of the tensor's type-descriptor object).
//
// Assumes y holds contiguous 32-bit integers - TODO confirm at the call sites.
// No owner object is attached, so both tensors must outlive the arrays.
std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
{
    int n = X.size(1);
    auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), bp::make_tuple(n), bp::make_tuple(sizeof(int32_t)), bp::object());
    return { tensor2numpy(X), yn };
}
// Version string: the wrapper's sklearn version for sklearn-flagged
// classifiers, otherwise the value reported for this object's Python instance.
std::string PyClassifier::version()
{
    return sklearn ? pyWrap->sklearnVersion() : pyWrap->version(id);
}
// Forwards to the wrapper: calls `method` on this object's Python instance
// and returns the resulting string.
std::string PyClassifier::callMethodString(const std::string& method)
{
    std::string text = pyWrap->callMethodString(id, method);
    return text;
}
// Trains the Python model on X / y.
// Stored hyperparameters are pushed to the Python instance only before the
// first successful fit. Returns *this to allow chaining.
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y)
{
    const bool pendingHyperparameters = !fitted && hyperparameters.size() > 0;
    if (pendingHyperparameters) {
        pyWrap->setHyperparameters(id, hyperparameters);
    }
    auto arrays = tensors2numpy(X, y);
    // Each CPyObject receives its own reference to the corresponding array.
    CPyObject Xp = bp::incref(bp::object(arrays.first).ptr());
    CPyObject yp = bp::incref(bp::object(arrays.second).ptr());
    pyWrap->fit(id, Xp, yp);
    fitted = true;
    return *this;
}
// Overload required by the base interface. The feature names, class name and
// states are not used: training is delegated to fit(X, y).
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
    PyClassifier& self = fit(X, y);
    return self;
}
// Runs the Python model's predict on X and returns the labels as a 1-D int32 tensor.
//
// Reference accounting: PyWrap::predict returns the result with one reference
// added on top of the one created by the call. bp::handle<> takes over one of
// them; the other is released here, on the success path and - previously
// missing - on the error path before throwing.
//
// Throws std::runtime_error when a Python error is pending after converting
// the result to an ndarray.
// Assumes the returned array holds contiguous 32-bit integers - TODO confirm.
torch::Tensor PyClassifier::predict(torch::Tensor& X)
{
    auto Xn = tensor2numpy(X);
    CPyObject Xp = bp::incref(bp::object(Xn).ptr());
    PyObject* incoming = pyWrap->predict(id, Xp);
    bp::handle<> handle(incoming);
    bp::object object(handle);
    np::ndarray prediction = np::from_object(object);
    if (PyErr_Occurred()) {
        PyErr_Print();
        Py_XDECREF(incoming);
        throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
    }
    // Copy the labels out of the numpy buffer before the Python object is released.
    int* data = reinterpret_cast<int*>(prediction.get_data());
    std::vector<int> vPrediction(data, data + prediction.shape(0));
    auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
    Py_XDECREF(incoming);
    return resultTensor;
}
// Evaluates the Python model's score method on X / y and returns the value
// narrowed to float.
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
    auto arrays = tensors2numpy(X, y);
    // Each CPyObject receives its own reference to the corresponding array.
    CPyObject Xp = bp::incref(bp::object(arrays.first).ptr());
    CPyObject yp = bp::incref(bp::object(arrays.second).ptr());
    return static_cast<float>(pyWrap->score(id, Xp, yp));
}
// Default implementation: no hyperparameter is accepted, so any key makes
// checkHyperparameters throw. An empty JSON object is stored as is.
void PyClassifier::setHyperparameters(nlohmann::json& hyperparameters)
{
    const std::vector<std::string> accepted;
    checkHyperparameters(accepted, hyperparameters);
    this->hyperparameters = hyperparameters;
}
// Verifies that every key of `hyperparameters` appears in `validKeys`.
// Throws std::invalid_argument naming the first key that does not.
void PyClassifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
    for (const auto& item : hyperparameters.items()) {
        // Qualified call: do not rely on argument-dependent lookup to reach std::find.
        if (std::find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
            throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
        }
    }
}
} /* namespace pywrap */

View File

@ -0,0 +1,55 @@
#ifndef PYCLASSIFIER_H
#define PYCLASSIFIER_H
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <nlohmann/json.hpp>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <torch/torch.h>
#include "PyWrap.h"
#include "Classifier.h"
#include "TypeId.h"
namespace pywrap {
// Adapter that exposes a Python classifier through the bayesnet::BaseClassifier
// interface. All Python calls go through the shared PyWrap object, keyed by
// this object's id. Only the tensor-based fit/predict/score are implemented;
// the remaining interface members are stubs returning empty or zero values.
class PyClassifier : public bayesnet::BaseClassifier {
public:
// module / className name the Python module and class to instantiate;
// sklearn selects which version query version() uses.
PyClassifier(const std::string& module, const std::string& className, const bool sklearn = false);
virtual ~PyClassifier();
// Stub: vector input is not supported; returns *this without training.
PyClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
// X is nxm tensor, y is nx1 tensor
// NOTE(review): the implementation takes the length of y from X.size(1) -
// confirm which axis of X holds the samples.
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
// Trains the Python model; the overload above delegates here.
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y);
// Stubs: single-dataset input is not supported; both return *this without training.
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override { return *this; };
// Returns the Python model's predictions as an int32 tensor.
torch::Tensor predict(torch::Tensor& X) override;
// Stub: always returns an empty vector.
std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); };
// Stub: always returns 0.0.
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; };
// Returns the value of the Python model's score method.
float score(torch::Tensor& X, torch::Tensor& y) override;
// Base implementation accepts no keys; subclasses override to allow their own.
void setHyperparameters(nlohmann::json& hyperparameters) override;
std::string version();
// Calls `method` on the Python instance and returns its string result.
std::string callMethodString(const std::string& method);
std::string getVersion() override { return this->version(); };
// The following members of the base interface are stubs for Python-backed models.
int getNumberOfNodes()const override { return 0; };
int getNumberOfEdges()const override { return 0; };
int getNumberOfStates() const override { return 0; };
std::vector<std::string> show() const override { return std::vector<std::string>(); }
std::vector<std::string> graph(const std::string& title = "") const override { return std::vector<std::string>(); }
bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; };
std::vector<std::string> topological_order() override { return std::vector<std::string>(); }
void dump_cpt() const override {};
protected:
// Throws std::invalid_argument if hyperparameters contains a key not in validKeys.
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
// Hyperparameters stored by setHyperparameters; applied to the Python instance by fit.
nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights) override {};
private:
PyWrap* pyWrap;
std::string module;
std::string className;
bool sklearn;
// Key under which PyWrap stores this object's Python module, class and instance.
clfId_t id;
// Set to true by fit(X, y) once the Python fit call has returned.
bool fitted;
};
} /* namespace pywrap */
#endif /* PYCLASSIFIER_H */

15
src/PyClassifiers/PyClf.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef PYCLF_H
#define PYCLF_H
#include <string>
#include "DotEnv.h"
namespace PyClassifiers {
// Named classifier placeholder: the class only declares a constructor taking
// a name, a virtual destructor and the stored name.
// NOTE(review): no PyClf.cc appears in the PyWrap library source list added by
// this commit - confirm whether this header is still in use.
class PyClf {
public:
PyClf(const std::string& name);
virtual ~PyClf();
private:
std::string name;
};
} /* namespace PyClassifiers */
#endif /* PYCLF_H */

View File

@ -0,0 +1,87 @@
#ifndef PYHELPER_HPP
#define PYHELPER_HPP
#pragma once
// Code taken and adapted from
// https://www.codeproject.com/Articles/820116/Embedding-Python-program-in-a-C-Cplusplus-code
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap {
namespace p = boost::python;
namespace np = boost::python::numpy;
// Scope object for the embedded interpreter: construction initializes Python
// and the Boost.Python numpy support, destruction finalizes Python.
// NOTE(review): the Boost.Python embedding documentation advises against
// calling Py_Finalize - confirm that destroying this object is safe.
class CPyInstance {
public:
CPyInstance()
{
Py_Initialize();
np::initialize();
}
~CPyInstance()
{
Py_Finalize();
}
};
class CPyObject {
private:
PyObject* p;
public:
CPyObject() : p(NULL)
{
}
CPyObject(PyObject* _p) : p(_p)
{
}
~CPyObject()
{
Release();
}
PyObject* getObject()
{
return p;
}
PyObject* setObject(PyObject* _p)
{
return (p = _p);
}
PyObject* AddRef()
{
if (p) {
Py_INCREF(p);
}
return p;
}
void Release()
{
if (p) {
Py_XDECREF(p);
}
p = NULL;
}
PyObject* operator ->()
{
return p;
}
bool is()
{
return p ? true : false;
}
operator PyObject* ()
{
return p;
}
PyObject* operator = (PyObject* pp)
{
p = pp;
return p;
}
operator bool()
{
return p ? true : false;
}
};
} /* namespace pywrap */
#endif

192
src/PyClassifiers/PyWrap.cc Normal file
View File

@ -0,0 +1,192 @@
#define PY_SSIZE_T_CLEAN
#include <stdexcept>
#include "PyWrap.h"
#include <string>
#include <map>
#include <sstream>
#include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap {
namespace np = boost::python::numpy;
// Definitions of PyWrap's static members: the singleton pointer, the mutex
// guarding it, and the interpreter scope object.
PyWrap* PyWrap::wrapper = nullptr;
std::mutex PyWrap::mutex;
CPyInstance* PyWrap::pyInstance = nullptr;
// NOTE(review): this namespace-scope map is keyed by a pair of strings, while
// the member functions below look entries up by clfId_t. Inside those members
// the name moduleClassMap resolves to the class member declared in PyWrap.h,
// so this variable looks unused - confirm and remove if so.
auto moduleClassMap = std::map<std::pair<std::string, std::string>, std::tuple<PyObject*, PyObject*, PyObject*>>();
// Returns the process-wide wrapper, creating it under the class mutex on
// first use. Creation also starts the interpreter and silences Python warnings.
PyWrap* PyWrap::GetInstance()
{
    std::lock_guard<std::mutex> guard(mutex);
    if (wrapper != nullptr) {
        return wrapper;
    }
    wrapper = new PyWrap();
    pyInstance = new CPyInstance();
    PyRun_SimpleString("import warnings;warnings.filterwarnings('ignore')");
    return wrapper;
}
// Destroys the interpreter scope object and then the wrapper itself.
// Does nothing when no wrapper exists.
void PyWrap::RemoveInstance()
{
    if (wrapper == nullptr) {
        return;
    }
    // Deleting a null pointer is a no-op, so no separate check is needed.
    delete pyInstance;
    pyInstance = nullptr;
    delete wrapper;
    wrapper = nullptr;
}
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
{
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result != moduleClassMap.end()) {
return;
}
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't import module " + moduleName);
}
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't find class " + className);
}
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
errorAbort("Couldn't create instance of class " + className);
}
moduleClassMap.insert({ id, { module, classObject, instance } });
}
void PyWrap::clean(const clfId_t id)
{
// Remove Python interpreter if no more modules imported left
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result == moduleClassMap.end()) {
return;
}
Py_DECREF(std::get<0>(result->second));
Py_DECREF(std::get<1>(result->second));
Py_DECREF(std::get<2>(result->second));
moduleClassMap.erase(result);
if (PyErr_Occurred()) {
PyErr_Print();
errorAbort("Error cleaning module ");
}
// With boost you can't remove the interpreter
// https://www.boost.org/doc/libs/1_83_0/libs/python/doc/html/tutorial/tutorial/embedding.html#tutorial.embedding.getting_started
// if (moduleClassMap.empty()) {
// RemoveInstance();
// }
}
void PyWrap::errorAbort(const std::string& message)
{
std::cerr << message << std::endl;
PyErr_Print();
RemoveInstance();
exit(1);
}
// Returns the Python instance registered under `id` (third element of the
// stored tuple). Terminates the process through errorAbort if `id` is unknown.
PyObject* PyWrap::getClass(const clfId_t id)
{
    const auto entry = moduleClassMap.find(id);
    const bool known = entry != moduleClassMap.end();
    if (!known) {
        errorAbort("Module not found");
    }
    return std::get<2>(entry->second);
}
// Calls the zero-argument method `method` on the Python instance registered
// under `id` and returns its result as a UTF-8 string.
// Terminates the process through errorAbort if the call fails or if the
// result cannot be converted to UTF-8 text (for example, it is not a str).
std::string PyWrap::callMethodString(const clfId_t id, const std::string& method)
{
    PyObject* instance = getClass(id);
    PyObject* result = PyObject_CallMethod(instance, method.c_str(), NULL);
    if (result == nullptr) {
        errorAbort("Couldn't call method " + method);
    }
    // PyUnicode_AsUTF8 returns NULL with an error set on failure; building a
    // std::string from a null pointer would be undefined behaviour.
    const char* text = PyUnicode_AsUTF8(result);
    if (text == nullptr) {
        Py_XDECREF(result);
        errorAbort("Method " + method + " didn't return a string");
    }
    // Copy the text before releasing the object that owns the buffer.
    std::string value = text;
    Py_XDECREF(result);
    return value;
}
// Version string reported for sklearn-flagged classifiers.
// Currently a hard-coded placeholder: the interpreter is not queried.
std::string PyWrap::sklearnVersion()
{
return "1.0";
// Disabled draft of a real query.
// NOTE(review): PyRun_SimpleString returns an int status code rather than an
// object, so this draft needs a different API before it can be enabled.
// CPyObject data = PyRun_SimpleString("import sklearn;return sklearn.__version__");
// std::string result = PyUnicode_AsUTF8(data);
// return result;
}
// Returns the string produced by the "version" method of the Python instance
// registered under `id`.
std::string PyWrap::version(const clfId_t id)
{
    const std::string method{ "version" };
    return callMethodString(id, method);
}
void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters)
{
// Set hyperparameters as attributes of the class
PyObject* pValue;
PyObject* instance = getClass(id);
for (const auto& [key, value] : hyperparameters.items()) {
std::stringstream oss;
oss << value.type_name();
if (oss.str() == "string") {
pValue = Py_BuildValue("s", value.get<std::string>().c_str());
} else {
if (value.is_number_integer()) {
pValue = Py_BuildValue("i", value.get<int>());
} else {
pValue = Py_BuildValue("f", value.get<double>());
}
}
int res = PyObject_SetAttrString(instance, key.c_str(), pValue);
if (res == -1 && PyErr_Occurred()) {
Py_XDECREF(pValue);
errorAbort("Couldn't set attribute " + key + "=" + value.dump());
}
Py_XDECREF(pValue);
}
}
void PyWrap::fit(const clfId_t id, CPyObject& X, CPyObject& y)
{
PyObject* instance = getClass(id);
CPyObject result;
CPyObject method = PyUnicode_FromString("fit");
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
errorAbort("Couldn't call method fit");
}
catch (const std::exception& e) {
errorAbort(e.what());
}
}
// Calls predict(X) on the Python instance registered under `id` and returns
// the raw result. One extra reference is added before returning; the caller
// is responsible for releasing the object. A failed call terminates the
// process through errorAbort.
PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
{
    PyObject* instance = getClass(id);
    PyObject* prediction;
    CPyObject methodName = PyUnicode_FromString("predict");
    try {
        prediction = PyObject_CallMethodObjArgs(instance, methodName.getObject(), X.getObject(), NULL);
        if (prediction == nullptr) {
            errorAbort("Couldn't call method predict");
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
    Py_INCREF(prediction);
    return prediction;
}
double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y)
{
PyObject* instance = getClass(id);
CPyObject result;
CPyObject method = PyUnicode_FromString("score");
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
errorAbort("Couldn't call method score");
}
catch (const std::exception& e) {
errorAbort(e.what());
}
double resultValue = PyFloat_AsDouble(result);
return resultValue;
}
}

View File

@ -0,0 +1,47 @@
#ifndef PYWRAP_H
#define PYWRAP_H
#include "boost/python/detail/wrap_python.hpp"
#include <string>
#include <map>
#include <tuple>
#include <mutex>
#include <nlohmann/json.hpp>
#include "PyHelper.hpp"
#include "TypeId.h"
#pragma once
namespace pywrap {
/*
Singleton class to handle Python/numpy interpreter.
*/
using json = nlohmann::json;
// Process-wide access point to the embedded Python interpreter. Each client
// registers a Python module/class/instance triple under a clfId_t and then
// invokes methods on that instance through this class.
class PyWrap {
public:
PyWrap() = default;
PyWrap(PyWrap& other) = delete;
// Returns the shared wrapper, creating it and starting the interpreter on first use.
static PyWrap* GetInstance();
void operator=(const PyWrap&) = delete;
~PyWrap() = default;
// Calls a zero-argument method on the instance registered under id and returns its string result.
std::string callMethodString(const clfId_t id, const std::string& method);
std::string sklearnVersion();
// Returns the result of the instance's "version" method.
std::string version(const clfId_t id);
// Sets each JSON entry as an attribute of the instance.
void setHyperparameters(const clfId_t id, const json& hyperparameters);
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
// The returned object carries an extra reference that the caller must release.
PyObject* predict(const clfId_t id, CPyObject& X);
double score(const clfId_t id, CPyObject& X, CPyObject& y);
// Releases and forgets the objects registered under id.
void clean(const clfId_t id);
// Imports the module, instantiates the class and registers the result under id.
void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
// Returns the Python instance (not the class object) registered under id.
PyObject* getClass(const clfId_t id);
private:
// Only call RemoveInstance from clean method
static void RemoveInstance();
// Prints the message, tears the wrapper down and exits the process with status 1.
void errorAbort(const std::string& message);
// No need to use static map here, since this class is a singleton
// Maps an id to its (module, class, instance) triple.
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;
static CPyInstance* pyInstance;
static PyWrap* wrapper;
static std::mutex mutex;
};
} /* namespace pywrap */
#endif /* PYWRAP_H */

Some files were not shown because too many files have changed in this diff Show More