diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a4515f..1f8b8c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # Options # ------- @@ -41,11 +42,11 @@ if(Boost_FOUND) include_directories(${Boost_INCLUDE_DIRS}) endif() -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # CMakes modules # -------------- set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) include(AddGitSubmodule) + if (CODE_COVERAGE) enable_testing() include(CodeCoverage) diff --git a/lib/Files/ArffFiles.cc b/lib/Files/ArffFiles.cc index 4039b0b..99f29bd 100644 --- a/lib/Files/ArffFiles.cc +++ b/lib/Files/ArffFiles.cc @@ -4,11 +4,9 @@ #include #include -using namespace std; - ArffFiles::ArffFiles() = default; -vector ArffFiles::getLines() const +std::vector ArffFiles::getLines() const { return lines; } @@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const return lines.size(); } -vector> ArffFiles::getAttributes() const +std::vector> ArffFiles::getAttributes() const { return attributes; } -string ArffFiles::getClassName() const +std::string ArffFiles::getClassName() const { return className; } -string ArffFiles::getClassType() const +std::string ArffFiles::getClassType() const { return classType; } -vector>& ArffFiles::getX() +std::vector>& ArffFiles::getX() { return X; } -vector& ArffFiles::getY() +std::vector& ArffFiles::getY() { return y; } -void ArffFiles::loadCommon(string fileName) +void ArffFiles::loadCommon(std::string fileName) { - ifstream file(fileName); + std::ifstream file(fileName); if (!file.is_open()) { - throw invalid_argument("Unable to open file"); + throw std::invalid_argument("Unable to open file"); } - string line; - string keyword; - string attribute; - string type; - string type_w; + std::string 
line; + std::string keyword; + std::string attribute; + std::string type; + std::string type_w; while (getline(file, line)) { if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { continue; } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); + if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) { + std::stringstream ss(line); ss >> keyword >> attribute; type = ""; while (ss >> type_w) @@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName) } file.close(); if (attributes.empty()) - throw invalid_argument("No attributes found"); + throw std::invalid_argument("No attributes found"); } -void ArffFiles::load(const string& fileName, bool classLast) +void ArffFiles::load(const std::string& fileName, bool classLast) { int labelIndex; loadCommon(fileName); if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); + className = std::get<0>(attributes.back()); + classType = std::get<1>(attributes.back()); attributes.pop_back(); labelIndex = static_cast(attributes.size()); } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); + className = std::get<0>(attributes.front()); + classType = std::get<1>(attributes.front()); attributes.erase(attributes.begin()); labelIndex = 0; } generateDataset(labelIndex); } -void ArffFiles::load(const string& fileName, const string& name) +void ArffFiles::load(const std::string& fileName, const std::string& name) { int labelIndex; loadCommon(fileName); bool found = false; for (int i = 0; i < attributes.size(); ++i) { if (attributes[i].first == name) { - className = get<0>(attributes[i]); - classType = get<1>(attributes[i]); + className = std::get<0>(attributes[i]); + classType = std::get<1>(attributes[i]); attributes.erase(attributes.begin() + i); labelIndex = i; found = true; @@ -110,19 +108,19 @@ void ArffFiles::load(const string& 
fileName, const string& name) } } if (!found) { - throw invalid_argument("Class name not found"); + throw std::invalid_argument("Class name not found"); } generateDataset(labelIndex); } void ArffFiles::generateDataset(int labelIndex) { - X = vector>(attributes.size(), vector(lines.size())); - auto yy = vector(lines.size(), ""); - auto removeLines = vector(); // Lines with missing values + X = std::vector>(attributes.size(), std::vector(lines.size())); + auto yy = std::vector(lines.size(), ""); + auto removeLines = std::vector(); // Lines with missing values for (size_t i = 0; i < lines.size(); i++) { - stringstream ss(lines[i]); - string value; + std::stringstream ss(lines[i]); + std::string value; int pos = 0; int xIndex = 0; while (getline(ss, value, ',')) { @@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex) y = factorize(yy); } -string ArffFiles::trim(const string& source) +std::string ArffFiles::trim(const std::string& source) { - string s(source); + std::string s(source); s.erase(0, s.find_first_not_of(" '\n\r\t")); s.erase(s.find_last_not_of(" '\n\r\t") + 1); return s; } -vector ArffFiles::factorize(const vector& labels_t) +std::vector ArffFiles::factorize(const std::vector& labels_t) { - vector yy; + std::vector yy; yy.reserve(labels_t.size()); - map labelMap; + std::map labelMap; int i = 0; - for (const string& label : labels_t) { + for (const std::string& label : labels_t) { if (labelMap.find(label) == labelMap.end()) { labelMap[label] = i++; } diff --git a/lib/Files/ArffFiles.h b/lib/Files/ArffFiles.h index 5cacb27..25e5a8c 100644 --- a/lib/Files/ArffFiles.h +++ b/lib/Files/ArffFiles.h @@ -4,31 +4,29 @@ #include #include -using namespace std; - class ArffFiles { private: - vector lines; - vector> attributes; - string className; - string classType; - vector> X; - vector y; + std::vector lines; + std::vector> attributes; + std::string className; + std::string classType; + std::vector> X; + std::vector y; void generateDataset(int); - void 
loadCommon(string); + void loadCommon(std::string); public: ArffFiles(); - void load(const string&, bool = true); - void load(const string&, const string&); - vector getLines() const; + void load(const std::string&, bool = true); + void load(const std::string&, const std::string&); + std::vector getLines() const; unsigned long int getSize() const; - string getClassName() const; - string getClassType() const; - static string trim(const string&); - vector>& getX(); - vector& getY(); - vector> getAttributes() const; - static vector factorize(const vector& labels_t); + std::string getClassName() const; + std::string getClassType() const; + static std::string trim(const std::string&); + std::vector>& getX(); + std::vector& getY(); + std::vector> getAttributes() const; + static std::vector factorize(const std::vector& labels_t); }; #endif \ No newline at end of file diff --git a/sample/sample.cc b/sample/sample.cc index 10b80ba..ef17fb5 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -12,14 +12,12 @@ #include "modelRegister.h" #include -using namespace std; +const std::string PATH = "../../data/"; -const string PATH = "../../data/"; - -pair, map> discretize(vector& X, mdlp::labels_t& y, vector features) +pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features) { - vectorXd; - map maxes; + std::vectorXd; + map maxes; auto fimdlp = mdlp::CPPFImdlp(); for (int i = 0; i < X.size(); i++) { @@ -31,7 +29,7 @@ pair, map> discretize(vector>, vector> extract_indices(vector indices, vector> X, vector y) +pair>, std::vector> extract_indices(std::vector indices, std::vector> X, std::vector y) { - vector> Xr; // nxm - vector yr; + std::vector> Xr; // nxm + std::vector yr; for (int col = 0; col < X.size(); ++col) { - Xr.push_back(vector()); + Xr.push_back(std::vector()); } for (auto index : indices) { for (int col = 0; col < X.size(); ++col) { @@ -58,7 +56,7 @@ pair>, vector> 
extract_indices(vector indices, vect int main(int argc, char** argv) { - map datasets = { + map datasets = { {"diabetes", true}, {"ecoli", true}, {"glass", true}, @@ -68,13 +66,13 @@ int main(int argc, char** argv) {"liver-disorders", true}, {"mfeat-factors", true}, }; - auto valid_datasets = vector(); + auto valid_datasets = std::vector(); transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), - [](const pair& pair) { return pair.first; }); + [](const pair& pair) { return pair.first; }); argparse::ArgumentParser program("BayesNetSample"); program.add_argument("-d", "--dataset") .help("Dataset file name") - .action([valid_datasets](const std::string& value) { + .action([valid_datasets](const std::string& value) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { return value; } @@ -83,23 +81,23 @@ int main(int argc, char** argv) ); program.add_argument("-p", "--path") .help(" folder where the data files are located, default") - .default_value(string{ PATH } + .default_value(std::string{ PATH } ); program.add_argument("-m", "--model") - .help("Model to use " + platform::Models::instance()->toString()) - .action([](const std::string& value) { - static const vector choices = platform::Models::instance()->getNames(); + .help("Model to use " + platform::Models::instance()->toString()) + .action([](const std::string& value) { + static const std::vector choices = platform::Models::instance()->getNames(); if (find(choices.begin(), choices.end(), value) != choices.end()) { return value; } - throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); + throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); } ); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { + program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) { try { auto k = stoi(value); if (k < 2) { @@ -115,13 +113,13 @@ int main(int argc, char** argv) }}); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); bool class_last, stratified, tensors, dump_cpt; - string model_name, file_name, path, complete_file_name; + std::string model_name, file_name, path, complete_file_name; int nFolds, seed; try { program.parse_args(argc, argv); - file_name = program.get("dataset"); - path = program.get("path"); - model_name = program.get("model"); + file_name = program.get("dataset"); + path = program.get("path"); + model_name = program.get("model"); complete_file_name = path + file_name + ".arff"; stratified = program.get("stratified"); tensors = program.get("tensors"); @@ -134,7 +132,7 @@ int main(int argc, char** argv) } } catch (const exception& err) { - cerr << err.what() << endl; + cerr << err.what() << std::endl; cerr << program; exit(1); } @@ -145,50 +143,50 @@ int main(int argc, char** argv) auto handler = ArffFiles(); handler.load(complete_file_name, class_last); // Get Dataset X, y - vector& X = handler.getX(); + std::vector& X = handler.getX(); mdlp::labels_t& y = handler.getY(); // Get className & Features auto className = handler.getClassName(); - vector features; + std::vector features; auto attributes = handler.getAttributes(); transform(attributes.begin(), attributes.end(), back_inserter(features), - [](const pair& item) { return item.first; }); + [](const pair& item) { return
item.first; }); // Discretize Dataset auto [Xd, maxes] = discretize(X, y, features); maxes[className] = *max_element(y.begin(), y.end()) + 1; - map> states; + map> states; for (auto feature : features) { - states[feature] = vector(maxes[feature]); + states[feature] = std::vector(maxes[feature]); } - states[className] = vector(maxes[className]); + states[className] = std::vector(maxes[className]); auto clf = platform::Models::instance()->create(model_name); clf->fit(Xd, y, features, className, states); if (dump_cpt) { - cout << "--- CPT Tables ---" << endl; + std::cout << "--- CPT Tables ---" << std::endl; clf->dump_cpt(); } auto lines = clf->show(); for (auto line : lines) { - cout << line << endl; + std::cout << line << std::endl; } - cout << "--- Topological Order ---" << endl; + std::cout << "--- Topological Order ---" << std::endl; auto order = clf->topological_order(); for (auto name : order) { - cout << name << ", "; + std::cout << name << ", "; } - cout << "end." << endl; + std::cout << "end." << std::endl; auto score = clf->score(Xd, y); - cout << "Score: " << score << endl; + std::cout << "Score: " << score << std::endl; auto graph = clf->graph(); auto dot_file = model_name + "_" + file_name; ofstream file(dot_file + ".dot"); file << graph; file.close(); - cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; - cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; - string stratified_string = stratified ? " Stratified" : ""; - cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; - cout << "==========================================" << endl; + std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl; + std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl; + std::string stratified_string = stratified ?
" Stratified" : ""; + std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl; + std::cout << "==========================================" << std::endl; torch::Tensor Xt = torch::zeros({ static_cast(Xd.size()), static_cast(Xd[0].size()) }, torch::kInt32); torch::Tensor yt = torch::tensor(y, torch::kInt32); for (int i = 0; i < features.size(); ++i) { @@ -202,7 +200,7 @@ int main(int argc, char** argv) fold = new platform::KFold(nFolds, y.size(), seed); for (auto i = 0; i < nFolds; ++i) { auto [train, test] = fold->getFold(i); - cout << "Fold: " << i + 1 << endl; + std::cout << "Fold: " << i + 1 << std::endl; if (tensors) { auto ttrain = torch::tensor(train, torch::kInt64); auto ttest = torch::tensor(test, torch::kInt64); @@ -222,16 +220,16 @@ int main(int argc, char** argv) score_test = clf->score(Xtest, ytest); } if (dump_cpt) { - cout << "--- CPT Tables ---" << endl; + std::cout << "--- CPT Tables ---" << std::endl; clf->dump_cpt(); } total_score_train += score_train; total_score += score_test; - cout << "Score Train: " << score_train << endl; - cout << "Score Test : " << score_test << endl; - cout << "-------------------------------------------------------------------------------" << endl; + std::cout << "Score Train: " << score_train << std::endl; + std::cout << "Score Test : " << score_test << std::endl; + std::cout << "-------------------------------------------------------------------------------" << std::endl; } - cout << "**********************************************************************************" << endl; - cout << "Average Score Train: " << total_score_train / nFolds << endl; - cout << "Average Score Test : " << total_score / nFolds << endl;return 0; + std::cout << "**********************************************************************************" << std::endl; + std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl; + std::cout << "Average Score Test : " << total_score / nFolds <<
std::endl;return 0; } \ No newline at end of file diff --git a/src/BayesNet/AODE.cc b/src/BayesNet/AODE.cc index 6db843e..850980c 100644 --- a/src/BayesNet/AODE.cc +++ b/src/BayesNet/AODE.cc @@ -9,9 +9,9 @@ namespace bayesnet { models.push_back(std::make_unique(i)); } n_models = models.size(); - significanceModels = vector(n_models, 1.0); + significanceModels = std::vector(n_models, 1.0); } - vector AODE::graph(const string& title) const + std::vector AODE::graph(const std::string& title) const { return Ensemble::graph(title); } diff --git a/src/BayesNet/AODE.h b/src/BayesNet/AODE.h index 00965f6..98f87fe 100644 --- a/src/BayesNet/AODE.h +++ b/src/BayesNet/AODE.h @@ -9,7 +9,7 @@ namespace bayesnet { public: AODE(); virtual ~AODE() {}; - vector graph(const string& title = "AODE") const override; + std::vector graph(const std::string& title = "AODE") const override; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/AODELd.cc b/src/BayesNet/AODELd.cc index 6f80715..fc899a9 100644 --- a/src/BayesNet/AODELd.cc +++ b/src/BayesNet/AODELd.cc @@ -2,16 +2,15 @@ #include "Models.h" namespace bayesnet { - using namespace std; AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} - AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector& features_, const string& className_, map>& states_) + AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) { checkInput(X_, y_); features = features_; className = className_; Xf = X_; y = y_; - // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y + // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network @@ -26,7 +25,7 @@ namespace bayesnet { models.push_back(std::make_unique(i)); }
n_models = models.size(); - significanceModels = vector(n_models, 1.0); + significanceModels = std::vector(n_models, 1.0); } void AODELd::trainModel(const torch::Tensor& weights) { @@ -34,7 +33,7 @@ namespace bayesnet { model->fit(Xf, y, features, className, states); } } - vector AODELd::graph(const string& name) const + std::vector AODELd::graph(const std::string& name) const { return Ensemble::graph(name); } diff --git a/src/BayesNet/AODELd.h b/src/BayesNet/AODELd.h index cdb8822..c8c3347 100644 --- a/src/BayesNet/AODELd.h +++ b/src/BayesNet/AODELd.h @@ -5,17 +5,16 @@ #include "SPODELd.h" namespace bayesnet { - using namespace std; class AODELd : public Ensemble, public Proposal { protected: void trainModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override; public: AODELd(); - AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const vector& features_, const string& className_, map>& states_) override; + AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) override; virtual ~AODELd() = default; - vector graph(const string& name = "AODELd") const override; - static inline string version() { return "0.0.1"; }; + std::vector graph(const std::string& name = "AODELd") const override; + static inline std::string version() { return "0.0.1"; }; }; } #endif // !AODELD_H \ No newline at end of file diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index 5337e78..a4be506 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -4,31 +4,30 @@ #include #include namespace bayesnet { - using namespace std; enum status_t { NORMAL, WARNING, ERROR }; class BaseClassifier { protected: virtual void trainModel(const torch::Tensor& weights) = 0; public: - // X is nxm vector, y is nx1 vector - virtual BaseClassifier& fit(vector>& X, vector& y, const vector& features, const string& className, map>& states) = 0; + // X is 
nxm vector, y is nx1 vector + virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) = 0; // X is nxm tensor, y is nx1 tensor - virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states) = 0; - virtual BaseClassifier& fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states, const torch::Tensor& weights) = 0; + virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) = 0; + virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) = 0; virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; - vector virtual predict(vector>& X) = 0; + std::vector virtual predict(std::vector>& X) = 0; status_t virtual getStatus() const = 0; - float virtual score(vector>& X, vector& y) = 0; + float virtual score(std::vector>& X, std::vector& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; int virtual getNumberOfNodes()const = 0; int virtual getNumberOfEdges()const = 0; int virtual getNumberOfStates() const = 0; - vector virtual show() const = 0; - vector virtual graph(const string& title = "") const = 0; - const string inline getVersion() const { return "0.2.0"; }; - vector virtual topological_order() = 0; + std::vector virtual show() const = 0; + std::vector virtual graph(const std::string& title = "") const = 0; + const std::string inline getVersion() const { return "0.2.0"; }; + std::vector virtual
topological_order() = 0; void virtual dump_cpt()const = 0; virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; }; diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 6bd3bbb..4e2633f 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -2,15 +2,15 @@ #include "Mst.h" namespace bayesnet { //samples is n+1xm tensor used to fit the model - Metrics::Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates) + Metrics::Metrics(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int classNumStates) : samples(samples) , features(features) , className(className) , classNumStates(classNumStates) { } - //samples is nxm vector used to fit the model - Metrics::Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates) + //samples is nxm vector used to fit the model + Metrics::Metrics(const std::vector>& vsamples, const std::vector& labels, const std::vector& features, const std::string& className, const int classNumStates) : features(features) , className(className) , classNumStates(classNumStates) @@ -21,7 +21,7 @@ namespace bayesnet { } samples.index_put_({ -1, "..."
}, torch::tensor(labels, torch::kInt32)); } - vector Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) + std::vector Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) { // Return the K Best features auto n = samples.size(0) - 1; @@ -56,15 +56,15 @@ namespace bayesnet { } return featuresKBest; } - vector Metrics::getScoresKBest() const + std::vector Metrics::getScoresKBest() const { return scoresKBest; } torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) { - auto result = vector(); - auto source = vector(features); + auto result = std::vector(); + auto source = std::vector(features); source.push_back(className); auto combinations = doCombinations(source); // Compute class prior @@ -100,7 +100,7 @@ namespace bayesnet { return matrix; } // To use in Python - vector Metrics::conditionalEdgeWeights(vector& weights_) + std::vector Metrics::conditionalEdgeWeights(std::vector& weights_) { const torch::Tensor weights = torch::tensor(weights_); auto matrix = conditionalEdge(weights); @@ -121,7 +121,7 @@ namespace bayesnet { { int numSamples = firstFeature.sizes()[0]; torch::Tensor featureCounts = secondFeature.bincount(weights); - unordered_map> jointCounts; + std::unordered_map> jointCounts; double totalWeight = 0; for (auto i = 0; i < numSamples; i++) { jointCounts[secondFeature[i].item()][firstFeature[i].item()] += weights[i].item(); @@ -155,7 +155,7 @@ namespace bayesnet { and the indices of the weights as nodes of this square matrix using Kruskal algorithm */ - vector> Metrics::maximumSpanningTree(const vector& features, const Tensor& weights, const int root) + std::vector> Metrics::maximumSpanningTree(const std::vector& features, const torch::Tensor& weights, const int root) { auto mst = MST(features, weights, root); return mst.maximumSpanningTree(); diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 66016a6..5d2b927 100644 --- 
a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -4,23 +4,21 @@ #include #include namespace bayesnet { - using namespace std; - using namespace torch; class Metrics { private: int classNumStates = 0; - vector scoresKBest; - vector featuresKBest; // sorted indices of the features - double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); + std::vector scoresKBest; + std::vector featuresKBest; // sorted indices of the features + double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights); protected: - Tensor samples; // n+1xm tensor used to fit the model where samples[-1] is the y vector - string className; - double entropy(const Tensor& feature, const Tensor& weights); - vector features; + torch::Tensor samples; // n+1xm tensor used to fit the model where samples[-1] is the y vector + std::string className; + double entropy(const torch::Tensor& feature, const torch::Tensor& weights); + std::vector features; template - vector> doCombinations(const vector& source) + std::vector> doCombinations(const std::vector& source) { - vector> result; + std::vector> result; for (int i = 0; i < source.size(); ++i) { T temp = source[i]; for (int j = i + 1; j < source.size(); ++j) { @@ -30,7 +28,7 @@ namespace bayesnet { return result; } template - T pop_first(vector& v) + T pop_first(std::vector& v) { T temp = v[0]; v.erase(v.begin()); @@ -38,14 +36,14 @@ namespace bayesnet { } public: Metrics() = default; - Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates); - Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates); - vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); - vector getScoresKBest() const; - double mutualInformation(const Tensor& firstFeature,
const Tensor& secondFeature, const Tensor& weights); - vector conditionalEdgeWeights(vector& weights); // To use in Python - Tensor conditionalEdge(const torch::Tensor& weights); - vector> maximumSpanningTree(const vector& features, const Tensor& weights, const int root); + Metrics(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int classNumStates); + Metrics(const std::vector>& vsamples, const std::vector& labels, const std::vector& features, const std::string& className, const int classNumStates); + std::vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); + std::vector getScoresKBest() const; + double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights); + std::vector conditionalEdgeWeights(std::vector& weights); // To use in Python + torch::Tensor conditionalEdge(const torch::Tensor& weights); + std::vector> maximumSpanningTree(const std::vector& features, const torch::Tensor& weights, const int root); }; } #endif \ No newline at end of file diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index e88a4da..aed067d 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -46,7 +46,7 @@ namespace bayesnet { void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) { // Check if hyperparameters are valid - const vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" }; + const std::vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" }; checkHyperparameters(validKeys, hyperparameters); if (hyperparameters.contains("repeatSparent")) { repeatSparent = hyperparameters["repeatSparent"]; @@ -65,38 +65,38 @@ namespace bayesnet { } if (hyperparameters.contains("select_features")) { auto selectedAlgorithm = hyperparameters["select_features"]; - vector algos = { 
"IWSS", "FCBF", "CFS" }; + std::vector algos = { "IWSS", "FCBF", "CFS" }; selectFeatures = true; algorithm = selectedAlgorithm; - if (find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { - throw invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); + if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { + throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); } } } - unordered_set BoostAODE::initializeModels() + std::unordered_set BoostAODE::initializeModels() { - unordered_set featuresUsed; - Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); + std::unordered_set featuresUsed; + torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); int maxFeatures = 0; if (algorithm == "CFS") { featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); } else if (algorithm == "IWSS") { if (threshold < 0 || threshold >0.5) { - throw invalid_argument("Invalid threshold value for IWSS [0, 0.5]"); + throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]"); } featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); } else if (algorithm == "FCBF") { if (threshold < 1e-7 || threshold > 1) { - throw invalid_argument("Invalid threshold value [1e-7, 1]"); + throw std::invalid_argument("Invalid threshold value [1e-7, 1]"); } featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); } featureSelector->fit(); auto cfsFeatures = featureSelector->getFeatures(); for (const int& feature : cfsFeatures) { - // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; + // std::cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << std::endl; featuresUsed.insert(feature); - unique_ptr model = std::make_unique(feature); + std::unique_ptr 
model = std::make_unique(feature); model->fit(dataset, features, className, states, weights_); models.push_back(std::move(model)); significanceModels.push_back(1.0); @@ -107,13 +107,13 @@ namespace bayesnet { } void BoostAODE::trainModel(const torch::Tensor& weights) { - unordered_set featuresUsed; + std::unordered_set featuresUsed; if (selectFeatures) { featuresUsed = initializeModels(); } if (maxModels == 0) maxModels = .1 * n > 10 ? .1 * n : n; - Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); + torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); bool exitCondition = false; // Variables to control the accuracy finish condition double priorAccuracy = 0.0; @@ -130,12 +130,12 @@ namespace bayesnet { while (!exitCondition) { // Step 1: Build ranking with mutual information auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted - unique_ptr model; + std::unique_ptr model; auto feature = featureSelection[0]; if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { bool used = true; for (const auto& feat : featureSelection) { - if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { + if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { continue; } used = false; @@ -188,7 +188,7 @@ namespace bayesnet { status = WARNING; } } - vector BoostAODE::graph(const string& title) const + std::vector BoostAODE::graph(const std::string& title) const { return Ensemble::graph(title); } diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index dd1cf75..cc45cba 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -9,7 +9,7 @@ namespace bayesnet { public: BoostAODE(); virtual ~BoostAODE() {}; - vector graph(const string& title = "BoostAODE") const override; + std::vector graph(const std::string& title = "BoostAODE") const override; void setHyperparameters(nlohmann::json& hyperparameters) 
override; protected: void buildModel(const torch::Tensor& weights) override; @@ -17,14 +17,14 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - unordered_set initializeModels(); + std::unordered_set initializeModels(); // Hyperparameters bool repeatSparent = false; // if true, a feature can be selected more than once int maxModels = 0; bool ascending = false; //Process KBest features ascending or descending order bool convergence = false; //if true, stop when the model does not improve bool selectFeatures = false; // if true, use feature selection - string algorithm = ""; // Selected feature selection algorithm + std::string algorithm = ""; // Selected feature selection algorithm FeatureSelect* featureSelector = nullptr; double threshold = -1; }; diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc index f2ffc1e..24a489f 100644 --- a/src/BayesNet/CFS.cc +++ b/src/BayesNet/CFS.cc @@ -13,7 +13,7 @@ namespace bayesnet { selectedScores.push_back(suLabels[feature]); selectedFeatures.erase(selectedFeatures.begin()); while (continueCondition) { - double merit = numeric_limits::lowest(); + double merit = std::numeric_limits::lowest(); int bestFeature = -1; for (auto feature : featureOrder) { selectedFeatures.push_back(feature); @@ -36,7 +36,7 @@ namespace bayesnet { } fitted = true; } - bool CFS::computeContinueCondition(const vector& featureOrder) + bool CFS::computeContinueCondition(const std::vector& featureOrder) { if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) { return false; @@ -49,11 +49,11 @@ namespace bayesnet { subsets show no improvement over the current best subset." 
as stated in Mark A.Hall Thesis */ - double item_ant = numeric_limits::lowest(); + double item_ant = std::numeric_limits::lowest(); int num = 0; - vector lastFive(selectedScores.end() - 5, selectedScores.end()); + std::vector lastFive(selectedScores.end() - 5, selectedScores.end()); for (auto item : lastFive) { - if (item_ant == numeric_limits::lowest()) { + if (item_ant == std::numeric_limits::lowest()) { item_ant = item; } if (item > item_ant) { diff --git a/src/BayesNet/CFS.h b/src/BayesNet/CFS.h index 36b7c52..154ba55 100644 --- a/src/BayesNet/CFS.h +++ b/src/BayesNet/CFS.h @@ -3,19 +3,18 @@ #include #include #include "FeatureSelect.h" -using namespace std; namespace bayesnet { class CFS : public FeatureSelect { public: - // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector - CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : + // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector + CFS(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights) { } virtual ~CFS() {}; void fit() override; private: - bool computeContinueCondition(const vector& featureOrder); + bool computeContinueCondition(const std::vector& featureOrder); }; } #endif \ No newline at end of file diff --git a/src/BayesNet/Classifier.cc b/src/BayesNet/Classifier.cc index a5b0420..53db344 100644 --- a/src/BayesNet/Classifier.cc +++ b/src/BayesNet/Classifier.cc @@ -2,10 +2,8 @@ #include "bayesnetUtils.h" namespace bayesnet { - using namespace torch; - Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} - Classifier& Classifier::build(const vector& features, const string& className, map>& states, const 
torch::Tensor& weights) + Classifier& Classifier::build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) { this->features = features; this->className = className; @@ -21,7 +19,7 @@ namespace bayesnet { fitted = true; return *this; } - void Classifier::buildDataset(Tensor& ytmp) + void Classifier::buildDataset(torch::Tensor& ytmp) { try { auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1); @@ -29,8 +27,8 @@ namespace bayesnet { } catch (const std::exception& e) { std::cerr << e.what() << '\n'; - cout << "X dimensions: " << dataset.sizes() << "\n"; - cout << "y dimensions: " << ytmp.sizes() << "\n"; + std::cout << "X dimensions: " << dataset.sizes() << "\n"; + std::cout << "y dimensions: " << ytmp.sizes() << "\n"; exit(1); } } @@ -39,7 +37,7 @@ namespace bayesnet { model.fit(dataset, weights, features, className, states); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) + Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) { dataset = X; buildDataset(y); @@ -47,24 +45,24 @@ namespace bayesnet { return build(features, className, states, weights); } // X is nxm where n is the number of features and m the number of samples - Classifier& Classifier::fit(vector>& X, vector& y, const vector& features, const string& className, map>& states) + Classifier& Classifier::fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) { - dataset = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, kInt32); + dataset = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kInt32); for (int i = 0; i < X.size(); ++i) { - dataset.index_put_({ i, "..." 
}, torch::tensor(X[i], kInt32)); + dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32)); } - auto ytmp = torch::tensor(y, kInt32); + auto ytmp = torch::tensor(y, torch::kInt32); buildDataset(ytmp); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); return build(features, className, states, weights); } - Classifier& Classifier::fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) { this->dataset = dataset; const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); return build(features, className, states, weights); } - Classifier& Classifier::fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states, const torch::Tensor& weights) + Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) { this->dataset = dataset; return build(features, className, states, weights); @@ -72,57 +70,57 @@ namespace bayesnet { void Classifier::checkFitParameters() { if (torch::is_floating_point(dataset)) { - throw invalid_argument("dataset (X, y) must be of type Integer"); + throw std::invalid_argument("dataset (X, y) must be of type Integer"); } if (n != features.size()) { - throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features"); + throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features"); } if (states.find(className) == states.end()) { - throw invalid_argument("className not found in states"); + throw std::invalid_argument("className not found in states"); } for (auto feature : 
features) { if (states.find(feature) == states.end()) { - throw invalid_argument("feature [" + feature + "] not found in states"); + throw std::invalid_argument("feature [" + feature + "] not found in states"); } } } - Tensor Classifier::predict(Tensor& X) + torch::Tensor Classifier::predict(torch::Tensor& X) { if (!fitted) { - throw logic_error("Classifier has not been fitted"); + throw std::logic_error("Classifier has not been fitted"); } return model.predict(X); } - vector Classifier::predict(vector>& X) + std::vector Classifier::predict(std::vector>& X) { if (!fitted) { - throw logic_error("Classifier has not been fitted"); + throw std::logic_error("Classifier has not been fitted"); } auto m_ = X[0].size(); auto n_ = X.size(); - vector> Xd(n_, vector(m_, 0)); + std::vector> Xd(n_, std::vector(m_, 0)); for (auto i = 0; i < n_; i++) { - Xd[i] = vector(X[i].begin(), X[i].end()); + Xd[i] = std::vector(X[i].begin(), X[i].end()); } auto yp = model.predict(Xd); return yp; } - float Classifier::score(Tensor& X, Tensor& y) + float Classifier::score(torch::Tensor& X, torch::Tensor& y) { if (!fitted) { - throw logic_error("Classifier has not been fitted"); + throw std::logic_error("Classifier has not been fitted"); } - Tensor y_pred = predict(X); + torch::Tensor y_pred = predict(X); return (y_pred == y).sum().item() / y.size(0); } - float Classifier::score(vector>& X, vector& y) + float Classifier::score(std::vector>& X, std::vector& y) { if (!fitted) { - throw logic_error("Classifier has not been fitted"); + throw std::logic_error("Classifier has not been fitted"); } return model.score(X, y); } - vector Classifier::show() const + std::vector Classifier::show() const { return model.show(); } @@ -147,7 +145,7 @@ namespace bayesnet { { return fitted ? 
model.getStates() : 0; } - vector Classifier::topological_order() + std::vector Classifier::topological_order() { return model.topological_sort(); } @@ -155,18 +153,18 @@ namespace bayesnet { { model.dump_cpt(); } - void Classifier::checkHyperparameters(const vector& validKeys, nlohmann::json& hyperparameters) + void Classifier::checkHyperparameters(const std::vector& validKeys, nlohmann::json& hyperparameters) { for (const auto& item : hyperparameters.items()) { if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) { - throw invalid_argument("Hyperparameter " + item.key() + " is not valid"); + throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid"); } } } void Classifier::setHyperparameters(nlohmann::json& hyperparameters) { // Check if hyperparameters are valid, default is no hyperparameters - const vector validKeys = { }; + const std::vector validKeys = { }; checkHyperparameters(validKeys, hyperparameters); } } \ No newline at end of file diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h index 5dd3040..418589e 100644 --- a/src/BayesNet/Classifier.h +++ b/src/BayesNet/Classifier.h @@ -4,46 +4,44 @@ #include "BaseClassifier.h" #include "Network.h" #include "BayesMetrics.h" -using namespace std; -using namespace torch; namespace bayesnet { class Classifier : public BaseClassifier { private: - Classifier& build(const vector& features, const string& className, map>& states, const torch::Tensor& weights); + Classifier& build(const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights); protected: bool fitted; int m, n; // m: number of samples, n: number of features Network model; Metrics metrics; - vector features; - string className; - map> states; - Tensor dataset; // (n+1)xm tensor + std::vector features; + std::string className; + std::map> states; + torch::Tensor dataset; // (n+1)xm tensor status_t status = NORMAL; void checkFitParameters(); virtual void 
buildModel(const torch::Tensor& weights) = 0; void trainModel(const torch::Tensor& weights) override; - void checkHyperparameters(const vector& validKeys, nlohmann::json& hyperparameters); + void checkHyperparameters(const std::vector& validKeys, nlohmann::json& hyperparameters); void buildDataset(torch::Tensor& y); public: Classifier(Network model); virtual ~Classifier() = default; - Classifier& fit(vector>& X, vector& y, const vector& features, const string& className, map>& states) override; - Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) override; - Classifier& fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states) override; - Classifier& fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states, const torch::Tensor& weights) override; + Classifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override; + Classifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override; void addNodes(); int getNumberOfNodes() const override; int getNumberOfEdges() const override; int getNumberOfStates() const override; - Tensor predict(Tensor& X) override; + torch::Tensor predict(torch::Tensor& X) override; status_t getStatus() const override { return status; } - vector predict(vector>& X) override; - float score(Tensor& X, Tensor& y) override; - float score(vector>& X, vector& y) override; - vector show() const override; - vector topological_order() override; + std::vector predict(std::vector>& X) override; + float 
score(torch::Tensor& X, torch::Tensor& y) override; + float score(std::vector>& X, std::vector& y) override; + std::vector show() const override; + std::vector topological_order() override; void dump_cpt() const override; void setHyperparameters(nlohmann::json& hyperparameters) override; }; diff --git a/src/BayesNet/Ensemble.cc b/src/BayesNet/Ensemble.cc index 14aad4d..4702733 100644 --- a/src/BayesNet/Ensemble.cc +++ b/src/BayesNet/Ensemble.cc @@ -1,7 +1,6 @@ #include "Ensemble.h" namespace bayesnet { - using namespace torch; Ensemble::Ensemble() : Classifier(Network()), n_models(0) {} @@ -9,20 +8,20 @@ namespace bayesnet { { n_models = models.size(); for (auto i = 0; i < n_models; ++i) { - // fit with vectors + // fit with std::vectors models[i]->fit(dataset, features, className, states); } } - vector Ensemble::voting(Tensor& y_pred) + std::vector Ensemble::voting(torch::Tensor& y_pred) { auto y_pred_ = y_pred.accessor(); - vector y_pred_final; + std::vector y_pred_final; int numClasses = states.at(className).size(); // y_pred is m x n_models with the prediction of every model for each sample for (int i = 0; i < y_pred.size(0); ++i) { // votes store in each index (value of class) the significance added by each model // i.e. votes[0] contains how much value has the value 0 of class. 
That value is generated by the models predictions - vector votes(numClasses, 0.0); + std::vector votes(numClasses, 0.0); for (int j = 0; j < n_models; ++j) { votes[y_pred_[i][j]] += significanceModels.at(j); } @@ -32,18 +31,18 @@ namespace bayesnet { } return y_pred_final; } - Tensor Ensemble::predict(Tensor& X) + torch::Tensor Ensemble::predict(torch::Tensor& X) { if (!fitted) { - throw logic_error("Ensemble has not been fitted"); + throw std::logic_error("Ensemble has not been fitted"); } - Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32); - auto threads{ vector() }; - mutex mtx; + torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); + auto threads{ std::vector() }; + std::mutex mtx; for (auto i = 0; i < n_models; ++i) { - threads.push_back(thread([&, i]() { + threads.push_back(std::thread([&, i]() { auto ypredict = models[i]->predict(X); - lock_guard lock(mtx); + std::lock_guard lock(mtx); y_pred.index_put_({ "...", i }, ypredict); })); } @@ -52,27 +51,27 @@ namespace bayesnet { } return torch::tensor(voting(y_pred)); } - vector Ensemble::predict(vector>& X) + std::vector Ensemble::predict(std::vector>& X) { if (!fitted) { - throw logic_error("Ensemble has not been fitted"); + throw std::logic_error("Ensemble has not been fitted"); } long m_ = X[0].size(); long n_ = X.size(); - vector> Xd(n_, vector(m_, 0)); + std::vector> Xd(n_, std::vector(m_, 0)); for (auto i = 0; i < n_; i++) { - Xd[i] = vector(X[i].begin(), X[i].end()); + Xd[i] = std::vector(X[i].begin(), X[i].end()); } - Tensor y_pred = torch::zeros({ m_, n_models }, kInt32); + torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32); for (auto i = 0; i < n_models; ++i) { - y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32)); + y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32)); } return voting(y_pred); } - float Ensemble::score(Tensor& X, Tensor& y) + float Ensemble::score(torch::Tensor& X, 
torch::Tensor& y) { if (!fitted) { - throw logic_error("Ensemble has not been fitted"); + throw std::logic_error("Ensemble has not been fitted"); } auto y_pred = predict(X); int correct = 0; @@ -83,10 +82,10 @@ namespace bayesnet { } return (double)correct / y_pred.size(0); } - float Ensemble::score(vector>& X, vector& y) + float Ensemble::score(std::vector>& X, std::vector& y) { if (!fitted) { - throw logic_error("Ensemble has not been fitted"); + throw std::logic_error("Ensemble has not been fitted"); } auto y_pred = predict(X); int correct = 0; @@ -97,20 +96,20 @@ namespace bayesnet { } return (double)correct / y_pred.size(); } - vector Ensemble::show() const + std::vector Ensemble::show() const { - auto result = vector(); + auto result = std::vector(); for (auto i = 0; i < n_models; ++i) { auto res = models[i]->show(); result.insert(result.end(), res.begin(), res.end()); } return result; } - vector Ensemble::graph(const string& title) const + std::vector Ensemble::graph(const std::string& title) const { - auto result = vector(); + auto result = std::vector(); for (auto i = 0; i < n_models; ++i) { - auto res = models[i]->graph(title + "_" + to_string(i)); + auto res = models[i]->graph(title + "_" + std::to_string(i)); result.insert(result.end(), res.begin(), res.end()); } return result; diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index 58a1d63..07fda9b 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -4,34 +4,32 @@ #include "Classifier.h" #include "BayesMetrics.h" #include "bayesnetUtils.h" -using namespace std; -using namespace torch; namespace bayesnet { class Ensemble : public Classifier { private: - Ensemble& build(vector& features, string className, map>& states); + Ensemble& build(std::vector& features, std::string className, std::map>& states); protected: unsigned n_models; - vector> models; - vector significanceModels; + std::vector> models; + std::vector significanceModels; void trainModel(const torch::Tensor& 
weights) override; - vector voting(Tensor& y_pred); + std::vector voting(torch::Tensor& y_pred); public: Ensemble(); virtual ~Ensemble() = default; - Tensor predict(Tensor& X) override; - vector predict(vector>& X) override; - float score(Tensor& X, Tensor& y) override; - float score(vector>& X, vector& y) override; + torch::Tensor predict(torch::Tensor& X) override; + std::vector predict(std::vector>& X) override; + float score(torch::Tensor& X, torch::Tensor& y) override; + float score(std::vector>& X, std::vector& y) override; int getNumberOfNodes() const override; int getNumberOfEdges() const override; int getNumberOfStates() const override; - vector show() const override; - vector graph(const string& title) const override; - vector topological_order() override + std::vector show() const override; + std::vector graph(const std::string& title) const override; + std::vector topological_order() override { - return vector(); + return std::vector(); } void dump_cpt() const override { diff --git a/src/BayesNet/FCBF.cc b/src/BayesNet/FCBF.cc index db935af..f92c593 100644 --- a/src/BayesNet/FCBF.cc +++ b/src/BayesNet/FCBF.cc @@ -2,7 +2,7 @@ #include "FCBF.h" namespace bayesnet { - FCBF::FCBF(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : + FCBF::FCBF(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) { if (threshold < 1e-7) { diff --git a/src/BayesNet/FCBF.h b/src/BayesNet/FCBF.h index aa7ff47..1b30cba 100644 --- a/src/BayesNet/FCBF.h +++ b/src/BayesNet/FCBF.h @@ -3,12 +3,11 @@ #include #include #include "FeatureSelect.h" -using namespace std; namespace bayesnet { class FCBF : public 
FeatureSelect { public: - // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector - FCBF(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); + // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector + FCBF(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); virtual ~FCBF() {}; void fit() override; private: diff --git a/src/BayesNet/FeatureSelect.cc b/src/BayesNet/FeatureSelect.cc index 11d929b..b8300a5 100644 --- a/src/BayesNet/FeatureSelect.cc +++ b/src/BayesNet/FeatureSelect.cc @@ -2,7 +2,7 @@ #include #include "bayesnetUtils.h" namespace bayesnet { - FeatureSelect::FeatureSelect(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : + FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? 
samples.size(0) - 1 : maxFeatures), weights(weights) { @@ -42,7 +42,7 @@ namespace bayesnet { try { return suFeatures.at({ firstFeature, secondFeature }); } - catch (const out_of_range& e) { + catch (const std::out_of_range& e) { double result = symmetricalUncertainty(firstFeature, secondFeature); suFeatures[{firstFeature, secondFeature}] = result; return result; @@ -62,17 +62,17 @@ namespace bayesnet { } return rcf / sqrt(n + (n * n - n) * rff); } - vector FeatureSelect::getFeatures() const + std::vector FeatureSelect::getFeatures() const { if (!fitted) { - throw runtime_error("FeatureSelect not fitted"); + throw std::runtime_error("FeatureSelect not fitted"); } return selectedFeatures; } - vector FeatureSelect::getScores() const + std::vector FeatureSelect::getScores() const { if (!fitted) { - throw runtime_error("FeatureSelect not fitted"); + throw std::runtime_error("FeatureSelect not fitted"); } return selectedScores; } diff --git a/src/BayesNet/FeatureSelect.h b/src/BayesNet/FeatureSelect.h index 46923c9..d5c56ed 100644 --- a/src/BayesNet/FeatureSelect.h +++ b/src/BayesNet/FeatureSelect.h @@ -3,16 +3,15 @@ #include #include #include "BayesMetrics.h" -using namespace std; namespace bayesnet { class FeatureSelect : public Metrics { public: - // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector - FeatureSelect(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); + // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector + FeatureSelect(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); virtual ~FeatureSelect() {}; virtual void fit() = 0; - vector getFeatures() const; - vector getScores() const; + std::vector getFeatures() const; + std::vector getScores() const; protected: void initialize(); void 
computeSuLabels(); @@ -21,10 +20,10 @@ namespace bayesnet { double computeMeritCFS(); const torch::Tensor& weights; int maxFeatures; - vector selectedFeatures; - vector selectedScores; - vector suLabels; - map, double> suFeatures; + std::vector selectedFeatures; + std::vector selectedScores; + std::vector suLabels; + std::map, double> suFeatures; bool fitted = false; }; } diff --git a/src/BayesNet/IWSS.cc b/src/BayesNet/IWSS.cc index f39f137..4fd11ea 100644 --- a/src/BayesNet/IWSS.cc +++ b/src/BayesNet/IWSS.cc @@ -2,7 +2,7 @@ #include #include "bayesnetUtils.h" namespace bayesnet { - IWSS::IWSS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : + IWSS::IWSS(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) { if (threshold < 0 || threshold > .5) { diff --git a/src/BayesNet/IWSS.h b/src/BayesNet/IWSS.h index 88a1034..0faea7d 100644 --- a/src/BayesNet/IWSS.h +++ b/src/BayesNet/IWSS.h @@ -3,12 +3,11 @@ #include #include #include "FeatureSelect.h" -using namespace std; namespace bayesnet { class IWSS : public FeatureSelect { public: - // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector - IWSS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); + // dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector + IWSS(const torch::Tensor& samples, const std::vector& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); virtual ~IWSS() {}; void 
fit() override; private: diff --git a/src/BayesNet/KDB.cc b/src/BayesNet/KDB.cc index d511354..3c344a7 100644 --- a/src/BayesNet/KDB.cc +++ b/src/BayesNet/KDB.cc @@ -1,13 +1,11 @@ #include "KDB.h" namespace bayesnet { - using namespace torch; - KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} void KDB::setHyperparameters(nlohmann::json& hyperparameters) { // Check if hyperparameters are valid - const vector validKeys = { "k", "theta" }; + const std::vector validKeys = { "k", "theta" }; checkHyperparameters(validKeys, hyperparameters); if (hyperparameters.contains("k")) { k = hyperparameters["k"]; @@ -40,16 +38,16 @@ namespace bayesnet { // 1. For each feature Xi, compute mutual information, I(X;C), // where C is the class. addNodes(); - const Tensor& y = dataset.index({ -1, "..." }); - vector mi; + const torch::Tensor& y = dataset.index({ -1, "..." }); + std::vector mi; for (auto i = 0; i < features.size(); i++) { - Tensor firstFeature = dataset.index({ i, "..." }); + torch::Tensor firstFeature = dataset.index({ i, "..." }); mi.push_back(metrics.mutualInformation(firstFeature, y, weights)); } // 2. Compute class conditional mutual information I(Xi;XjIC), f or each auto conditionalEdgeWeights = metrics.conditionalEdge(weights); // 3. Let the used variable list, S, be empty. - vector S; + std::vector S; // 4. Let the DAG network being constructed, BN, begin with a single // class node, C. // 5. 
Repeat until S includes all domain features @@ -67,9 +65,9 @@ namespace bayesnet { S.push_back(idx); } } - void KDB::add_m_edges(int idx, vector& S, Tensor& weights) + void KDB::add_m_edges(int idx, std::vector& S, torch::Tensor& weights) { - auto n_edges = min(k, static_cast(S.size())); + auto n_edges = std::min(k, static_cast(S.size())); auto cond_w = clone(weights); bool exit_cond = k == 0; int num = 0; @@ -81,7 +79,7 @@ namespace bayesnet { model.addEdge(features[max_minfo], features[idx]); num++; } - catch (const invalid_argument& e) { + catch (const std::invalid_argument& e) { // Loops are not allowed } } @@ -91,11 +89,11 @@ namespace bayesnet { exit_cond = num == n_edges || candidates.size(0) == 0; } } - vector KDB::graph(const string& title) const + std::vector KDB::graph(const std::string& title) const { - string header{ title }; + std::string header{ title }; if (title == "KDB") { - header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")"; + header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")"; } return model.graph(header); } diff --git a/src/BayesNet/KDB.h b/src/BayesNet/KDB.h index 992d061..7dbc8f4 100644 --- a/src/BayesNet/KDB.h +++ b/src/BayesNet/KDB.h @@ -4,20 +4,18 @@ #include "Classifier.h" #include "bayesnetUtils.h" namespace bayesnet { - using namespace std; - using namespace torch; class KDB : public Classifier { private: int k; float theta; - void add_m_edges(int idx, vector& S, Tensor& weights); + void add_m_edges(int idx, std::vector& S, torch::Tensor& weights); protected: void buildModel(const torch::Tensor& weights) override; public: explicit KDB(int k, float theta = 0.03); virtual ~KDB() {}; void setHyperparameters(nlohmann::json& hyperparameters) override; - vector graph(const string& name = "KDB") const override; + std::vector graph(const std::string& name = "KDB") const override; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/KDBLd.cc b/src/BayesNet/KDBLd.cc index 
fc0910a..8f17901 100644 --- a/src/BayesNet/KDBLd.cc +++ b/src/BayesNet/KDBLd.cc @@ -1,16 +1,15 @@ #include "KDBLd.h" namespace bayesnet { - using namespace std; KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} - KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector& features_, const string& className_, map>& states_) + KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) { checkInput(X_, y_); features = features_; className = className_; Xf = X_; y = y_; - // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y + // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network @@ -18,12 +17,12 @@ namespace bayesnet { states = localDiscretizationProposal(states, model); return *this; } - Tensor KDBLd::predict(Tensor& X) + torch::Tensor KDBLd::predict(torch::Tensor& X) { auto Xt = prepareX(X); return KDB::predict(Xt); } - vector KDBLd::graph(const string& name) const + std::vector KDBLd::graph(const std::string& name) const { return KDB::graph(name); } diff --git a/src/BayesNet/KDBLd.h b/src/BayesNet/KDBLd.h index c034938..bc98a3e 100644 --- a/src/BayesNet/KDBLd.h +++ b/src/BayesNet/KDBLd.h @@ -4,16 +4,15 @@ #include "Proposal.h" namespace bayesnet { - using namespace std; class KDBLd : public KDB, public Proposal { private: public: explicit KDBLd(int k); virtual ~KDBLd() = default; - KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) override; - vector graph(const string& name = "KDB") const override; - Tensor predict(Tensor& X) override; - static inline string version() { return "0.0.1"; }; + KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const 
std::string& className, map>& states) override; + std::vector graph(const std::string& name = "KDB") const override; + torch::Tensor predict(torch::Tensor& X) override; + static inline std::string version() { return "0.0.1"; }; }; } #endif // !KDBLD_H \ No newline at end of file diff --git a/src/BayesNet/Mst.cc b/src/BayesNet/Mst.cc index c618484..41207e8 100644 --- a/src/BayesNet/Mst.cc +++ b/src/BayesNet/Mst.cc @@ -7,8 +7,7 @@ */ namespace bayesnet { - using namespace std; - Graph::Graph(int V) : V(V), parent(vector(V)) + Graph::Graph(int V) : V(V), parent(std::vector(V)) { for (int i = 0; i < V; i++) parent[i] = i; @@ -41,35 +40,35 @@ namespace bayesnet { uSt = find_set(G[i].second.first); vEd = find_set(G[i].second.second); if (uSt != vEd) { - T.push_back(G[i]); // add to mst vector + T.push_back(G[i]); // add to mst std::vector union_set(uSt, vEd); } } } void Graph::display_mst() { - cout << "Edge :" << " Weight" << endl; + std::cout << "Edge :" << " Weight" << std::endl; for (int i = 0; i < T.size(); i++) { - cout << T[i].second.first << " - " << T[i].second.second << " : " + std::cout << T[i].second.first << " - " << T[i].second.second << " : " << T[i].first; - cout << endl; + std::cout << std::endl; } } - void insertElement(list& variables, int variable) + void insertElement(std::list& variables, int variable) { - if (find(variables.begin(), variables.end(), variable) == variables.end()) { + if (std::find(variables.begin(), variables.end(), variable) == variables.end()) { variables.push_front(variable); } } - vector> reorder(vector>> T, int root_original) + std::vector> reorder(std::vector>> T, int root_original) { // Create the edges of a DAG from the MST // replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted - auto result = vector>(); - auto visited = vector(); - auto nextVariables = list(); + auto result = std::vector>(); + auto visited = std::vector(); + auto nextVariables = std::list(); 
nextVariables.push_front(root_original); while (nextVariables.size() > 0) { int root = nextVariables.front(); @@ -104,8 +103,8 @@ namespace bayesnet { return result; } - MST::MST(const vector& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {} - vector> MST::maximumSpanningTree() + MST::MST(const std::vector& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {} + std::vector> MST::maximumSpanningTree() { auto num_features = features.size(); Graph g(num_features); diff --git a/src/BayesNet/Mst.h b/src/BayesNet/Mst.h index e0f3372..8e7c462 100644 --- a/src/BayesNet/Mst.h +++ b/src/BayesNet/Mst.h @@ -4,24 +4,22 @@ #include #include namespace bayesnet { - using namespace std; - using namespace torch; class MST { private: - Tensor weights; - vector features; + torch::Tensor weights; + std::vector features; int root = 0; public: MST() = default; - MST(const vector& features, const Tensor& weights, const int root); - vector> maximumSpanningTree(); + MST(const std::vector& features, const torch::Tensor& weights, const int root); + std::vector> maximumSpanningTree(); }; class Graph { private: int V; // number of nodes in graph - vector >> G; // vector for graph - vector >> T; // vector for mst - vector parent; + std::vector >> G; // std::vector for graph + std::vector >> T; // std::vector for mst + std::vector parent; public: explicit Graph(int V); void addEdge(int u, int v, float wt); @@ -29,7 +27,7 @@ namespace bayesnet { void union_set(int u, int v); void kruskal_algorithm(); void display_mst(); - vector >> get_mst() { return T; } + std::vector >> get_mst() { return T; } }; } #endif \ No newline at end of file diff --git a/src/BayesNet/Network.cc b/src/BayesNet/Network.cc index 88f3610..e8a7da8 100644 --- a/src/BayesNet/Network.cc +++ b/src/BayesNet/Network.cc @@ -3,18 +3,18 @@ #include "Network.h" #include "bayesnetUtils.h" namespace bayesnet { - 
Network::Network() : features(vector()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {} - Network::Network(float maxT) : features(vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {} + Network::Network() : features(std::vector()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {} + Network::Network(float maxT) : features(std::vector()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {} Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. getmaxThreads()), fitted(other.fitted) { - for (const auto& pair : other.nodes) { - nodes[pair.first] = std::make_unique(*pair.second); + for (const auto& node : other.nodes) { + nodes[node.first] = std::make_unique(*node.second); } } void Network::initialize() { - features = vector(); + features = std::vector(); className = ""; classNumStates = 0; fitted = false; @@ -29,10 +29,10 @@ namespace bayesnet { { return samples; } - void Network::addNode(const string& name) + void Network::addNode(const std::string& name) { if (name == "") { - throw invalid_argument("Node name cannot be empty"); + throw std::invalid_argument("Node name cannot be empty"); } if (nodes.find(name) != nodes.end()) { return; @@ -42,7 +42,7 @@ namespace bayesnet { } nodes[name] = std::make_unique(name); } - vector Network::getFeatures() const + std::vector Network::getFeatures() const { return features; } @@ -58,11 +58,11 @@ namespace bayesnet { } return result; } - string Network::getClassName() const + std::string Network::getClassName() const { return className; } - bool Network::isCyclic(const string& nodeId, unordered_set& visited, unordered_set& recStack) + bool Network::isCyclic(const std::string& nodeId, std::unordered_set& visited, std::unordered_set& recStack) { if 
(visited.find(nodeId) == visited.end()) // if node hasn't been visited yet { @@ -78,78 +78,78 @@ namespace bayesnet { recStack.erase(nodeId); // remove node from recursion stack before function ends return false; } - void Network::addEdge(const string& parent, const string& child) + void Network::addEdge(const std::string& parent, const std::string& child) { if (nodes.find(parent) == nodes.end()) { - throw invalid_argument("Parent node " + parent + " does not exist"); + throw std::invalid_argument("Parent node " + parent + " does not exist"); } if (nodes.find(child) == nodes.end()) { - throw invalid_argument("Child node " + child + " does not exist"); + throw std::invalid_argument("Child node " + child + " does not exist"); } // Temporarily add edge to check for cycles nodes[parent]->addChild(nodes[child].get()); nodes[child]->addParent(nodes[parent].get()); - unordered_set visited; - unordered_set recStack; + std::unordered_set visited; + std::unordered_set recStack; if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle { // remove problematic edge nodes[parent]->removeChild(nodes[child].get()); nodes[child]->removeParent(nodes[parent].get()); - throw invalid_argument("Adding this edge forms a cycle in the graph."); + throw std::invalid_argument("Adding this edge forms a cycle in the graph."); } } - map>& Network::getNodes() + std::map>& Network::getNodes() { return nodes; } - void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector& featureNames, const string& className, const map>& states, const torch::Tensor& weights) + void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights) { if (weights.size(0) != n_samples) { - throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) 
+ ") in Network::fit"); + throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit"); } if (n_samples != n_samples_y) { - throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")"); + throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")"); } if (n_features != featureNames.size()) { - throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")"); + throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")"); } if (n_features != features.size() - 1) { - throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")"); + throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")"); } if (find(features.begin(), features.end(), className) == features.end()) { - throw invalid_argument("className not found in Network::features"); + throw std::invalid_argument("className not found in Network::features"); } for (auto& feature : featureNames) { if (find(features.begin(), features.end(), feature) == features.end()) { - throw invalid_argument("Feature " + feature + " not found in Network::features"); + throw std::invalid_argument("Feature " + feature + " not found in Network::features"); } if (states.find(feature) == states.end()) { - throw invalid_argument("Feature " + feature + " not found in 
states"); + throw std::invalid_argument("Feature " + feature + " not found in states"); } } } - void Network::setStates(const map>& states) + void Network::setStates(const std::map>& states) { // Set states to every Node in the network - for_each(features.begin(), features.end(), [this, &states](const string& feature) { + for_each(features.begin(), features.end(), [this, &states](const std::string& feature) { nodes.at(feature)->setNumStates(states.at(feature).size()); }); classNumStates = nodes.at(className)->getNumStates(); } // X comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) { checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); this->className = className; - Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); + torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); samples = torch::cat({ X , ytmp }, 0); for (int i = 0; i < featureNames.size(); ++i) { auto row_feature = X.index({ i, "..." 
}); } completeFit(states, weights); } - void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states) { checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); this->className = className; @@ -157,7 +157,7 @@ namespace bayesnet { completeFit(states, weights); } // input_data comes in nxm, where n is the number of features and m the number of samples - void Network::fit(const vector>& input_data, const vector& labels, const vector& weights_, const vector& featureNames, const string& className, const map>& states) + void Network::fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights_, const std::vector& featureNames, const std::string& className, const std::map>& states) { const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); @@ -170,11 +170,11 @@ namespace bayesnet { samples.index_put_({ -1, "..." 
}, torch::tensor(labels, torch::kInt32)); completeFit(states, weights); } - void Network::completeFit(const map>& states, const torch::Tensor& weights) + void Network::completeFit(const std::map>& states, const torch::Tensor& weights) { setStates(states); laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation - vector threads; + std::vector threads; for (auto& node : nodes) { threads.emplace_back([this, &node, &weights]() { node.second->computeCPT(samples, features, laplaceSmoothing, weights); @@ -188,12 +188,12 @@ namespace bayesnet { torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba) { if (!fitted) { - throw logic_error("You must call fit() before calling predict()"); + throw std::logic_error("You must call fit() before calling predict()"); } torch::Tensor result; result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); for (int i = 0; i < samples.size(1); ++i) { - const Tensor sample = samples.index({ "...", i }); + const torch::Tensor sample = samples.index({ "...", i }); auto psample = predict_sample(sample); auto temp = torch::tensor(psample, torch::kFloat64); // result.index_put_({ i, "..." 
}, torch::tensor(predict_sample(sample), torch::kFloat64)); @@ -204,32 +204,32 @@ namespace bayesnet { return result.argmax(1); } // Return mxn tensor of probabilities - Tensor Network::predict_proba(const Tensor& samples) + torch::Tensor Network::predict_proba(const torch::Tensor& samples) { return predict_tensor(samples, true); } // Return mxn tensor of probabilities - Tensor Network::predict(const Tensor& samples) + torch::Tensor Network::predict(const torch::Tensor& samples) { return predict_tensor(samples, false); } - // Return mx1 vector of predictions - // tsamples is nxm vector of samples - vector Network::predict(const vector>& tsamples) + // Return mx1 std::vector of predictions + // tsamples is nxm std::vector of samples + std::vector Network::predict(const std::vector>& tsamples) { if (!fitted) { - throw logic_error("You must call fit() before calling predict()"); + throw std::logic_error("You must call fit() before calling predict()"); } - vector predictions; - vector sample; + std::vector predictions; + std::vector sample; for (int row = 0; row < tsamples[0].size(); ++row) { sample.clear(); for (int col = 0; col < tsamples.size(); ++col) { sample.push_back(tsamples[col][row]); } - vector classProbabilities = predict_sample(sample); + std::vector classProbabilities = predict_sample(sample); // Find the class with the maximum posterior probability auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); int predictedClass = distance(classProbabilities.begin(), maxElem); @@ -237,14 +237,14 @@ namespace bayesnet { } return predictions; } - // Return mxn vector of probabilities - vector> Network::predict_proba(const vector>& tsamples) + // Return mxn std::vector of probabilities + std::vector> Network::predict_proba(const std::vector>& tsamples) { if (!fitted) { - throw logic_error("You must call fit() before calling predict_proba()"); + throw std::logic_error("You must call fit() before calling predict_proba()"); } - vector> 
predictions; - vector sample; + std::vector> predictions; + std::vector sample; for (int row = 0; row < tsamples[0].size(); ++row) { sample.clear(); for (int col = 0; col < tsamples.size(); ++col) { @@ -254,9 +254,9 @@ namespace bayesnet { } return predictions; } - double Network::score(const vector>& tsamples, const vector& labels) + double Network::score(const std::vector>& tsamples, const std::vector& labels) { - vector y_pred = predict(tsamples); + std::vector y_pred = predict(tsamples); int correct = 0; for (int i = 0; i < y_pred.size(); ++i) { if (y_pred[i] == labels[i]) { @@ -265,35 +265,35 @@ namespace bayesnet { } return (double)correct / y_pred.size(); } - // Return 1xn vector of probabilities - vector Network::predict_sample(const vector& sample) + // Return 1xn std::vector of probabilities + std::vector Network::predict_sample(const std::vector& sample) { // Ensure the sample size is equal to the number of features if (sample.size() != features.size() - 1) { - throw invalid_argument("Sample size (" + to_string(sample.size()) + - ") does not match the number of features (" + to_string(features.size() - 1) + ")"); + throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); } - map evidence; + std::map evidence; for (int i = 0; i < sample.size(); ++i) { evidence[features[i]] = sample[i]; } return exactInference(evidence); } - // Return 1xn vector of probabilities - vector Network::predict_sample(const Tensor& sample) + // Return 1xn std::vector of probabilities + std::vector Network::predict_sample(const torch::Tensor& sample) { // Ensure the sample size is equal to the number of features if (sample.size(0) != features.size() - 1) { - throw invalid_argument("Sample size (" + to_string(sample.size(0)) + - ") does not match the number of features (" + to_string(features.size() - 1) + ")"); + throw std::invalid_argument("Sample size (" + 
std::to_string(sample.size(0)) + + ") does not match the number of features (" + std::to_string(features.size() - 1) + ")"); } - map evidence; + std::map evidence; for (int i = 0; i < sample.size(0); ++i) { evidence[features[i]] = sample[i].item(); } return exactInference(evidence); } - double Network::computeFactor(map& completeEvidence) + double Network::computeFactor(std::map& completeEvidence) { double result = 1.0; for (auto& node : getNodes()) { @@ -301,17 +301,17 @@ namespace bayesnet { } return result; } - vector Network::exactInference(map& evidence) + std::vector Network::exactInference(std::map& evidence) { - vector result(classNumStates, 0.0); - vector threads; - mutex mtx; + std::vector result(classNumStates, 0.0); + std::vector threads; + std::mutex mtx; for (int i = 0; i < classNumStates; ++i) { threads.emplace_back([this, &result, &evidence, i, &mtx]() { - auto completeEvidence = map(evidence); + auto completeEvidence = std::map(evidence); completeEvidence[getClassName()] = i; double factor = computeFactor(completeEvidence); - lock_guard lock(mtx); + std::lock_guard lock(mtx); result[i] = factor; }); } @@ -323,12 +323,12 @@ namespace bayesnet { transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); return result; } - vector Network::show() const + std::vector Network::show() const { - vector result; + std::vector result; // Draw the network for (auto& node : nodes) { - string line = node.first + " -> "; + std::string line = node.first + " -> "; for (auto child : node.second->getChildren()) { line += child->getName() + ", "; } @@ -336,12 +336,12 @@ namespace bayesnet { } return result; } - vector Network::graph(const string& title) const + std::vector Network::graph(const std::string& title) const { - auto output = vector(); + auto output = std::vector(); auto prefix = "digraph BayesNet {\nlabel=graph(className); @@ -350,9 +350,9 @@ namespace bayesnet { output.push_back("}\n"); return output; } - 
vector> Network::getEdges() const + std::vector> Network::getEdges() const { - auto edges = vector>(); + auto edges = std::vector>(); for (const auto& node : nodes) { auto head = node.first; for (const auto& child : node.second->getChildren()) { @@ -366,7 +366,7 @@ namespace bayesnet { { return getEdges().size(); } - vector Network::topological_sort() + std::vector Network::topological_sort() { /* Check if al the fathers of every node are before the node */ auto result = features; @@ -393,10 +393,10 @@ namespace bayesnet { ending = false; } } else { - throw logic_error("Error in topological sort because of node " + feature + " is not in result"); + throw std::logic_error("Error in topological sort because of node " + feature + " is not in result"); } } else { - throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result"); + throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result"); } } } @@ -406,8 +406,8 @@ namespace bayesnet { void Network::dump_cpt() const { for (auto& node : nodes) { - cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl; - cout << node.second->getCPT() << endl; + std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl; + std::cout << node.second->getCPT() << std::endl; } } } diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index 2b89a47..34b06c2 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -7,22 +7,22 @@ namespace bayesnet { class Network { private: - map> nodes; + std::map> nodes; bool fitted; float maxThreads = 0.95; int classNumStates; - vector features; // Including classname - string className; + std::vector features; // Including classname + std::string className; double laplaceSmoothing; torch::Tensor samples; // nxm tensor used to fit the model bool isCyclic(const 
std::string&, std::unordered_set&, std::unordered_set&); - vector predict_sample(const vector&); - vector predict_sample(const torch::Tensor&); - vector exactInference(map&); - double computeFactor(map&); - void completeFit(const map>& states, const torch::Tensor& weights); - void checkFitData(int n_features, int n_samples, int n_samples_y, const vector& featureNames, const string& className, const map>& states, const torch::Tensor& weights); - void setStates(const map>&); + std::vector predict_sample(const std::vector&); + std::vector predict_sample(const torch::Tensor&); + std::vector exactInference(std::map&); + double computeFactor(std::map&); + void completeFit(const std::map>& states, const torch::Tensor& weights); + void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector& featureNames, const std::string& className, const std::map>& states, const torch::Tensor& weights); + void setStates(const std::map>&); public: Network(); explicit Network(float); @@ -30,33 +30,33 @@ namespace bayesnet { ~Network() = default; torch::Tensor& getSamples(); float getmaxThreads(); - void addNode(const string&); - void addEdge(const string&, const string&); - map>& getNodes(); - vector getFeatures() const; + void addNode(const std::string&); + void addEdge(const std::string&, const std::string&); + std::map>& getNodes(); + std::vector getFeatures() const; int getStates() const; - vector> getEdges() const; + std::vector> getEdges() const; int getNumEdges() const; int getClassNumStates() const; - string getClassName() const; + std::string getClassName() const; /* Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. 
*/ - void fit(const vector>& input_data, const vector& labels, const vector& weights, const vector& featureNames, const string& className, const map>& states); - void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states); - void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector& featureNames, const string& className, const map>& states); - vector predict(const vector>&); // Return mx1 vector of predictions + void fit(const std::vector>& input_data, const std::vector& labels, const std::vector& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); + void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); + void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector& featureNames, const std::string& className, const std::map>& states); + std::vector predict(const std::vector>&); // Return mx1 std::vector of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); - vector> predict_proba(const vector>&); // Return mxn vector of probabilities + std::vector> predict_proba(const std::vector>&); // Return mxn std::vector of probabilities torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities - double score(const vector>&, const vector&); - vector topological_sort(); - vector show() const; - vector graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format + double score(const std::vector>&, const std::vector&); + std::vector topological_sort(); + std::vector show() const; + std::vector graph(const std::string& title) const; // Returns a std::vector of std::strings 
representing the graph in graphviz format void initialize(); void dump_cpt() const; - inline string version() { return "0.2.0"; } + inline std::string version() { return "0.2.0"; } }; } #endif \ No newline at end of file diff --git a/src/BayesNet/Node.cc b/src/BayesNet/Node.cc index 4f01bec..28408d4 100644 --- a/src/BayesNet/Node.cc +++ b/src/BayesNet/Node.cc @@ -3,7 +3,7 @@ namespace bayesnet { Node::Node(const std::string& name) - : name(name), numStates(0), cpTable(torch::Tensor()), parents(vector()), children(vector()) + : name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector()), children(std::vector()) { } void Node::clear() @@ -14,7 +14,7 @@ namespace bayesnet { dimensions.clear(); numStates = 0; } - string Node::getName() const + std::string Node::getName() const { return name; } @@ -34,11 +34,11 @@ namespace bayesnet { { children.push_back(child); } - vector& Node::getParents() + std::vector& Node::getParents() { return parents; } - vector& Node::getChildren() + std::vector& Node::getChildren() { return children; } @@ -63,28 +63,28 @@ namespace bayesnet { */ unsigned Node::minFill() { - unordered_set neighbors; + std::unordered_set neighbors; for (auto child : children) { neighbors.emplace(child->getName()); } for (auto parent : parents) { neighbors.emplace(parent->getName()); } - auto source = vector(neighbors.begin(), neighbors.end()); + auto source = std::vector(neighbors.begin(), neighbors.end()); return combinations(source).size(); } - vector> Node::combinations(const vector& source) + std::vector> Node::combinations(const std::vector& source) { - vector> result; + std::vector> result; for (int i = 0; i < source.size(); ++i) { - string temp = source[i]; + std::string temp = source[i]; for (int j = i + 1; j < source.size(); ++j) { result.push_back({ temp, source[j] }); } } return result; } - void Node::computeCPT(const torch::Tensor& dataset, const vector& features, const double laplaceSmoothing, const torch::Tensor& weights) + void 
Node::computeCPT(const torch::Tensor& dataset, const std::vector& features, const double laplaceSmoothing, const torch::Tensor& weights) { dimensions.clear(); // Get dimensions of the CPT @@ -96,7 +96,7 @@ namespace bayesnet { // Fill table with counts auto pos = find(features.begin(), features.end(), name); if (pos == features.end()) { - throw logic_error("Feature " + name + " not found in dataset"); + throw std::logic_error("Feature " + name + " not found in dataset"); } int name_index = pos - features.begin(); for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) { @@ -105,7 +105,7 @@ namespace bayesnet { for (auto parent : parents) { pos = find(features.begin(), features.end(), parent->getName()); if (pos == features.end()) { - throw logic_error("Feature parent " + parent->getName() + " not found in dataset"); + throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset"); } int parent_index = pos - features.begin(); coordinates.push_back(dataset.index({ parent_index, n_sample })); @@ -116,17 +116,17 @@ namespace bayesnet { // Normalize the counts cpTable = cpTable / cpTable.sum(0); } - float Node::getFactorValue(map& evidence) + float Node::getFactorValue(std::map& evidence) { c10::List> coordinates; // following predetermined order of indices in the cpTable (see Node.h) coordinates.push_back(at::tensor(evidence[name])); - transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); + transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); return cpTable.index({ coordinates }).item(); } - vector Node::graph(const string& className) + std::vector Node::graph(const std::string& className) { - auto output = vector(); + auto output = std::vector(); auto suffix = name == className ? 
", fontcolor=red, fillcolor=lightblue, style=filled " : ""; output.push_back(name + " [shape=circle" + suffix + "] \n"); transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); }); diff --git a/src/BayesNet/Node.h b/src/BayesNet/Node.h index 4979007..a49158f 100644 --- a/src/BayesNet/Node.h +++ b/src/BayesNet/Node.h @@ -5,33 +5,32 @@ #include #include namespace bayesnet { - using namespace std; class Node { private: - string name; - vector parents; - vector children; + std::string name; + std::vector parents; + std::vector children; int numStates; // number of states of the variable torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ... - vector dimensions; // dimensions of the cpTable - vector> combinations(const vector&); + std::vector dimensions; // dimensions of the cpTable + std::vector> combinations(const std::vector&); public: - explicit Node(const string&); + explicit Node(const std::string&); void clear(); void addParent(Node*); void addChild(Node*); void removeParent(Node*); void removeChild(Node*); - string getName() const; - vector& getParents(); - vector& getChildren(); + std::string getName() const; + std::vector& getParents(); + std::vector& getChildren(); torch::Tensor& getCPT(); - void computeCPT(const torch::Tensor& dataset, const vector& features, const double laplaceSmoothing, const torch::Tensor& weights); + void computeCPT(const torch::Tensor& dataset, const std::vector& features, const double laplaceSmoothing, const torch::Tensor& weights); int getNumStates() const; void setNumStates(int); unsigned minFill(); - vector graph(const string& clasName); // Returns a vector of strings representing the graph in graphviz format - float getFactorValue(map&); + std::vector graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format + float getFactorValue(std::map&); }; } 
#endif \ No newline at end of file diff --git a/src/BayesNet/Proposal.cc b/src/BayesNet/Proposal.cc index 6bc819b..58b2e73 100644 --- a/src/BayesNet/Proposal.cc +++ b/src/BayesNet/Proposal.cc @@ -2,7 +2,7 @@ #include "ArffFiles.h" namespace bayesnet { - Proposal::Proposal(torch::Tensor& dataset_, vector& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {} + Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {} Proposal::~Proposal() { for (auto& [key, value] : discretizers) { @@ -18,14 +18,14 @@ namespace bayesnet { throw std::invalid_argument("y must be an integer tensor"); } } - map> Proposal::localDiscretizationProposal(const map>& oldStates, Network& model) + map> Proposal::localDiscretizationProposal(const map>& oldStates, Network& model) { // order of local discretization is important. no good 0, 1, 2... // although we rediscretize features after the local discretization of every feature auto order = model.topological_sort(); auto& nodes = model.getNodes(); - map> states = oldStates; - vector indicesToReDiscretize; + map> states = oldStates; + std::vector indicesToReDiscretize; bool upgrade = false; // Flag to check if we need to upgrade the model for (auto feature : order) { auto nodeParents = nodes[feature]->getParents(); @@ -33,16 +33,16 @@ namespace bayesnet { upgrade = true; int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin(); indicesToReDiscretize.push_back(index); // We need to re-discretize this feature - vector parents; + std::vector parents; transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); }); // Remove class as parent as it will be added later parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end()); // Get the indices of the parents - vector indices; + std::vector indices; 
indices.push_back(-1); // Add class index transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); }); // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y) - vector yJoinParents(Xf.size(1)); + std::vector yJoinParents(Xf.size(1)); for (auto idx : indices) { for (int i = 0; i < Xf.size(1); ++i) { yJoinParents[i] += to_string(pDataset.index({ idx, i }).item()); @@ -51,16 +51,16 @@ namespace bayesnet { auto arff = ArffFiles(); auto yxv = arff.factorize(yJoinParents); auto xvf_ptr = Xf.index({ index }).data_ptr(); - auto xvf = vector(xvf_ptr, xvf_ptr + Xf.size(1)); + auto xvf = std::vector(xvf_ptr, xvf_ptr + Xf.size(1)); discretizers[feature]->fit(xvf, yxv); } if (upgrade) { // Discretize again X (only the affected indices) with the new fitted discretizers for (auto index : indicesToReDiscretize) { auto Xt_ptr = Xf.index({ index }).data_ptr(); - auto Xt = vector(Xt_ptr, Xt_ptr + Xf.size(1)); + auto Xt = std::vector(Xt_ptr, Xt_ptr + Xf.size(1)); pDataset.index_put_({ index, "..." 
}, torch::tensor(discretizers[pFeatures[index]]->transform(Xt))); - auto xStates = vector(discretizers[pFeatures[index]]->getCutPoints().size() + 1); + auto xStates = std::vector(discretizers[pFeatures[index]]->getCutPoints().size() + 1); iota(xStates.begin(), xStates.end(), 0); //Update new states of the feature/node states[pFeatures[index]] = xStates; @@ -70,28 +70,28 @@ namespace bayesnet { } return states; } - map> Proposal::fit_local_discretization(const torch::Tensor& y) + map> Proposal::fit_local_discretization(const torch::Tensor& y) { // Discretize the continuous input data and build pDataset (Classifier::dataset) int m = Xf.size(1); int n = Xf.size(0); - map> states; - pDataset = torch::zeros({ n + 1, m }, kInt32); - auto yv = vector(y.data_ptr(), y.data_ptr() + y.size(0)); + map> states; + pDataset = torch::zeros({ n + 1, m }, torch::kInt32); + auto yv = std::vector(y.data_ptr(), y.data_ptr() + y.size(0)); // discretize input data by feature(row) for (auto i = 0; i < pFeatures.size(); ++i) { auto* discretizer = new mdlp::CPPFImdlp(); auto Xt_ptr = Xf.index({ i }).data_ptr(); - auto Xt = vector(Xt_ptr, Xt_ptr + Xf.size(1)); + auto Xt = std::vector(Xt_ptr, Xt_ptr + Xf.size(1)); discretizer->fit(Xt, yv); pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt))); - auto xStates = vector(discretizer->getCutPoints().size() + 1); + auto xStates = std::vector(discretizer->getCutPoints().size() + 1); iota(xStates.begin(), xStates.end(), 0); states[pFeatures[i]] = xStates; discretizers[pFeatures[i]] = discretizer; } int n_classes = torch::max(y).item() + 1; - auto yStates = vector(n_classes); + auto yStates = std::vector(n_classes); iota(yStates.begin(), yStates.end(), 0); states[pClassName] = yStates; pDataset.index_put_({ n, "..." 
}, y); @@ -101,7 +101,7 @@ namespace bayesnet { { auto Xtd = torch::zeros_like(X, torch::kInt32); for (int i = 0; i < X.size(0); ++i) { - auto Xt = vector(X[i].data_ptr(), X[i].data_ptr() + X.size(1)); + auto Xt = std::vector(X[i].data_ptr(), X[i].data_ptr() + X.size(1)); auto Xd = discretizers[pFeatures[i]]->transform(Xt); Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32)); } diff --git a/src/BayesNet/Proposal.h b/src/BayesNet/Proposal.h index e6ba2bc..6c4872c 100644 --- a/src/BayesNet/Proposal.h +++ b/src/BayesNet/Proposal.h @@ -10,20 +10,20 @@ namespace bayesnet { class Proposal { public: - Proposal(torch::Tensor& pDataset, vector& features_, string& className_); + Proposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_); virtual ~Proposal(); protected: void checkInput(const torch::Tensor& X, const torch::Tensor& y); torch::Tensor prepareX(torch::Tensor& X); - map> localDiscretizationProposal(const map>& states, Network& model); - map> fit_local_discretization(const torch::Tensor& y); + map> localDiscretizationProposal(const map>& states, Network& model); + map> fit_local_discretization(const torch::Tensor& y); torch::Tensor Xf; // X continuous nxm tensor torch::Tensor y; // y discrete nx1 tensor - map discretizers; + map discretizers; private: torch::Tensor& pDataset; // (n+1)xm tensor - vector& pFeatures; - string& pClassName; + std::vector& pFeatures; + std::string& pClassName; }; } diff --git a/src/BayesNet/SPODE.cc b/src/BayesNet/SPODE.cc index 83c9231..038c87e 100644 --- a/src/BayesNet/SPODE.cc +++ b/src/BayesNet/SPODE.cc @@ -17,7 +17,7 @@ namespace bayesnet { } } } - vector SPODE::graph(const string& name) const + std::vector SPODE::graph(const std::string& name) const { return model.graph(name); } diff --git a/src/BayesNet/SPODE.h b/src/BayesNet/SPODE.h index 0a78830..6621263 100644 --- a/src/BayesNet/SPODE.h +++ b/src/BayesNet/SPODE.h @@ -11,7 +11,7 @@ namespace bayesnet { public: explicit SPODE(int root); virtual ~SPODE() 
{}; - vector graph(const string& name = "SPODE") const override; + std::vector graph(const std::string& name = "SPODE") const override; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/SPODELd.cc b/src/BayesNet/SPODELd.cc index 73b9d2b..329ecce 100644 --- a/src/BayesNet/SPODELd.cc +++ b/src/BayesNet/SPODELd.cc @@ -1,16 +1,15 @@ #include "SPODELd.h" namespace bayesnet { - using namespace std; SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} - SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector& features_, const string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) { checkInput(X_, y_); features = features_; className = className_; Xf = X_; y = y_; - // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y + // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network @@ -18,7 +17,7 @@ namespace bayesnet { states = localDiscretizationProposal(states, model); return *this; } - SPODELd& SPODELd::fit(torch::Tensor& dataset, const vector& features_, const string& className_, map>& states_) + SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_) { if (!torch::is_floating_point(dataset)) { throw std::runtime_error("Dataset must be a floating point tensor"); @@ -27,7 +26,7 @@ namespace bayesnet { y = dataset.index({ -1, "..." 
}).clone(); features = features_; className = className_; - // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y + // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network @@ -36,12 +35,12 @@ namespace bayesnet { return *this; } - Tensor SPODELd::predict(Tensor& X) + torch::Tensor SPODELd::predict(torch::Tensor& X) { auto Xt = prepareX(X); return SPODE::predict(Xt); } - vector SPODELd::graph(const string& name) const + std::vector SPODELd::graph(const std::string& name) const { return SPODE::graph(name); } diff --git a/src/BayesNet/SPODELd.h b/src/BayesNet/SPODELd.h index b6121d9..9cc3310 100644 --- a/src/BayesNet/SPODELd.h +++ b/src/BayesNet/SPODELd.h @@ -4,16 +4,15 @@ #include "Proposal.h" namespace bayesnet { - using namespace std; class SPODELd : public SPODE, public Proposal { public: explicit SPODELd(int root); virtual ~SPODELd() = default; - SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) override; - SPODELd& fit(torch::Tensor& dataset, const vector& features, const string& className, map>& states) override; - vector graph(const string& name = "SPODE") const override; - Tensor predict(Tensor& X) override; - static inline string version() { return "0.0.1"; }; + SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; + SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states) override; + std::vector graph(const std::string& name = "SPODE") const override; + torch::Tensor predict(torch::Tensor& X) override; + static inline std::string version() { return "0.0.1"; }; }; } #endif // !SPODELD_H \ No newline at end of file diff --git 
a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index f0728be..39f071e 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -1,8 +1,6 @@ #include "TAN.h" namespace bayesnet { - using namespace torch; - TAN::TAN() : Classifier(Network()) {} void TAN::buildModel(const torch::Tensor& weights) @@ -11,10 +9,10 @@ namespace bayesnet { addNodes(); // 1. Compute mutual information between each feature and the class and set the root node // as the highest mutual information with the class - auto mi = vector >(); - Tensor class_dataset = dataset.index({ -1, "..." }); + auto mi = std::vector >(); + torch::Tensor class_dataset = dataset.index({ -1, "..." }); for (int i = 0; i < static_cast(features.size()); ++i) { - Tensor feature_dataset = dataset.index({ i, "..." }); + torch::Tensor feature_dataset = dataset.index({ i, "..." }); auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights); mi.push_back({ i, mi_value }); } @@ -34,7 +32,7 @@ namespace bayesnet { model.addEdge(className, feature); } } - vector TAN::graph(const string& title) const + std::vector TAN::graph(const std::string& title) const { return model.graph(title); } diff --git a/src/BayesNet/TAN.h b/src/BayesNet/TAN.h index 60a5656..afbf2eb 100644 --- a/src/BayesNet/TAN.h +++ b/src/BayesNet/TAN.h @@ -2,7 +2,6 @@ #define TAN_H #include "Classifier.h" namespace bayesnet { - using namespace std; class TAN : public Classifier { private: protected: @@ -10,7 +9,7 @@ namespace bayesnet { public: TAN(); virtual ~TAN() {}; - vector graph(const string& name = "TAN") const override; + std::vector graph(const std::string& name = "TAN") const override; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/TANLd.cc b/src/BayesNet/TANLd.cc index 320f756..b7f563c 100644 --- a/src/BayesNet/TANLd.cc +++ b/src/BayesNet/TANLd.cc @@ -1,16 +1,15 @@ #include "TANLd.h" namespace bayesnet { - using namespace std; TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} - TANLd& 
TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector& features_, const string& className_, map>& states_) + TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_) { checkInput(X_, y_); features = features_; className = className_; Xf = X_; y = y_; - // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y + // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y states = fit_local_discretization(y); // We have discretized the input data // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network @@ -19,12 +18,12 @@ namespace bayesnet { return *this; } - Tensor TANLd::predict(Tensor& X) + torch::Tensor TANLd::predict(torch::Tensor& X) { auto Xt = prepareX(X); return TAN::predict(Xt); } - vector TANLd::graph(const string& name) const + std::vector TANLd::graph(const std::string& name) const { return TAN::graph(name); } diff --git a/src/BayesNet/TANLd.h b/src/BayesNet/TANLd.h index b218ae3..88b1d65 100644 --- a/src/BayesNet/TANLd.h +++ b/src/BayesNet/TANLd.h @@ -4,16 +4,15 @@ #include "Proposal.h" namespace bayesnet { - using namespace std; class TANLd : public TAN, public Proposal { private: public: TANLd(); virtual ~TANLd() = default; - TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector& features, const string& className, map>& states) override; - vector graph(const string& name = "TAN") const override; - Tensor predict(Tensor& X) override; - static inline string version() { return "0.0.1"; }; + TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states) override; + std::vector graph(const std::string& name = "TAN") const override; + torch::Tensor predict(torch::Tensor& X) override; + static inline std::string version() { return "0.0.1"; }; }; } #endif // !TANLD_H \ No newline at end of file diff --git 
a/src/BayesNet/bayesnetUtils.cc b/src/BayesNet/bayesnetUtils.cc index 480034b..accef68 100644 --- a/src/BayesNet/bayesnetUtils.cc +++ b/src/BayesNet/bayesnetUtils.cc @@ -1,25 +1,23 @@ #include "bayesnetUtils.h" namespace bayesnet { - using namespace std; - using namespace torch; // Return the indices in descending order - vector argsort(vector& nums) + std::vector argsort(std::vector& nums) { int n = nums.size(); - vector indices(n); + std::vector indices(n); iota(indices.begin(), indices.end(), 0); sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); return indices; } - vector> tensorToVector(Tensor& tensor) + std::vector> tensorToVector(torch::Tensor& tensor) { - // convert mxn tensor to nxm vector - vector> result; + // convert mxn tensor to nxm std::vector + std::vector> result; // Iterate over cols for (int i = 0; i < tensor.size(1); ++i) { auto col_tensor = tensor.index({ "...", i }); - auto col = vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + tensor.size(0)); result.push_back(col); } return result; diff --git a/src/BayesNet/bayesnetUtils.h b/src/BayesNet/bayesnetUtils.h index b5811f7..4f477a0 100644 --- a/src/BayesNet/bayesnetUtils.h +++ b/src/BayesNet/bayesnetUtils.h @@ -3,9 +3,7 @@ #include #include namespace bayesnet { - using namespace std; - using namespace torch; - vector argsort(vector& nums); - vector> tensorToVector(Tensor& tensor); + std::vector argsort(std::vector& nums); + std::vector> tensorToVector(torch::Tensor& tensor); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 4fcd575..ba0c082 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -13,26 +13,25 @@ namespace fs = std::filesystem; -// function ftime_to_string, Code taken from +// function ftime_to_std::string, Code taken from // 
https://stackoverflow.com/a/58237530/1389271 template std::string ftime_to_string(TP tp) { - using namespace std::chrono; - auto sctp = time_point_cast(tp - TP::clock::now() - + system_clock::now()); - auto tt = system_clock::to_time_t(sctp); + auto sctp = std::chrono::time_point_cast(tp - TP::clock::now() + + std::chrono::system_clock::now()); + auto tt = std::chrono::system_clock::to_time_t(sctp); std::tm* gmt = std::gmtime(&tt); std::stringstream buffer; buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); return buffer.str(); } namespace platform { - string BestResults::build() + std::string BestResults::build() { auto files = loadResultFiles(); if (files.size() == 0) { - cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; + std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; exit(1); } json bests; @@ -42,7 +41,7 @@ namespace platform { for (auto const& item : data.at("results")) { bool update = false; // Check if results file contains only one dataset - auto datasetName = item.at("dataset").get(); + auto datasetName = item.at("dataset").get(); if (bests.contains(datasetName)) { if (item.at("score").get() > bests[datasetName].at(0).get()) { update = true; @@ -55,39 +54,39 @@ namespace platform { } } } - string bestFileName = path + bestResultFile(); + std::string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); - cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl; + std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." 
<< Colors::RESET() << std::endl; } - ofstream file(bestFileName); + std::ofstream file(bestFileName); file << bests; file.close(); return bestFileName; } - string BestResults::bestResultFile() + std::string BestResults::bestResultFile() { return "best_results_" + score + "_" + model + ".json"; } - pair getModelScore(string name) + std::pair getModelScore(std::string name) { // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json int i = 0; auto pos = name.find("_"); auto pos2 = name.find("_", pos + 1); - string score = name.substr(pos + 1, pos2 - pos - 1); + std::string score = name.substr(pos + 1, pos2 - pos - 1); pos = name.find("_", pos2 + 1); - string model = name.substr(pos2 + 1, pos - pos2 - 1); + std::string model = name.substr(pos2 + 1, pos - pos2 - 1); return { model, score }; } - vector BestResults::loadResultFiles() + std::vector BestResults::loadResultFiles() { - vector files; + std::vector files; using std::filesystem::directory_iterator; - string fileModel, fileScore; + std::string fileModel, fileScore; for (const auto& file : directory_iterator(path)) { auto fileName = file.path().filename().string(); - if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) { + if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) { tie(fileModel, fileScore) = getModelScore(fileName); if (score == fileScore && (model == fileModel || model == "any")) { files.push_back(fileName); @@ -96,37 +95,37 @@ namespace platform { } return files; } - json BestResults::loadFile(const string& fileName) + json BestResults::loadFile(const std::string& fileName) { - ifstream resultData(fileName); + std::ifstream resultData(fileName); if (resultData.is_open()) { json data = json::parse(resultData); return data; } - throw invalid_argument("Unable to open result file. [" + fileName + "]"); + throw std::invalid_argument("Unable to open result file. 
[" + fileName + "]"); } - vector BestResults::getModels() + std::vector BestResults::getModels() { - set models; - vector result; + std::set models; + std::vector result; auto files = loadResultFiles(); if (files.size() == 0) { - cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; + std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; exit(1); } - string fileModel, fileScore; + std::string fileModel, fileScore; for (const auto& file : files) { // extract the model from the file name tie(fileModel, fileScore) = getModelScore(file); - // add the model to the vector of models + // add the model to the std::vector of models models.insert(fileModel); } - result = vector(models.begin(), models.end()); + result = std::vector(models.begin(), models.end()); return result; } - vector BestResults::getDatasets(json table) + std::vector BestResults::getDatasets(json table) { - vector datasets; + std::vector datasets; for (const auto& dataset : table.items()) { datasets.push_back(dataset.key()); } @@ -136,7 +135,7 @@ namespace platform { { auto models = getModels(); for (const auto& model : models) { - cout << "Building best results for model: " << model << endl; + std::cout << "Building best results for model: " << model << std::endl; this->model = model; build(); } @@ -144,62 +143,62 @@ namespace platform { } void BestResults::listFile() { - string bestFileName = path + bestResultFile(); + std::string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); } else { - cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; + std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." 
<< Colors::RESET() << std::endl; exit(1); } auto temp = ConfigLocale(); - auto date = ftime_to_string(filesystem::last_write_time(bestFileName)); + auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName)); auto data = loadFile(bestFileName); auto datasets = getDatasets(data); - int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); + int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); int maxFileName = 0; int maxHyper = 15; for (auto const& item : data.items()) { - maxHyper = max(maxHyper, (int)item.value().at(1).dump().size()); - maxFileName = max(maxFileName, (int)item.value().at(2).get().size()); + maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size()); + maxFileName = std::max(maxFileName, (int)item.value().at(2).get().size()); } - stringstream oss; - oss << Colors::GREEN() << "Best results for " << model << " as of " << date << endl; - cout << oss.str(); - cout << string(oss.str().size() - 8, '-') << endl; - cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << "Dataset" << "Score " << setw(maxFileName) << "File" << " Hyperparameters" << endl; - cout << "=== " << string(maxDatasetName, '=') << " =========== " << string(maxFileName, '=') << " " << string(maxHyper, '=') << endl; + std::stringstream oss; + oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl; + std::cout << oss.str(); + std::cout << std::string(oss.str().size() - 8, '-') << std::endl; + std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl; + std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << 
std::endl; auto i = 0; bool odd = true; double total = 0; for (auto const& item : data.items()) { auto color = odd ? Colors::BLUE() : Colors::CYAN(); double value = item.value().at(0).get(); - cout << color << setw(3) << fixed << right << i++ << " "; - cout << setw(maxDatasetName) << left << item.key() << " "; - cout << setw(11) << setprecision(9) << fixed << value << " "; - cout << setw(maxFileName) << item.value().at(2).get() << " "; - cout << item.value().at(1) << " "; - cout << endl; + std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; + std::cout << std::setw(maxDatasetName) << std::left << item.key() << " "; + std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " "; + std::cout << std::setw(maxFileName) << item.value().at(2).get() << " "; + std::cout << item.value().at(1) << " "; + std::cout << std::endl; total += value; odd = !odd; } - cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ===========" << endl; - cout << setw(5 + maxDatasetName) << "Total.................. " << setw(11) << setprecision(8) << fixed << total << endl; + std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl; + std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl; } - json BestResults::buildTableResults(vector models) + json BestResults::buildTableResults(std::vector models) { json table; - auto maxDate = filesystem::file_time_type::max(); + auto maxDate = std::filesystem::file_time_type::max(); for (const auto& model : models) { this->model = model; - string bestFileName = path + bestResultFile(); + std::string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); } else { - cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." 
<< Colors::RESET() << endl; + std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl; exit(1); } - auto dateWrite = filesystem::last_write_time(bestFileName); + auto dateWrite = std::filesystem::last_write_time(bestFileName); if (dateWrite < maxDate) { maxDate = dateWrite; } @@ -209,25 +208,25 @@ namespace platform { table["dateTable"] = ftime_to_string(maxDate); return table; } - void BestResults::printTableResults(vector models, json table) + void BestResults::printTableResults(std::vector models, json table) { - stringstream oss; - oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; - cout << oss.str(); - cout << string(oss.str().size() - 8, '-') << endl; - cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset"); + std::stringstream oss; + oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << std::endl; + std::cout << oss.str(); + std::cout << std::string(oss.str().size() - 8, '-') << std::endl; + std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset"); for (const auto& model : models) { - cout << setw(maxModelName) << left << model << " "; + std::cout << std::setw(maxModelName) << std::left << model << " "; } - cout << endl; - cout << "=== " << string(maxDatasetName, '=') << " "; + std::cout << std::endl; + std::cout << "=== " << std::string(maxDatasetName, '=') << " "; for (const auto& model : models) { - cout << string(maxModelName, '=') << " "; + std::cout << std::string(maxModelName, '=') << " "; } - cout << endl; + std::cout << std::endl; auto i = 0; bool odd = true; - map totals; + std::map totals; int nDatasets = table.begin().value().size(); for (const auto& model : models) { totals[model] = 0.0; @@ -235,8 +234,8 @@ namespace platform { auto datasets = getDatasets(table.begin().value()); for (auto const& 
dataset : datasets) { auto color = odd ? Colors::BLUE() : Colors::CYAN(); - cout << color << setw(3) << fixed << right << i++ << " "; - cout << setw(maxDatasetName) << left << dataset << " "; + std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; + std::cout << std::setw(maxDatasetName) << std::left << dataset << " "; double maxValue = 0; // Find out the max value for this dataset for (const auto& model : models) { @@ -247,23 +246,23 @@ namespace platform { } // Print the row with red colors on max values for (const auto& model : models) { - string efectiveColor = color; + std::string efectiveColor = color; double value = table[model].at(dataset).at(0).get(); if (value == maxValue) { efectiveColor = Colors::RED(); } totals[model] += value; - cout << efectiveColor << setw(maxModelName) << setprecision(maxModelName - 2) << fixed << value << " "; + std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " "; } - cout << endl; + std::cout << std::endl; odd = !odd; } - cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " "; + std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " "; for (const auto& model : models) { - cout << string(maxModelName, '=') << " "; + std::cout << std::string(maxModelName, '=') << " "; } - cout << endl; - cout << Colors::GREEN() << setw(5 + maxDatasetName) << " Totals..................."; + std::cout << std::endl; + std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals..................."; double max = 0.0; for (const auto& total : totals) { if (total.second > max) { @@ -271,13 +270,13 @@ namespace platform { } } for (const auto& model : models) { - string efectiveColor = Colors::GREEN(); + std::string efectiveColor = Colors::GREEN(); if (totals[model] == max) { efectiveColor = Colors::RED(); } - cout << efectiveColor << right << setw(maxModelName) << setprecision(maxModelName - 4) << fixed << 
totals[model] << " "; + std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " "; } - cout << endl; + std::cout << std::endl; } void BestResults::reportSingle(bool excel) { @@ -286,7 +285,7 @@ namespace platform { auto models = getModels(); // Build the table of results json table = buildTableResults(models); - vector datasets = getDatasets(table.begin().value()); + std::vector datasets = getDatasets(table.begin().value()); BestResultsExcel excel(score, datasets); excel.reportSingle(model, path + bestResultFile()); messageExcelFile(excel.getFileName()); @@ -297,15 +296,15 @@ namespace platform { auto models = getModels(); // Build the table of results json table = buildTableResults(models); - vector datasets = getDatasets(table.begin().value()); - maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - maxModelName = max(12, maxModelName); - maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - maxDatasetName = max(25, maxDatasetName); + std::vector datasets = getDatasets(table.begin().value()); + maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); + maxModelName = std::max(12, maxModelName); + maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); + maxDatasetName = std::max(25, maxDatasetName); // Print the table of results printTableResults(models, table); // Compute the Friedman test - map> ranksModels; + std::map> ranksModels; if (friedman) { Statistics stats(models, datasets, table, significance); auto result = stats.friedmanTest(); @@ -319,7 +318,7 @@ namespace platform { int idx = -1; double min = 2000; // 
Find out the control model - auto totals = vector(models.size(), 0.0); + auto totals = std::vector(models.size(), 0.0); for (const auto& dataset : datasets) { for (int i = 0; i < models.size(); ++i) { totals[i] += ranksModels[dataset][models[i]]; @@ -337,8 +336,8 @@ namespace platform { messageExcelFile(excel.getFileName()); } } - void BestResults::messageExcelFile(const string& fileName) + void BestResults::messageExcelFile(const std::string& fileName) { - cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << endl; + std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl; } } \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 2406d33..7d576b0 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -2,32 +2,31 @@ #define BESTRESULTS_H #include #include -using namespace std; using json = nlohmann::json; namespace platform { class BestResults { public: - explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05) + explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05) : path(path), score(score), model(model), friedman(friedman), significance(significance) { } - string build(); + std::string build(); void reportSingle(bool excel); void reportAll(bool excel); void buildAll(); private: - vector getModels(); - vector getDatasets(json table); - vector loadResultFiles(); - void messageExcelFile(const string& fileName); - json buildTableResults(vector models); - void printTableResults(vector models, json table); - string bestResultFile(); - json loadFile(const string& fileName); + std::vector getModels(); + std::vector getDatasets(json table); + std::vector loadResultFiles(); + void messageExcelFile(const std::string& fileName); + json buildTableResults(std::vector 
models); + void printTableResults(std::vector models, json table); + std::string bestResultFile(); + json loadFile(const std::string& fileName); void listFile(); - string path; - string score; - string model; + std::string path; + std::string score; + std::string model; bool friedman; double significance; int maxModelName = 0; diff --git a/src/Platform/BestResultsExcel.cc b/src/Platform/BestResultsExcel.cc index ae487a6..d524665 100644 --- a/src/Platform/BestResultsExcel.cc +++ b/src/Platform/BestResultsExcel.cc @@ -7,20 +7,20 @@ #include "ReportExcel.h" namespace platform { - json loadResultData(const string& fileName) + json loadResultData(const std::string& fileName) { json data; - ifstream resultData(fileName); + std::ifstream resultData(fileName); if (resultData.is_open()) { data = json::parse(resultData); } else { - throw invalid_argument("Unable to open result file. [" + fileName + "]"); + throw std::invalid_argument("Unable to open result file. [" + fileName + "]"); } return data; } - string getColumnName(int colNum) + std::string getColumnName(int colNum) { - string columnName = ""; + std::string columnName = ""; if (colNum == 0) return "A"; while (colNum > 0) { @@ -30,15 +30,15 @@ namespace platform { } return columnName; } - BestResultsExcel::BestResultsExcel(const string& score, const vector& datasets) : score(score), datasets(datasets) + BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector& datasets) : score(score), datasets(datasets) { workbook = workbook_new((Paths::excel() + fileName).c_str()); setProperties("Best Results"); - int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - datasetNameSize = max(datasetNameSize, maxDatasetName); + int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); + datasetNameSize = 
std::max(datasetNameSize, maxDatasetName); createFormats(); } - void BestResultsExcel::reportAll(const vector& models, const json& table, const map>& ranks, bool friedman, double significance) + void BestResultsExcel::reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance) { this->table = table; this->models = models; @@ -46,23 +46,23 @@ namespace platform { this->friedman = friedman; this->significance = significance; worksheet = workbook_add_worksheet(workbook, "Best Results"); - int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - modelNameSize = max(modelNameSize, maxModelName); + int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); + modelNameSize = std::max(modelNameSize, maxModelName); formatColumns(); build(); } - void BestResultsExcel::reportSingle(const string& model, const string& fileName) + void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName) { worksheet = workbook_add_worksheet(workbook, "Report"); if (FILE* fileTest = fopen(fileName.c_str(), "r")) { fclose(fileTest); } else { - cerr << "File " << fileName << " doesn't exist." << endl; + std::cerr << "File " << fileName << " doesn't exist." 
<< std::endl; exit(1); } json data = loadResultData(fileName); - string title = "Best results for " + model; + std::string title = "Best results for " + model; worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]); // Body header row = 3; @@ -73,30 +73,30 @@ namespace platform { writeString(row, 3, "File", "bodyHeader"); writeString(row, 4, "Hyperparameters", "bodyHeader"); auto i = 0; - string hyperparameters; + std::string hyperparameters; int hypSize = 22; - map files; // map of files imported and their tabs + std::map files; // map of files imported and their tabs for (auto const& item : data.items()) { row++; writeInt(row, 0, i++, "ints"); writeString(row, 1, item.key().c_str(), "text"); writeDouble(row, 2, item.value().at(0).get(), "result"); - auto fileName = item.value().at(2).get(); - string hyperlink = ""; + auto fileName = item.value().at(2).get(); + std::string hyperlink = ""; try { hyperlink = files.at(fileName); } - catch (const out_of_range& oor) { - auto tabName = "table_" + to_string(i); + catch (const std::out_of_range& oor) { + auto tabName = "table_" + std::to_string(i); auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str()); json data = loadResultData(Paths::results() + fileName); auto report = ReportExcel(data, false, workbook, worksheetNew); report.show(); - hyperlink = "#table_" + to_string(i); + hyperlink = "#table_" + std::to_string(i); files[fileName] = hyperlink; } - hyperlink += "!H" + to_string(i + 6); - string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")"; + hyperlink += "!H" + std::to_string(i + 6); + std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")"; worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text")); hyperparameters = item.value().at(1).dump(); if (hyperparameters.size() > hypSize) { @@ -107,13 +107,13 @@ namespace platform { row++; // Set Totals writeString(row, 1, "Total", "bodyHeader"); - 
stringstream oss; + std::stringstream oss; auto colName = getColumnName(2); oss << "=sum(" << colName << "5:" << colName << row << ")"; worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]); // Set format worksheet_freeze_panes(worksheet, 4, 2); - vector columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 }; + std::vector columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 }; for (int i = 0; i < columns_sizes.size(); ++i) { worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); } @@ -125,7 +125,7 @@ namespace platform { void BestResultsExcel::formatColumns() { worksheet_freeze_panes(worksheet, 4, 2); - vector columns_sizes = { 5, datasetNameSize }; + std::vector columns_sizes = { 5, datasetNameSize }; for (int i = 0; i < models.size(); ++i) { columns_sizes.push_back(modelNameSize); } @@ -133,7 +133,7 @@ namespace platform { worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); } } - void BestResultsExcel::addConditionalFormat(string formula) + void BestResultsExcel::addConditionalFormat(std::string formula) { // Add conditional format for max/min values in scores/ranks sheets lxw_format* custom_format = workbook_add_format(workbook); @@ -142,8 +142,8 @@ namespace platform { // Create a conditional format object. A static object would also work. 
lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format)); conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA; - string col = getColumnName(models.size() + 1); - stringstream oss; + std::string col = getColumnName(models.size() + 1); + std::stringstream oss; oss << "=C5=" << formula << "($C5:$" << col << "5)"; auto formulaValue = oss.str(); conditional_format->value_string = formulaValue.c_str(); @@ -170,14 +170,14 @@ namespace platform { doFriedman(); } } - string BestResultsExcel::getFileName() + std::string BestResultsExcel::getFileName() { return Paths::excel() + fileName; } void BestResultsExcel::header(bool ranks) { row = 0; - string message = ranks ? "Ranks for score " + score : "Best results for " + score; + std::string message = ranks ? "Ranks for score " + score : "Best results for " + score; worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]); // Body header row = 3; @@ -210,7 +210,7 @@ namespace platform { writeString(row, 1, "Total", "bodyHeader"); int col = 1; for (const auto& model : models) { - stringstream oss; + std::stringstream oss; auto colName = getColumnName(col + 1); oss << "=SUM(" << colName << "5:" << colName << row << ")"; worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); @@ -221,7 +221,7 @@ namespace platform { int col = 1; for (const auto& model : models) { auto colName = getColumnName(col + 1); - stringstream oss; + std::stringstream oss; oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size(); worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); } @@ -230,7 +230,7 @@ namespace platform { void BestResultsExcel::doFriedman() { worksheet = workbook_add_worksheet(workbook, "Friedman"); - vector columns_sizes = { 5, datasetNameSize }; + std::vector columns_sizes = { 5, datasetNameSize }; for (int i = 0; i < models.size(); 
++i) { columns_sizes.push_back(modelNameSize); } @@ -262,7 +262,7 @@ namespace platform { row += 2; worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]); row += 2; - string controlModel = "Control Model: " + holmResult.model; + std::string controlModel = "Control Model: " + holmResult.model; worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]); row++; writeString(row, 1, "Model", "bodyHeader"); diff --git a/src/Platform/BestResultsExcel.h b/src/Platform/BestResultsExcel.h index 67b84ab..1ab75d0 100644 --- a/src/Platform/BestResultsExcel.h +++ b/src/Platform/BestResultsExcel.h @@ -5,18 +5,17 @@ #include #include -using namespace std; using json = nlohmann::json; namespace platform { class BestResultsExcel : ExcelFile { public: - BestResultsExcel(const string& score, const vector& datasets); + BestResultsExcel(const std::string& score, const std::vector& datasets); ~BestResultsExcel(); - void reportAll(const vector& models, const json& table, const map>& ranks, bool friedman, double significance); - void reportSingle(const string& model, const string& fileName); - string getFileName(); + void reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance); + void reportSingle(const std::string& model, const std::string& fileName); + std::string getFileName(); private: void build(); void header(bool ranks); @@ -24,13 +23,13 @@ namespace platform { void footer(bool ranks); void formatColumns(); void doFriedman(); - void addConditionalFormat(string formula); - const string fileName = "BestResults.xlsx"; - string score; - vector models; - vector datasets; + void addConditionalFormat(std::string formula); + const std::string fileName = "BestResults.xlsx"; + std::string score; + std::vector models; + std::vector datasets; json table; - 
map> ranksModels; + std::map> ranksModels; bool friedman; double significance; int modelNameSize = 12; // Min size of the column diff --git a/src/Platform/BestScore.h b/src/Platform/BestScore.h index 6f660c3..f1b552f 100644 --- a/src/Platform/BestScore.h +++ b/src/Platform/BestScore.h @@ -7,14 +7,14 @@ namespace platform { class BestScore { public: - static pair getScore(const std::string& metric) + static std::pair getScore(const std::string& metric) { - static map, pair> data = { + static std::map, std::pair> data = { {{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, {{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, }; auto env = platform::DotEnv(); - string experiment = env.get("experiment"); + std::string experiment = env.get("experiment"); try { return data[{experiment, metric}]; } diff --git a/src/Platform/CLocale.h b/src/Platform/CLocale.h index f88b05b..4403562 100644 --- a/src/Platform/CLocale.h +++ b/src/Platform/CLocale.h @@ -2,22 +2,20 @@ #define LOCALE_H #include #include -#include #include -using namespace std; namespace platform { - struct separation : numpunct { + struct separation : std::numpunct { char do_decimal_point() const { return ','; } char do_thousands_sep() const { return '.'; } - string do_grouping() const { return "\03"; } + std::string do_grouping() const { return "\03"; } }; class ConfigLocale { public: explicit ConfigLocale() { - locale mylocale(cout.getloc(), new separation); - locale::global(mylocale); - cout.imbue(mylocale); + std::locale mylocale(std::cout.getloc(), new separation); + std::locale::global(mylocale); + std::cout.imbue(mylocale); } }; } diff --git a/src/Platform/CommandParser.cc b/src/Platform/CommandParser.cc index 30fc272..5c58f3e 100644 --- a/src/Platform/CommandParser.cc +++ b/src/Platform/CommandParser.cc @@ -6,16 +6,16 @@ #include "Utils.h" namespace platform { - void CommandParser::messageError(const string& message) + void CommandParser::messageError(const std::string& 
message) { - cout << Colors::RED() << message << Colors::RESET() << endl; + std::cout << Colors::RED() << message << Colors::RESET() << std::endl; } - pair CommandParser::parse(const string& color, const vector>& options, const char defaultCommand, const int maxIndex) + std::pair CommandParser::parse(const std::string& color, const std::vector>& options, const char defaultCommand, const int maxIndex) { bool finished = false; while (!finished) { - stringstream oss; - string line; + std::stringstream oss; + std::string line; oss << color << "Choose option ("; bool first = true; for (auto& option : options) { @@ -24,12 +24,12 @@ namespace platform { } else { oss << ", "; } - oss << get(option) << "=" << get(option); + oss << std::get(option) << "=" << std::get(option); } oss << "): "; - cout << oss.str(); - getline(cin, line); - cout << Colors::RESET(); + std::cout << oss.str(); + getline(std::cin, line); + std::cout << Colors::RESET(); line = trim(line); if (line.size() == 0) continue; @@ -45,15 +45,15 @@ namespace platform { } bool found = false; for (auto& option : options) { - if (line[0] == get(option)) { + if (line[0] == std::get(option)) { found = true; // it's a match line.erase(line.begin()); line = trim(line); - if (get(option)) { + if (std::get(option)) { // The option requires a value if (line.size() == 0) { - messageError("Option " + get(option) + " requires a value"); + messageError("Option " + std::get(option) + " requires a value"); break; } try { @@ -69,11 +69,11 @@ namespace platform { } } else { if (line.size() > 0) { - messageError("option " + get(option) + " doesn't accept values"); + messageError("option " + std::get(option) + " doesn't accept values"); break; } } - command = get(option); + command = std::get(option); finished = true; break; } diff --git a/src/Platform/CommandParser.h b/src/Platform/CommandParser.h index 0667357..c34554b 100644 --- a/src/Platform/CommandParser.h +++ b/src/Platform/CommandParser.h @@ -3,17 +3,16 @@ #include 
#include #include -using namespace std; namespace platform { class CommandParser { public: CommandParser() = default; - pair parse(const string& color, const vector>& options, const char defaultCommand, const int maxIndex); + std::pair parse(const std::string& color, const std::vector>& options, const char defaultCommand, const int maxIndex); char getCommand() const { return command; }; int getIndex() const { return index; }; private: - void messageError(const string& message); + void messageError(const std::string& message); char command; int index; }; diff --git a/src/Platform/Dataset.cc b/src/Platform/Dataset.cc index f75fdbc..0322249 100644 --- a/src/Platform/Dataset.cc +++ b/src/Platform/Dataset.cc @@ -5,20 +5,20 @@ namespace platform { Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) { } - string Dataset::getName() const + std::string Dataset::getName() const { return name; } - string Dataset::getClassName() const + std::string Dataset::getClassName() const { return className; } - vector Dataset::getFeatures() const + std::vector Dataset::getFeatures() const { if (loaded) { return features; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } int Dataset::getNFeatures() const @@ -26,7 +26,7 @@ namespace platform { if (loaded) { return n_features; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } int Dataset::getNSamples() const @@ -34,31 +34,31 @@ namespace platform { if (loaded) { return n_samples; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not 
loaded."); } } - map> Dataset::getStates() const + std::map> Dataset::getStates() const { if (loaded) { return states; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Dataset::getVectors() + pair>&, std::vector&> Dataset::getVectors() { if (loaded) { return { Xv, yv }; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Dataset::getVectorsDiscretized() + pair>&, std::vector&> Dataset::getVectorsDiscretized() { if (loaded) { return { Xd, yv }; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } pair Dataset::getTensors() @@ -67,22 +67,22 @@ namespace platform { buildTensors(); return { X, y }; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } void Dataset::load_csv() { ifstream file(path + "/" + name + ".csv"); if (file.is_open()) { - string line; + std::string line; getline(file, line); - vector tokens = split(line, ','); - features = vector(tokens.begin(), tokens.end() - 1); + std::vector tokens = split(line, ','); + features = std::vector(tokens.begin(), tokens.end() - 1); if (className == "-1") { className = tokens.back(); } for (auto i = 0; i < features.size(); ++i) { - Xv.push_back(vector()); + Xv.push_back(std::vector()); } while (getline(file, line)) { tokens = split(line, ','); @@ -93,17 +93,17 @@ namespace platform { } file.close(); } else { - throw invalid_argument("Unable to open dataset file."); + throw std::invalid_argument("Unable to open dataset file."); } } void Dataset::computeStates() { for (int i = 0; i < features.size(); ++i) { - states[features[i]] = vector(*max_element(Xd[i].begin(), Xd[i].end()) + 1); + states[features[i]] = std::vector(*max_element(Xd[i].begin(), Xd[i].end()) + 1); auto item = states.at(features[i]); iota(begin(item), end(item), 
0); } - states[className] = vector(*max_element(yv.begin(), yv.end()) + 1); + states[className] = std::vector(*max_element(yv.begin(), yv.end()) + 1); iota(begin(states.at(className)), end(states.at(className)), 0); } void Dataset::load_arff() @@ -118,12 +118,12 @@ namespace platform { auto attributes = arff.getAttributes(); transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; }); } - vector tokenize(string line) + std::vector tokenize(std::string line) { - vector tokens; + std::vector tokens; for (auto i = 0; i < line.size(); ++i) { if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') { - string token = line.substr(0, i); + std::string token = line.substr(0, i); tokens.push_back(token); line.erase(line.begin(), line.begin() + i + 1); i = 0; @@ -140,16 +140,16 @@ namespace platform { { ifstream file(path + "/" + name + "_R.dat"); if (file.is_open()) { - string line; + std::string line; getline(file, line); line = ArffFiles::trim(line); - vector tokens = tokenize(line); + std::vector tokens = tokenize(line); transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); }); if (className == "-1") { className = ArffFiles::trim(tokens.back()); } for (auto i = 0; i < features.size(); ++i) { - Xv.push_back(vector()); + Xv.push_back(std::vector()); } while (getline(file, line)) { tokens = tokenize(line); @@ -162,7 +162,7 @@ namespace platform { } file.close(); } else { - throw invalid_argument("Unable to open dataset file."); + throw std::invalid_argument("Unable to open dataset file."); } } void Dataset::load() @@ -201,9 +201,9 @@ namespace platform { } y = torch::tensor(yv, torch::kInt32); } - vector Dataset::discretizeDataset(vector& X, mdlp::labels_t& y) + std::vector Dataset::discretizeDataset(std::vector& X, mdlp::labels_t& y) { - vector Xd; + std::vector Xd; auto fimdlp = mdlp::CPPFImdlp(); for (int i = 0; i < X.size(); 
i++) { fimdlp.fit(X[i], y); diff --git a/src/Platform/Dataset.h b/src/Platform/Dataset.h index 21b619e..dd75351 100644 --- a/src/Platform/Dataset.h +++ b/src/Platform/Dataset.h @@ -7,12 +7,10 @@ #include "CPPFImdlp.h" #include "Utils.h" namespace platform { - using namespace std; - enum fileType_t { CSV, ARFF, RDATA }; class SourceData { public: - SourceData(string source) + SourceData(std::string source) { if (source == "Surcov") { path = "datasets/"; @@ -24,10 +22,10 @@ namespace platform { path = "data/"; fileType = RDATA; } else { - throw invalid_argument("Unknown source."); + throw std::invalid_argument("Unknown source."); } } - string getPath() + std::string getPath() { return path; } @@ -36,40 +34,40 @@ namespace platform { return fileType; } private: - string path; + std::string path; fileType_t fileType; }; class Dataset { private: - string path; - string name; + std::string path; + std::string name; fileType_t fileType; - string className; + std::string className; int n_samples{ 0 }, n_features{ 0 }; - vector features; - map> states; + std::vector features; + std::map> states; bool loaded; bool discretize; torch::Tensor X, y; - vector> Xv; - vector> Xd; - vector yv; + std::vector> Xv; + std::vector> Xd; + std::vector yv; void buildTensors(); void load_csv(); void load_arff(); void load_rdata(); void computeStates(); - vector discretizeDataset(vector& X, mdlp::labels_t& y); + std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y); public: - Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; + Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; explicit Dataset(const Dataset&); - string getName() 
const; - string getClassName() const; - vector getFeatures() const; - map> getStates() const; - pair>&, vector&> getVectors(); - pair>&, vector&> getVectorsDiscretized(); - pair getTensors(); + std::string getName() const; + std::string getClassName() const; + std::vector getFeatures() const; + std::map> getStates() const; + std::pair>&, std::vector&> getVectors(); + std::pair>&, std::vector&> getVectorsDiscretized(); + std::pair getTensors(); int getNFeatures() const; int getNSamples() const; void load(); diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 4f53a2b..af5457d 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -8,14 +8,14 @@ namespace platform { path = sd.getPath(); ifstream catalog(path + "all.txt"); if (catalog.is_open()) { - string line; + std::string line; while (getline(catalog, line)) { if (line.empty() || line[0] == '#') { continue; } - vector tokens = split(line, ','); - string name = tokens[0]; - string className; + std::vector tokens = split(line, ','); + std::string name = tokens[0]; + std::string className; if (tokens.size() == 1) { className = "-1"; } else { @@ -25,32 +25,32 @@ namespace platform { } catalog.close(); } else { - throw invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]"); + throw std::invalid_argument("Unable to open catalog file. 
[" + path + "all.txt" + "]"); } } - vector Datasets::getNames() + std::vector Datasets::getNames() { - vector result; + std::vector result; transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); return result; } - vector Datasets::getFeatures(const string& name) const + std::vector Datasets::getFeatures(const std::string& name) const { if (datasets.at(name)->isLoaded()) { return datasets.at(name)->getFeatures(); } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - map> Datasets::getStates(const string& name) const + map> Datasets::getStates(const std::string& name) const { if (datasets.at(name)->isLoaded()) { return datasets.at(name)->getStates(); } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - void Datasets::loadDataset(const string& name) const + void Datasets::loadDataset(const std::string& name) const { if (datasets.at(name)->isLoaded()) { return; @@ -58,23 +58,23 @@ namespace platform { datasets.at(name)->load(); } } - string Datasets::getClassName(const string& name) const + std::string Datasets::getClassName(const std::string& name) const { if (datasets.at(name)->isLoaded()) { return datasets.at(name)->getClassName(); } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - int Datasets::getNSamples(const string& name) const + int Datasets::getNSamples(const std::string& name) const { if (datasets.at(name)->isLoaded()) { return datasets.at(name)->getNSamples(); } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - int Datasets::getNClasses(const string& name) + int Datasets::getNClasses(const std::string& name) { if (datasets.at(name)->isLoaded()) { auto className = datasets.at(name)->getClassName(); @@ -83,46 +83,46 @@ namespace platform { return 
states.at(className).size(); } auto [Xv, yv] = getVectors(name); - return *max_element(yv.begin(), yv.end()) + 1; + return *std::max_element(yv.begin(), yv.end()) + 1; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - vector Datasets::getClassesCounts(const string& name) const + std::vector Datasets::getClassesCounts(const std::string& name) const { if (datasets.at(name)->isLoaded()) { auto [Xv, yv] = datasets.at(name)->getVectors(); - vector counts(*max_element(yv.begin(), yv.end()) + 1); + std::vector counts(*std::max_element(yv.begin(), yv.end()) + 1); for (auto y : yv) { counts[y]++; } return counts; } else { - throw invalid_argument("Dataset not loaded."); + throw std::invalid_argument("Dataset not loaded."); } } - pair>&, vector&> Datasets::getVectors(const string& name) + pair>&, std::vector&> Datasets::getVectors(const std::string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectors(); } - pair>&, vector&> Datasets::getVectorsDiscretized(const string& name) + pair>&, std::vector&> Datasets::getVectorsDiscretized(const std::string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getVectorsDiscretized(); } - pair Datasets::getTensors(const string& name) + pair Datasets::getTensors(const std::string& name) { if (!datasets[name]->isLoaded()) { datasets[name]->load(); } return datasets[name]->getTensors(); } - bool Datasets::isDataset(const string& name) const + bool Datasets::isDataset(const std::string& name) const { return datasets.find(name) != datasets.end(); } diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h index 1d16400..4ead616 100644 --- a/src/Platform/Datasets.h +++ b/src/Platform/Datasets.h @@ -2,29 +2,28 @@ #define DATASETS_H #include "Dataset.h" namespace platform { - using namespace std; class Datasets { private: - string path; + std::string path; fileType_t fileType; - 
string sfileType; - map> datasets; + std::string sfileType; + std::map> datasets; bool discretize; void load(); // Loads the list of datasets public: - explicit Datasets(bool discretize, string sfileType) : discretize(discretize), sfileType(sfileType) { load(); }; - vector getNames(); - vector getFeatures(const string& name) const; - int getNSamples(const string& name) const; - string getClassName(const string& name) const; - int getNClasses(const string& name); - vector getClassesCounts(const string& name) const; - map> getStates(const string& name) const; - pair>&, vector&> getVectors(const string& name); - pair>&, vector&> getVectorsDiscretized(const string& name); - pair getTensors(const string& name); - bool isDataset(const string& name) const; - void loadDataset(const string& name) const; + explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); }; + std::vector getNames(); + std::vector getFeatures(const std::string& name) const; + int getNSamples(const std::string& name) const; + std::string getClassName(const std::string& name) const; + int getNClasses(const std::string& name); + std::vector getClassesCounts(const std::string& name) const; + std::map> getStates(const std::string& name) const; + std::pair>&, std::vector&> getVectors(const std::string& name); + std::pair>&, std::vector&> getVectorsDiscretized(const std::string& name); + std::pair getTensors(const std::string& name); + bool isDataset(const std::string& name) const; + void loadDataset(const std::string& name) const; }; }; diff --git a/src/Platform/ExcelFile.cc b/src/Platform/ExcelFile.cc index 4449df8..3149480 100644 --- a/src/Platform/ExcelFile.cc +++ b/src/Platform/ExcelFile.cc @@ -26,7 +26,7 @@ namespace platform { { return workbook; } - void ExcelFile::setProperties(string title) + void ExcelFile::setProperties(std::string title) { char line[title.size() + 1]; strcpy(line, title.c_str()); @@ -40,34 +40,34 @@ namespace platform { }; 
workbook_set_properties(workbook, &properties); } - lxw_format* ExcelFile::efectiveStyle(const string& style) + lxw_format* ExcelFile::efectiveStyle(const std::string& style) { lxw_format* efectiveStyle = NULL; if (style != "") { - string suffix = row % 2 ? "_odd" : "_even"; + std::string suffix = row % 2 ? "_odd" : "_even"; try { efectiveStyle = styles.at(style + suffix); } - catch (const out_of_range& oor) { + catch (const std::out_of_range& oor) { try { efectiveStyle = styles.at(style); } - catch (const out_of_range& oor) { - throw invalid_argument("Style " + style + " not found"); + catch (const std::out_of_range& oor) { + throw std::invalid_argument("Style " + style + " not found"); } } } return efectiveStyle; } - void ExcelFile::writeString(int row, int col, const string& text, const string& style) + void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style) { worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style)); } - void ExcelFile::writeInt(int row, int col, const int number, const string& style) + void ExcelFile::writeInt(int row, int col, const int number, const std::string& style) { worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); } - void ExcelFile::writeDouble(int row, int col, const double number, const string& style) + void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style) { worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); } @@ -76,7 +76,7 @@ namespace platform { uint32_t efectiveColor = odd ? 
colorEven : colorOdd; format_set_bg_color(style, lxw_color_t(efectiveColor)); } - void ExcelFile::createStyle(const string& name, lxw_format* style, bool odd) + void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd) { addColor(style, odd); if (name == "textCentered") { @@ -116,7 +116,7 @@ namespace platform { { auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" }; lxw_format* style; - for (string name : styleNames) { + for (std::string name : styleNames) { lxw_format* style = workbook_add_format(workbook); style = workbook_add_format(workbook); createStyle(name, style, true); diff --git a/src/Platform/ExcelFile.h b/src/Platform/ExcelFile.h index 931261e..efc115f 100644 --- a/src/Platform/ExcelFile.h +++ b/src/Platform/ExcelFile.h @@ -5,14 +5,13 @@ #include #include "xlsxwriter.h" -using namespace std; namespace platform { - struct separated : numpunct { + struct separated : std::numpunct { char do_decimal_point() const { return ','; } char do_thousands_sep() const { return '.'; } - string do_grouping() const { return "\03"; } + std::string do_grouping() const { return "\03"; } }; class ExcelFile { public: @@ -21,17 +20,17 @@ namespace platform { ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet); lxw_workbook* getWorkbook(); protected: - void setProperties(string title); - void writeString(int row, int col, const string& text, const string& style = ""); - void writeInt(int row, int col, const int number, const string& style = ""); - void writeDouble(int row, int col, const double number, const string& style = ""); + void setProperties(std::string title); + void writeString(int row, int col, const std::string& text, const std::string& style = ""); + void writeInt(int row, int col, const int number, const std::string& style = ""); + void writeDouble(int row, int col, const double number, const std::string& style = ""); void createFormats(); - void createStyle(const string& name, lxw_format* 
style, bool odd); + void createStyle(const std::string& name, lxw_format* style, bool odd); void addColor(lxw_format* style, bool odd); - lxw_format* efectiveStyle(const string& name); + lxw_format* efectiveStyle(const std::string& name); lxw_workbook* workbook; lxw_worksheet* worksheet; - map styles; + std::map styles; int row; int normalSize; //font size for report body uint32_t colorTitle; diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index 219295c..db8c20a 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -6,7 +6,7 @@ #include "Paths.h" namespace platform { using json = nlohmann::json; - string get_date() + std::string get_date() { time_t rawtime; tm* timeinfo; @@ -16,7 +16,7 @@ namespace platform { oss << std::put_time(timeinfo, "%Y-%m-%d"); return oss.str(); } - string get_time() + std::string get_time() { time_t rawtime; tm* timeinfo; @@ -27,9 +27,9 @@ namespace platform { return oss.str(); } Experiment::Experiment() : hyperparameters(json::parse("{}")) {} - string Experiment::get_file_name() + std::string Experiment::get_file_name() { - string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; + std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? 
"1" : "0") + ".json"; return result; } @@ -81,7 +81,7 @@ namespace platform { } return result; } - void Experiment::save(const string& path) + void Experiment::save(const std::string& path) { json data = build_json(); ofstream file(path + "/" + get_file_name()); @@ -99,20 +99,20 @@ namespace platform { void Experiment::show() { json data = build_json(); - cout << data.dump(4) << endl; + std::cout << data.dump(4) << std::endl; } - void Experiment::go(vector filesToProcess, bool quiet) + void Experiment::go(std::vector filesToProcess, bool quiet) { - cout << "*** Starting experiment: " << title << " ***" << endl; + std::cout << "*** Starting experiment: " << title << " ***" << std::endl; for (auto fileName : filesToProcess) { - cout << "- " << setw(20) << left << fileName << " " << right << flush; + std::cout << "- " << setw(20) << left << fileName << " " << right << flush; cross_validation(fileName, quiet); - cout << endl; + std::cout << std::endl; } } - string getColor(bayesnet::status_t status) + std::string getColor(bayesnet::status_t status) { switch (status) { case bayesnet::NORMAL: @@ -126,13 +126,13 @@ namespace platform { } } - void showProgress(int fold, const string& color, const string& phase) + void showProgress(int fold, const std::string& color, const std::string& phase) { - string prefix = phase == "a" ? "" : "\b\b\b\b"; - cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; + std::string prefix = phase == "a" ? 
"" : "\b\b\b\b"; + std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; } - void Experiment::cross_validation(const string& fileName, bool quiet) + void Experiment::cross_validation(const std::string& fileName, bool quiet) { auto datasets = platform::Datasets(discretized, Paths::datasets()); // Get dataset @@ -142,14 +142,14 @@ namespace platform { auto samples = datasets.getNSamples(fileName); auto className = datasets.getClassName(fileName); if (!quiet) { - cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; + std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; } // Prepare Result auto result = Result(); auto [values, counts] = at::_unique(y); result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); result.setHyperparameters(hyperparameters); - // Initialize results vectors + // Initialize results std::vectors int nResults = nfolds * static_cast(randomSeeds.size()); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64); @@ -162,7 +162,7 @@ namespace platform { int item = 0; for (auto seed : randomSeeds) { if (!quiet) - cout << "(" << seed << ") doing Fold: " << flush; + std::cout << "(" << seed << ") doing Fold: " << flush; Fold* fold; if (stratified) fold = new StratifiedKFold(nfolds, y, seed); @@ -204,8 +204,8 @@ namespace platform { accuracy_train[item] = accuracy_train_value; accuracy_test[item] = accuracy_test_value; if (!quiet) - cout << "\b\b\b, " << flush; - // Store results and times in vector + std::cout << "\b\b\b, " << flush; + // Store results and times in std::vector result.addScoreTrain(accuracy_train_value); result.addScoreTest(accuracy_test_value); result.addTimeTrain(train_time[item].item()); @@ -214,7 +214,7 @@ namespace platform { clf.reset(); } if (!quiet) - cout << "end. 
" << flush; + std::cout << "end. " << flush; delete fold; } result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index 1af372e..00438b6 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -10,34 +10,33 @@ #include "KDB.h" #include "AODE.h" -using namespace std; namespace platform { using json = nlohmann::json; class Timer { private: - chrono::high_resolution_clock::time_point begin; + std::chrono::high_resolution_clock::time_point begin; public: Timer() = default; ~Timer() = default; - void start() { begin = chrono::high_resolution_clock::now(); } + void start() { begin = std::chrono::high_resolution_clock::now(); } double getDuration() { - chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now(); - chrono::duration time_span = chrono::duration_cast>(end - begin); + std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + std::chrono::duration time_span = std::chrono::duration_cast> (end - begin); return time_span.count(); } }; class Result { private: - string dataset, model_version; + std::string dataset, model_version; json hyperparameters; int samples{ 0 }, features{ 0 }, classes{ 0 }; double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 }; float nodes{ 0 }, leaves{ 0 }, depth{ 0 }; - vector scores_train, scores_test, times_train, times_test; + std::vector scores_train, scores_test, times_train, times_test; public: Result() = default; - Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; } + Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; } Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } Result& setSamples(int samples) 
{ this->samples = samples; return *this; } Result& setFeatures(int features) { this->features = features; return *this; } @@ -59,7 +58,7 @@ namespace platform { Result& addTimeTest(double time) { times_test.push_back(time); return *this; } const float get_score_train() const { return score_train; } float get_score_test() { return score_test; } - const string& getDataset() const { return dataset; } + const std::string& getDataset() const { return dataset; } const json& getHyperparameters() const { return hyperparameters; } const int getSamples() const { return samples; } const int getFeatures() const { return features; } @@ -75,30 +74,30 @@ namespace platform { const float getNodes() const { return nodes; } const float getLeaves() const { return leaves; } const float getDepth() const { return depth; } - const vector& getScoresTrain() const { return scores_train; } - const vector& getScoresTest() const { return scores_test; } - const vector& getTimesTrain() const { return times_train; } - const vector& getTimesTest() const { return times_test; } + const std::vector& getScoresTrain() const { return scores_train; } + const std::vector& getScoresTest() const { return scores_test; } + const std::vector& getTimesTrain() const { return times_train; } + const std::vector& getTimesTest() const { return times_test; } }; class Experiment { private: - string title, model, platform, score_name, model_version, language_version, language; + std::string title, model, platform, score_name, model_version, language_version, language; bool discretized{ false }, stratified{ false }; - vector results; - vector randomSeeds; + std::vector results; + std::vector randomSeeds; json hyperparameters = "{}"; int nfolds{ 0 }; float duration{ 0 }; json build_json(); public: Experiment(); - Experiment& setTitle(const string& title) { this->title = title; return *this; } - Experiment& setModel(const string& model) { this->model = model; return *this; } - Experiment& setPlatform(const string& 
platform) { this->platform = platform; return *this; } - Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; } - Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; } - Experiment& setLanguage(const string& language) { this->language = language; return *this; } - Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; } + Experiment& setTitle(const std::string& title) { this->title = title; return *this; } + Experiment& setModel(const std::string& model) { this->model = model; return *this; } + Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; } + Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; } + Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; } + Experiment& setLanguage(const std::string& language) { this->language = language; return *this; } + Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; } Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } @@ -106,10 +105,10 @@ namespace platform { Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; } Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } - string get_file_name(); - void save(const string& path); - void cross_validation(const string& fileName, bool quiet); - void go(vector filesToProcess, bool quiet); + 
std::string get_file_name(); + void save(const std::string& path); + void cross_validation(const std::string& fileName, bool quiet); + void go(std::vector filesToProcess, bool quiet); void show(); void report(); }; diff --git a/src/Platform/Folding.cc b/src/Platform/Folding.cc index d55d311..05f5923 100644 --- a/src/Platform/Folding.cc +++ b/src/Platform/Folding.cc @@ -4,23 +4,23 @@ namespace platform { Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) { - random_device rd; - random_seed = default_random_engine(seed == -1 ? rd() : seed); - srand(seed == -1 ? time(0) : seed); + std::random_device rd; + random_seed = std::default_random_engine(seed == -1 ? rd() : seed); + std::srand(seed == -1 ? time(0) : seed); } - KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector(n)) + KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector(n)) { - iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 + std::iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 shuffle(indices.begin(), indices.end(), random_seed); } - pair, vector> KFold::getFold(int nFold) + std::pair, std::vector> KFold::getFold(int nFold) { if (nFold >= k || nFold < 0) { - throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")"); + throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); } int nTest = n / k; - auto train = vector(); - auto test = vector(); + auto train = std::vector(); + auto test = std::vector(); for (int i = 0; i < n; i++) { if (i >= nTest * nFold && i < nTest * (nFold + 1)) { test.push_back(indices[i]); @@ -33,10 +33,10 @@ namespace platform { StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed) { n = y.numel(); - this->y = vector(y.data_ptr(), y.data_ptr() + n); + this->y = std::vector(y.data_ptr(), y.data_ptr() + n); build(); } - StratifiedKFold::StratifiedKFold(int k, 
const vector& y, int seed) + StratifiedKFold::StratifiedKFold(int k, const std::vector& y, int seed) : Fold(k, y.size(), seed) { this->y = y; @@ -45,12 +45,12 @@ namespace platform { } void StratifiedKFold::build() { - stratified_indices = vector>(k); + stratified_indices = std::vector>(k); int fold_size = n / k; // Compute class counts and indices - auto class_indices = map>(); - vector class_counts(*max_element(y.begin(), y.end()) + 1, 0); + auto class_indices = std::map>(); + std::vector class_counts(*max_element(y.begin(), y.end()) + 1, 0); for (auto i = 0; i < n; ++i) { class_counts[y[i]]++; class_indices[y[i]].push_back(i); @@ -63,8 +63,8 @@ namespace platform { for (auto label = 0; label < class_counts.size(); ++label) { auto num_samples_to_take = class_counts.at(label) / k; if (num_samples_to_take == 0) { - cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label) - << ") is less than the number of folds (" << k << ")." << endl; + std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label) + << ") is less than the number of folds (" << k << ")." 
<< std::endl; faulty = true; continue; } @@ -74,7 +74,7 @@ namespace platform { move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ## class_indices[label].erase(class_indices[label].begin(), it); } - auto chosen = vector(k, false); + auto chosen = std::vector(k, false); while (remainder_samples_to_take > 0) { int fold = (rand() % static_cast(k)); if (chosen.at(fold)) { @@ -88,13 +88,13 @@ namespace platform { } } } - pair, vector> StratifiedKFold::getFold(int nFold) + std::pair, std::vector> StratifiedKFold::getFold(int nFold) { if (nFold >= k || nFold < 0) { - throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")"); + throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); } - vector test_indices = stratified_indices[nFold]; - vector train_indices; + std::vector test_indices = stratified_indices[nFold]; + std::vector train_indices; for (int i = 0; i < k; ++i) { if (i == nFold) continue; train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end()); diff --git a/src/Platform/Folding.h b/src/Platform/Folding.h index 6f0c909..de3bd3a 100644 --- a/src/Platform/Folding.h +++ b/src/Platform/Folding.h @@ -3,37 +3,36 @@ #include #include #include -using namespace std; namespace platform { class Fold { protected: int k; int n; int seed; - default_random_engine random_seed; + std::default_random_engine random_seed; public: Fold(int k, int n, int seed = -1); - virtual pair, vector> getFold(int nFold) = 0; + virtual std::pair, std::vector> getFold(int nFold) = 0; virtual ~Fold() = default; int getNumberOfFolds() { return k; } }; class KFold : public Fold { private: - vector indices; + std::vector indices; public: KFold(int k, int n, int seed = -1); - pair, vector> getFold(int nFold) override; + std::pair, std::vector> getFold(int nFold) override; }; class StratifiedKFold : public Fold { private: - vector 
y; - vector> stratified_indices; + std::vector y; + std::vector> stratified_indices; void build(); bool faulty = false; // Only true if the number of samples of any class is less than the number of folds. public: - StratifiedKFold(int k, const vector& y, int seed = -1); + StratifiedKFold(int k, const std::vector& y, int seed = -1); StratifiedKFold(int k, torch::Tensor& y, int seed = -1); - pair, vector> getFold(int nFold) override; + std::pair, std::vector> getFold(int nFold) override; bool isFaulty() { return faulty; } }; } diff --git a/src/Platform/ManageResults.cc b/src/Platform/ManageResults.cc index 37f262b..dc03979 100644 --- a/src/Platform/ManageResults.cc +++ b/src/Platform/ManageResults.cc @@ -10,7 +10,7 @@ namespace platform { - ManageResults::ManageResults(int numFiles, const string& model, const string& score, bool complete, bool partial, bool compare) : + ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) : numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial)) { indexList = true; @@ -23,7 +23,7 @@ namespace platform { void ManageResults::doMenu() { if (results.empty()) { - cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << endl; + std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl; return; } results.sortDate(); @@ -32,68 +32,68 @@ namespace platform { if (openExcel) { workbook_close(workbook); } - cout << Colors::RESET() << "Done!" << endl; + std::cout << Colors::RESET() << "Done!" << std::endl; } void ManageResults::list() { auto temp = ConfigLocale(); - string suffix = numFiles != results.size() ? " of " + to_string(results.size()) : ""; - stringstream oss; + std::string suffix = numFiles != results.size() ? 
" of " + std::to_string(results.size()) : ""; + std::stringstream oss; oss << "Results on screen: " << numFiles << suffix; - cout << Colors::GREEN() << oss.str() << endl; - cout << string(oss.str().size(), '-') << endl; + std::cout << Colors::GREEN() << oss.str() << std::endl; + std::cout << std::string(oss.str().size(), '-') << std::endl; if (complete) { - cout << Colors::MAGENTA() << "Only listing complete results" << endl; + std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl; } if (partial) { - cout << Colors::MAGENTA() << "Only listing partial results" << endl; + std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl; } auto i = 0; int maxModel = results.maxModelSize(); - cout << Colors::GREEN() << " # Date " << setw(maxModel) << left << "Model" << " Score Name Score C/P Duration Title" << endl; - cout << "=== ========== " << string(maxModel, '=') << " =========== =========== === ========= =============================================================" << endl; + std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl; + std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl; bool odd = true; for (auto& result : results) { auto color = odd ? 
Colors::BLUE() : Colors::CYAN(); - cout << color << setw(3) << fixed << right << i++ << " "; - cout << result.to_string(maxModel) << endl; + std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; + std::cout << result.to_string(maxModel) << std::endl; if (i == numFiles) { break; } odd = !odd; } } - bool ManageResults::confirmAction(const string& intent, const string& fileName) const + bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const { - string color; + std::string color; if (intent == "delete") { color = Colors::RED(); } else { color = Colors::YELLOW(); } - string line; + std::string line; bool finished = false; while (!finished) { - cout << color << "Really want to " << intent << " " << fileName << "? (y/n): "; - getline(cin, line); + std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): "; + getline(std::cin, line); finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n')); } if (tolower(line[0]) == 'y') { return true; } - cout << "Not done!" << endl; + std::cout << "Not done!" 
<< std::endl; return false; } void ManageResults::report(const int index, const bool excelReport) { - cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << endl; + std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl; auto data = results.at(index).load(); if (excelReport) { ReportExcel reporter(data, compare, workbook); reporter.show(); openExcel = true; workbook = reporter.getWorkbook(); - cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << endl; + std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl; } else { ReportConsole reporter(data, compare); reporter.show(); @@ -103,20 +103,20 @@ namespace platform { { // Show a dataset result inside a report auto data = results.at(index).load(); - cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << endl; + std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl; ReportConsole reporter(data, compare, idx); reporter.show(); } void ManageResults::sortList() { - cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): "; - string line; + std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): "; + std::string line; char option; - getline(cin, line); + getline(std::cin, line); if (line.size() == 0) return; if (line.size() > 1) { - cout << "Invalid option" << endl; + std::cout << "Invalid option" << std::endl; return; } option = line[0]; @@ -134,7 +134,7 @@ namespace platform { results.sortModel(); break; default: - cout << "Invalid option" << endl; + std::cout << "Invalid option" << std::endl; } } void ManageResults::menu() @@ -142,9 +142,9 @@ namespace platform { char option; int index, subIndex; bool finished = false; - string filename; + std::string filename; // tuple - vector> mainOptions = { + std::vector> mainOptions = { {"quit", 'q', false}, {"list", 'l', 
false}, {"delete", 'd', true}, @@ -153,7 +153,7 @@ namespace platform { {"report", 'r', true}, {"excel", 'e', true} }; - vector> listOptions = { + std::vector> listOptions = { {"report", 'r', true}, {"list", 'l', false}, {"quit", 'q', false} @@ -161,9 +161,9 @@ namespace platform { auto parser = CommandParser(); while (!finished) { if (indexList) { - tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1); + std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1); } else { - tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1); + std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1); } switch (option) { case 'q': @@ -177,9 +177,9 @@ namespace platform { filename = results.at(index).getFilename(); if (!confirmAction("delete", filename)) break; - cout << "Deleting " << filename << endl; + std::cout << "Deleting " << filename << std::endl; results.deleteResult(index); - cout << "File: " + filename + " deleted!" << endl; + std::cout << "File: " + filename + " deleted!" << std::endl; list(); break; case 'h': @@ -187,9 +187,9 @@ namespace platform { if (!confirmAction("hide", filename)) break; filename = results.at(index).getFilename(); - cout << "Hiding " << filename << endl; + std::cout << "Hiding " << filename << std::endl; results.hideResult(index, Paths::hiddenResults()); - cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << endl; + std::cout << "File: " + filename + " hidden! 
(moved to " << Paths::hiddenResults() << ")" << std::endl; list(); break; case 's': diff --git a/src/Platform/ManageResults.h b/src/Platform/ManageResults.h index 3766970..6bd3704 100644 --- a/src/Platform/ManageResults.h +++ b/src/Platform/ManageResults.h @@ -6,12 +6,12 @@ namespace platform { class ManageResults { public: - ManageResults(int numFiles, const string& model, const string& score, bool complete, bool partial, bool compare); + ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare); ~ManageResults() = default; void doMenu(); private: void list(); - bool confirmAction(const string& intent, const string& fileName) const; + bool confirmAction(const std::string& intent, const std::string& fileName) const; void report(const int index, const bool excelReport); void showIndex(const int index, const int idx); void sortList(); diff --git a/src/Platform/Models.cc b/src/Platform/Models.cc index 08b4a45..2791f1a 100644 --- a/src/Platform/Models.cc +++ b/src/Platform/Models.cc @@ -1,6 +1,5 @@ #include "Models.h" namespace platform { - using namespace std; // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory Models* Models::factory = nullptr;; Models* Models::instance() @@ -10,13 +9,13 @@ namespace platform { factory = new Models(); return factory; } - void Models::registerFactoryFunction(const string& name, + void Models::registerFactoryFunction(const std::string& name, function classFactoryFunction) { // register the class factory function functionRegistry[name] = classFactoryFunction; } - shared_ptr Models::create(const string& name) + shared_ptr Models::create(const std::string& name) { bayesnet::BaseClassifier* instance = nullptr; @@ -30,23 +29,22 @@ namespace platform { else return nullptr; } - vector Models::getNames() + std::vector Models::getNames() { - vector names; + std::vector names; transform(functionRegistry.begin(), functionRegistry.end(), 
back_inserter(names), - [](const pair>& pair) { return pair.first; }); + [](const pair>& pair) { return pair.first; }); return names; } - string Models::toString() + std::string Models::tostring() { - string result = ""; + std::string result = ""; for (const auto& pair : functionRegistry) { result += pair.first + ", "; } return "{" + result.substr(0, result.size() - 2) + "}"; } - - Registrar::Registrar(const string& name, function classFactoryFunction) + Registrar::Registrar(const std::string& name, function classFactoryFunction) { // register the class factory function Models::instance()->registerFactoryFunction(name, classFactoryFunction); diff --git a/src/Platform/Models.h b/src/Platform/Models.h index 6c5d437..bcf6792 100644 --- a/src/Platform/Models.h +++ b/src/Platform/Models.h @@ -14,7 +14,7 @@ namespace platform { class Models { private: - map> functionRegistry; + map> functionRegistry; static Models* factory; //singleton Models() {}; public: @@ -22,16 +22,16 @@ namespace platform { void operator=(const Models&) = delete; // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory static Models* instance(); - shared_ptr create(const string& name); - void registerFactoryFunction(const string& name, + shared_ptr create(const std::string& name); + void registerFactoryFunction(const std::string& name, function classFactoryFunction); - vector getNames(); - string toString(); + std::vector getNames(); + std::string tostring(); }; class Registrar { public: - Registrar(const string& className, function classFactoryFunction); + Registrar(const std::string& className, function classFactoryFunction); }; } #endif \ No newline at end of file diff --git a/src/Platform/ReportBase.cc b/src/Platform/ReportBase.cc index 2be08a5..49e6617 100644 --- a/src/Platform/ReportBase.cc +++ b/src/Platform/ReportBase.cc @@ -7,8 +7,8 @@ namespace platform { ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1) { - 
stringstream oss; - oss << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%"; + std::stringstream oss; + oss << "Better than ZeroR + " << std::setprecision(1) << fixed << margin * 100 << "%"; meaning = { {Symbols::equal_best, "Equal to best"}, {Symbols::better_best, "Better than best"}, @@ -16,10 +16,10 @@ namespace platform { {Symbols::upward_arrow, oss.str()} }; } - string ReportBase::fromVector(const string& key) + std::string ReportBase::fromVector(const std::string& key) { - stringstream oss; - string sep = ""; + std::stringstream oss; + std::string sep = ""; oss << "["; for (auto& item : data[key]) { oss << sep << item.get(); @@ -28,13 +28,13 @@ namespace platform { oss << "]"; return oss.str(); } - string ReportBase::fVector(const string& title, const json& data, const int width, const int precision) + std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision) { - stringstream oss; - string sep = ""; + std::stringstream oss; + std::string sep = ""; oss << title << "["; for (const auto& item : data) { - oss << sep << fixed << setw(width) << setprecision(precision) << item.get(); + oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get(); sep = ", "; } oss << "]"; @@ -45,25 +45,25 @@ namespace platform { header(); body(); } - string ReportBase::compareResult(const string& dataset, double result) + std::string ReportBase::compareResult(const std::string& dataset, double result) { - string status = " "; + std::string status = " "; if (compare) { - double best = bestResult(dataset, data["model"].get()); + double best = bestResult(dataset, data["model"].get()); if (result == best) { status = Symbols::equal_best; } else if (result > best) { status = Symbols::better_best; } } else { - if (data["score_name"].get() == "accuracy") { + if (data["score_name"].get() == "accuracy") { auto dt = Datasets(false, Paths::datasets()); dt.loadDataset(dataset); auto numClasses = 
dt.getNClasses(dataset); if (numClasses == 2) { - vector distribution = dt.getClassesCounts(dataset); + std::vector distribution = dt.getClassesCounts(dataset); double nSamples = dt.getNSamples(dataset); - vector::iterator maxValue = max_element(distribution.begin(), distribution.end()); + std::vector::iterator maxValue = max_element(distribution.begin(), distribution.end()); double mark = *maxValue / nSamples * (1 + margin); if (mark > 1) { mark = 0.9995; @@ -82,14 +82,14 @@ namespace platform { } return status; } - double ReportBase::bestResult(const string& dataset, const string& model) + double ReportBase::bestResult(const std::string& dataset, const std::string& model) { double value = 0.0; if (bestResults.size() == 0) { // try to load the best results - string score = data["score_name"]; + std::string score = data["score_name"]; replace(score.begin(), score.end(), '_', '-'); - string fileName = "best_results_" + score + "_" + model + ".json"; + std::string fileName = "best_results_" + score + "_" + model + ".json"; ifstream resultData(Paths::results() + "/" + fileName); if (resultData.is_open()) { bestResults = json::parse(resultData); diff --git a/src/Platform/ReportBase.h b/src/Platform/ReportBase.h index 5797b1b..35cde6b 100644 --- a/src/Platform/ReportBase.h +++ b/src/Platform/ReportBase.h @@ -8,7 +8,6 @@ using json = nlohmann::json; namespace platform { - using namespace std; class ReportBase { public: @@ -17,19 +16,19 @@ namespace platform { void show(); protected: json data; - string fromVector(const string& key); - string fVector(const string& title, const json& data, const int width, const int precision); + std::string fromVector(const std::string& key); + std::string fVector(const std::string& title, const json& data, const int width, const int precision); bool getExistBestFile(); virtual void header() = 0; virtual void body() = 0; virtual void showSummary() = 0; - string compareResult(const string& dataset, double result); - map summary; + 
std::string compareResult(const std::string& dataset, double result); + std::map summary; double margin; - map meaning; + std::map meaning; bool compare; private: - double bestResult(const string& dataset, const string& model); + double bestResult(const std::string& dataset, const std::string& model); json bestResults; bool existBestFile = true; }; diff --git a/src/Platform/ReportConsole.cc b/src/Platform/ReportConsole.cc index 2479d06..9a1ce0d 100644 --- a/src/Platform/ReportConsole.cc +++ b/src/Platform/ReportConsole.cc @@ -6,25 +6,30 @@ #include "CLocale.h" namespace platform { - string ReportConsole::headerLine(const string& text, int utf = 0) + std::string ReportConsole::headerLine(const std::string& text, int utf = 0) { int n = MAXL - text.length() - 3; n = n < 0 ? 0 : n; - return "* " + text + string(n + utf, ' ') + "*\n"; + return "* " + text + std::string(n + utf, ' ') + "*\n"; } void ReportConsole::header() { - stringstream oss; - cout << Colors::MAGENTA() << string(MAXL, '*') << endl; - cout << headerLine("Report " + data["model"].get() + " ver. " + data["version"].get() + " with " + to_string(data["folds"].get()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get() + " " + data["time"].get()); - cout << headerLine(data["title"].get()); - cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get() ? "True" : "False")); - oss << "Execution took " << setprecision(2) << fixed << data["duration"].get() << " seconds, " << data["duration"].get() / 3600 << " hours, on " << data["platform"].get(); - cout << headerLine(oss.str()); - cout << headerLine("Score is " + data["score_name"].get()); - cout << string(MAXL, '*') << endl; - cout << endl; + std::stringstream oss; + std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl; + std::cout << headerLine( + "Report " + data["model"].get() + " ver. 
" + data["version"].get() + + " with " + std::to_string(data["folds"].get()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) + + " random seeds. " + data["date"].get() + " " + data["time"].get() + ); + std::cout << headerLine(data["title"].get()); + std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get() ? "True" : "False")); + oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get() + << " seconds, " << data["duration"].get() / 3600 << " hours, on " << data["platform"].get(); + std::cout << headerLine(oss.str()); + std::cout << headerLine("Score is " + data["score_name"].get()); + std::cout << std::string(MAXL, '*') << std::endl; + std::cout << std::endl; } void ReportConsole::body() { @@ -32,12 +37,12 @@ namespace platform { int maxHyper = 15; int maxDataset = 7; for (const auto& r : data["results"]) { - maxHyper = max(maxHyper, (int)r["hyperparameters"].dump().size()); - maxDataset = max(maxDataset, (int)r["dataset"].get().size()); + maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size()); + maxDataset = std::max(maxDataset, (int)r["dataset"].get().size()); } - cout << Colors::GREEN() << " # " << setw(maxDataset) << left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; - cout << "=== " << string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << string(maxHyper, '=') << endl; + std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. 
Cls Nodes Edges States Score Time Hyperparameters" << std::endl; + std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl; json lastResult; double totalScore = 0.0; bool odd = true; @@ -48,33 +53,33 @@ namespace platform { continue; } auto color = odd ? Colors::CYAN() : Colors::BLUE(); - cout << color; - cout << setw(3) << right << index++ << " "; - cout << setw(maxDataset) << left << r["dataset"].get() << " "; - cout << setw(6) << right << r["samples"].get() << " "; - cout << setw(5) << right << r["features"].get() << " "; - cout << setw(3) << right << r["classes"].get() << " "; - cout << setw(9) << setprecision(2) << fixed << r["nodes"].get() << " "; - cout << setw(9) << setprecision(2) << fixed << r["leaves"].get() << " "; - cout << setw(9) << setprecision(2) << fixed << r["depth"].get() << " "; - cout << setw(8) << right << setprecision(6) << fixed << r["score"].get() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get(); - const string status = compareResult(r["dataset"].get(), r["score"].get()); - cout << status; - cout << setw(12) << right << setprecision(6) << fixed << r["time"].get() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get() << " "; - cout << r["hyperparameters"].dump(); - cout << endl; - cout << flush; + std::cout << color; + std::cout << std::setw(3) << std::right << index++ << " "; + std::cout << std::setw(maxDataset) << std::left << r["dataset"].get() << " "; + std::cout << std::setw(6) << std::right << r["samples"].get() << " "; + std::cout << std::setw(5) << std::right << r["features"].get() << " "; + std::cout << std::setw(3) << std::right << r["classes"].get() << " "; + std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get() << " "; + std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get() << " "; + std::cout << 
std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get() << " "; + std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get(); + const std::string status = compareResult(r["dataset"].get(), r["score"].get()); + std::cout << status; + std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get() << " "; + std::cout << r["hyperparameters"].dump(); + std::cout << std::endl; + std::cout << std::flush; lastResult = r; totalScore += r["score"].get(); odd = !odd; } if (data["results"].size() == 1 || selectedIndex != -1) { - cout << string(MAXL, '*') << endl; - cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); - cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); - cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); - cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); - cout << string(MAXL, '*') << endl; + std::cout << std::string(MAXL, '*') << std::endl; + std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); + std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); + std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); + std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); + std::cout << std::string(MAXL, '*') << std::endl; } else { footer(totalScore); } @@ -82,28 +87,28 @@ namespace platform { void ReportConsole::showSummary() { for (const auto& item : summary) { - stringstream oss; - oss << setw(3) << left << item.first; - oss << setw(3) << right << item.second << " "; - oss << left << meaning.at(item.first); - cout << headerLine(oss.str(), 2); + std::stringstream oss; + oss << 
std::setw(3) << std::left << item.first; + oss << std::setw(3) << std::right << item.second << " "; + oss << std::left << meaning.at(item.first); + std::cout << headerLine(oss.str(), 2); } } void ReportConsole::footer(double totalScore) { - cout << Colors::MAGENTA() << string(MAXL, '*') << endl; + std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl; showSummary(); - auto score = data["score_name"].get(); + auto score = data["score_name"].get(); auto best = BestScore::getScore(score); if (best.first != "") { - stringstream oss; + std::stringstream oss; oss << score << " compared to " << best.first << " .: " << totalScore / best.second; - cout << headerLine(oss.str()); + std::cout << headerLine(oss.str()); } if (!getExistBestFile() && compare) { - cout << headerLine("*** Best Results File not found. Couldn't compare any result!"); + std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!"); } - cout << string(MAXL, '*') << endl << Colors::RESET(); + std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET(); } } \ No newline at end of file diff --git a/src/Platform/ReportConsole.h b/src/Platform/ReportConsole.h index a36dd03..8ba3ffe 100644 --- a/src/Platform/ReportConsole.h +++ b/src/Platform/ReportConsole.h @@ -5,7 +5,6 @@ #include "Colors.h" namespace platform { - using namespace std; const int MAXL = 133; class ReportConsole : public ReportBase { public: @@ -13,7 +12,7 @@ namespace platform { virtual ~ReportConsole() = default; private: int selectedIndex; - string headerLine(const string& text, int utf); + std::string headerLine(const std::string& text, int utf); void header() override; void body() override; void footer(double totalScore); diff --git a/src/Platform/ReportExcel.cc b/src/Platform/ReportExcel.cc index 2411358..addbf4c 100644 --- a/src/Platform/ReportExcel.cc +++ b/src/Platform/ReportExcel.cc @@ -14,28 +14,28 @@ namespace platform { void ReportExcel::formatColumns() { 
worksheet_freeze_panes(worksheet, 6, 1); - vector columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 }; + std::vector columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 }; for (int i = 0; i < columns_sizes.size(); ++i) { worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); } } void ReportExcel::createWorksheet() { - const string name = data["model"].get(); - string suffix = ""; - string efectiveName; + const std::string name = data["model"].get(); + std::string suffix = ""; + std::string efectiveName; int num = 1; // Create a sheet with the name of the model while (true) { efectiveName = name + suffix; if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) { - suffix = to_string(++num); + suffix = std::to_string(++num); } else { worksheet = workbook_add_worksheet(workbook, efectiveName.c_str()); break; } if (num > 100) { - throw invalid_argument("Couldn't create sheet " + efectiveName); + throw std::invalid_argument("Couldn't create sheet " + efectiveName); } } } @@ -48,7 +48,7 @@ namespace platform { if (worksheet == NULL) { createWorksheet(); } - setProperties(data["title"].get()); + setProperties(data["title"].get()); createFormats(); formatColumns(); } @@ -60,26 +60,26 @@ namespace platform { void ReportExcel::header() { - locale mylocale(cout.getloc(), new separated); - locale::global(mylocale); - cout.imbue(mylocale); - stringstream oss; - string message = data["model"].get() + " ver. " + data["version"].get() + " " + - data["language"].get() + " ver. " + data["language_version"].get() + - " with " + to_string(data["folds"].get()) + " Folds cross validation and " + to_string(data["seeds"].size()) + - " random seeds. " + data["date"].get() + " " + data["time"].get(); + std::locale mylocale(std::cout.getloc(), new separated); + std::locale::global(mylocale); + std::cout.imbue(mylocale); + std::stringstream oss; + std::string message = data["model"].get() + " ver. 
" + data["version"].get() + " " + + data["language"].get() + " ver. " + data["language_version"].get() + + " with " + std::to_string(data["folds"].get()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) + + " random seeds. " + data["date"].get() + " " + data["time"].get(); worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]); - worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get().c_str(), styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get()).c_str(), styles["headerRest"]); + worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get().c_str(), styles["headerRest"]); + worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get()).c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]); - oss << setprecision(2) << fixed << data["duration"].get() << " s"; + oss << std::setprecision(2) << std::fixed << data["duration"].get() << " s"; worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]); oss.str(""); oss.clear(); - oss << setprecision(2) << fixed << data["duration"].get() / 3600 << " h"; + oss << std::setprecision(2) << std::fixed << data["duration"].get() / 3600 << " h"; worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get().c_str(), styles["headerRest"]); + worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]); oss.str(""); oss.clear(); @@ -93,7 +93,7 @@ namespace platform { void ReportExcel::body() { - auto head = vector( + auto head = std::vector( { "Dataset", "Samples", 
"Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time", "Time Std.", "Hyperparameters" }); int col = 0; @@ -105,9 +105,9 @@ namespace platform { int hypSize = 22; json lastResult; double totalScore = 0.0; - string hyperparameters; + std::string hyperparameters; for (const auto& r : data["results"]) { - writeString(row, col, r["dataset"].get(), "text"); + writeString(row, col, r["dataset"].get(), "text"); writeInt(row, col + 1, r["samples"].get(), "ints"); writeInt(row, col + 2, r["features"].get(), "ints"); writeInt(row, col + 3, r["classes"].get(), "ints"); @@ -116,7 +116,7 @@ namespace platform { writeDouble(row, col + 6, r["depth"].get(), "floats"); writeDouble(row, col + 7, r["score"].get(), "result"); writeDouble(row, col + 8, r["score_std"].get(), "result"); - const string status = compareResult(r["dataset"].get(), r["score"].get()); + const std::string status = compareResult(r["dataset"].get(), r["score"].get()); writeString(row, col + 9, status, "textCentered"); writeDouble(row, col + 10, r["time"].get(), "time"); writeDouble(row, col + 11, r["time_std"].get(), "time"); @@ -133,12 +133,12 @@ namespace platform { worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL); // Show totals if only one dataset is present in the result if (data["results"].size() == 1) { - for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { + for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { row++; col = 1; writeString(row, col, group, "text"); for (double item : lastResult[group]) { - string style = group.find("scores") != string::npos ? "result" : "time"; + std::string style = group.find("scores") != std::string::npos ? 
"result" : "time"; writeDouble(row, ++col, item, style); } } @@ -167,7 +167,7 @@ namespace platform { { showSummary(); row += 4 + summary.size(); - auto score = data["score_name"].get(); + auto score = data["score_name"].get(); auto best = BestScore::getScore(score); if (best.first != "") { worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text")); diff --git a/src/Platform/ReportExcel.h b/src/Platform/ReportExcel.h index 3d614b0..b7fda10 100644 --- a/src/Platform/ReportExcel.h +++ b/src/Platform/ReportExcel.h @@ -6,7 +6,6 @@ #include "ExcelFile.h" #include "Colors.h" namespace platform { - using namespace std; class ReportExcel : public ReportBase, public ExcelFile { public: explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL); diff --git a/src/Platform/Result.cc b/src/Platform/Result.cc index a444877..43c33d1 100644 --- a/src/Platform/Result.cc +++ b/src/Platform/Result.cc @@ -8,7 +8,7 @@ #include "CLocale.h" namespace platform { - Result::Result(const string& path, const string& filename) + Result::Result(const std::string& path, const std::string& filename) : path(path) , filename(filename) { @@ -31,28 +31,28 @@ namespace platform { json Result::load() const { - ifstream resultData(path + "/" + filename); + std::ifstream resultData(path + "/" + filename); if (resultData.is_open()) { json data = json::parse(resultData); return data; } - throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); + throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); } - string Result::to_string(int maxModel) const + std::string Result::to_string(int maxModel) const { auto tmp = ConfigLocale(); - stringstream oss; + std::stringstream oss; double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; - string durationUnit = duration > 3600 ? 
"h" : duration > 60 ? "m" : "s"; + std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; oss << date << " "; - oss << setw(maxModel) << left << model << " "; - oss << setw(11) << left << scoreName << " "; - oss << right << setw(11) << setprecision(7) << fixed << score << " "; + oss << std::setw(maxModel) << std::left << model << " "; + oss << std::setw(11) << std::left << scoreName << " "; + oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " "; auto completeString = isComplete() ? "C" : "P"; - oss << setw(1) << " " << completeString << " "; - oss << setw(7) << setprecision(2) << fixed << durationShow << " " << durationUnit << " "; - oss << setw(50) << left << title << " "; + oss << std::setw(1) << " " << completeString << " "; + oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " "; + oss << std::setw(50) << std::left << title << " "; return oss.str(); } } \ No newline at end of file diff --git a/src/Platform/Result.h b/src/Platform/Result.h index 4d35ae6..85ec832 100644 --- a/src/Platform/Result.h +++ b/src/Platform/Result.h @@ -5,31 +5,30 @@ #include #include namespace platform { - using namespace std; using json = nlohmann::json; class Result { public: - Result(const string& path, const string& filename); + Result(const std::string& path, const std::string& filename); json load() const; - string to_string(int maxModel) const; - string getFilename() const { return filename; }; - string getDate() const { return date; }; + std::string to_string(int maxModel) const; + std::string getFilename() const { return filename; }; + std::string getDate() const { return date; }; double getScore() const { return score; }; - string getTitle() const { return title; }; + std::string getTitle() const { return title; }; double getDuration() const { return duration; }; - string getModel() const { return model; }; - string getScoreName() const { return scoreName; }; + 
std::string getModel() const { return model; }; + std::string getScoreName() const { return scoreName; }; bool isComplete() const { return complete; }; private: - string path; - string filename; - string date; + std::string path; + std::string filename; + std::string date; double score; - string title; + std::string title; double duration; - string model; - string scoreName; + std::string model; + std::string scoreName; bool complete; }; }; diff --git a/src/Platform/Results.cc b/src/Platform/Results.cc index dfa3c84..4f6184f 100644 --- a/src/Platform/Results.cc +++ b/src/Platform/Results.cc @@ -2,7 +2,7 @@ #include namespace platform { - Results::Results(const string& path, const string& model, const string& score, bool complete, bool partial) : + Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) : path(path), model(model), scoreName(score), complete(complete), partial(partial) { load(); @@ -17,7 +17,7 @@ namespace platform { using std::filesystem::directory_iterator; for (const auto& file : directory_iterator(path)) { auto filename = file.path().filename().string(); - if (filename.find(".json") != string::npos && filename.find("results_") == 0) { + if (filename.find(".json") != std::string::npos && filename.find("results_") == 0) { auto result = Result(path, filename); bool addResult = true; if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete()) @@ -27,7 +27,7 @@ namespace platform { } } } - void Results::hideResult(int index, const string& pathHidden) + void Results::hideResult(int index, const std::string& pathHidden) { auto filename = files.at(index).getFilename(); rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str()); diff --git a/src/Platform/Results.h b/src/Platform/Results.h index f946cc3..aa293d8 100644 --- a/src/Platform/Results.h +++ 
b/src/Platform/Results.h @@ -6,32 +6,31 @@ #include #include "Result.h" namespace platform { - using namespace std; using json = nlohmann::json; class Results { public: - Results(const string& path, const string& model, const string& score, bool complete, bool partial); + Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial); void sortDate(); void sortScore(); void sortModel(); void sortDuration(); int maxModelSize() const { return maxModel; }; - void hideResult(int index, const string& pathHidden); + void hideResult(int index, const std::string& pathHidden); void deleteResult(int index); int size() const; bool empty() const; - vector::iterator begin() { return files.begin(); }; - vector::iterator end() { return files.end(); }; + std::vector::iterator begin() { return files.begin(); }; + std::vector::iterator end() { return files.end(); }; Result& at(int index) { return files.at(index); }; private: - string path; - string model; - string scoreName; + std::string path; + std::string model; + std::string scoreName; bool complete; bool partial; int maxModel; - vector files; + std::vector files; void load(); // Loads the list of results }; }; diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc index c64b9b0..2d58c36 100644 --- a/src/Platform/Statistics.cc +++ b/src/Platform/Statistics.cc @@ -9,7 +9,7 @@ namespace platform { - Statistics::Statistics(const vector& models, const vector& datasets, const json& data, double significance, bool output) : + Statistics::Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : models(models), datasets(datasets), data(data), significance(significance), output(output) { nModels = models.size(); @@ -20,27 +20,27 @@ namespace platform { void Statistics::fit() { if (nModels < 3 || nDatasets < 3) { - cerr << "nModels: " << nModels << endl; - cerr << "nDatasets: " << nDatasets << endl; - throw 
runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); + std::cerr << "nModels: " << nModels << std::endl; + std::cerr << "nDatasets: " << nDatasets << std::endl; + throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); } ranksModels.clear(); computeRanks(); // Set the control model as the one with the lowest average rank controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); computeWTL(); - maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); + maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); + maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); fitted = true; } - map assignRanks(vector>& ranksOrder) + std::map assignRanks(std::vector>& ranksOrder) { - // sort the ranksOrder vector by value - sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + // sort the ranksOrder std::vector by value + std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair& a, const std::pair& b) { return a.second > b.second; }); //Assign ranks to values and if they are the same they share the same averaged rank - map ranks; + std::map ranks; for (int i = 0; i < ranksOrder.size(); i++) { ranks[ranksOrder[i].first] = i + 1.0; } @@ -63,9 +63,9 @@ namespace platform { } void Statistics::computeRanks() { - map ranksLine; + std::map ranksLine; for (const auto& dataset : datasets) { - vector> ranksOrder; + std::vector> ranksOrder; for 
(const auto& model : models) { double value = data[model].at(dataset).at(0).get(); ranksOrder.push_back({ model, value }); @@ -118,11 +118,11 @@ namespace platform { if (!fitted) { fit(); } - stringstream oss; + std::stringstream oss; // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 // Post-hoc Holm test // Calculate the p-value for the models paired with the control model - map stats; // p-value of each model paired with the control model + std::map stats; // p-value of each model paired with the control model boost::math::normal dist(0.0, 1.0); double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); for (int i = 0; i < nModels; i++) { @@ -135,11 +135,11 @@ namespace platform { stats[i] = p_value; } // Sort the models by p-value - vector> statsOrder; + std::vector> statsOrder; for (const auto& stat : stats) { statsOrder.push_back({ stat.first, stat.second }); } - sort(statsOrder.begin(), statsOrder.end(), [](const pair& a, const pair& b) { + std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair& a, const std::pair& b) { return a.second < b.second; }); @@ -147,29 +147,29 @@ namespace platform { for (int i = 0; i < statsOrder.size(); ++i) { auto item = statsOrder.at(i); double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; - double p_value = min((double)1.0, item.second * (nModels - i)); - p_value = max(before, p_value); + double p_value = std::min((double)1.0, item.second * (nModels - i)); + p_value = std::max(before, p_value); statsOrder[i] = { item.first, p_value }; } holmResult.model = models.at(controlIdx); auto color = friedmanResult ? 
Colors::CYAN() : Colors::YELLOW(); oss << color; - oss << " *************************************************************************************************************" << endl; - oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; - oss << " Control model: " << models.at(controlIdx) << endl; - oss << " " << left << setw(maxModelName) << string("Model") << " p-value rank win tie loss Status" << endl; - oss << " " << string(maxModelName, '=') << " ============ ========= === === ==== =============" << endl; + oss << " *************************************************************************************************************" << std::endl; + oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; + oss << " Control model: " << models.at(controlIdx) << std::endl; + oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl; + oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl; // sort ranks from lowest to highest - vector> ranksOrder; + std::vector> ranksOrder; for (const auto& rank : ranks) { ranksOrder.push_back({ rank.first, rank.second }); } - sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair& a, const std::pair& b) { return a.second < b.second; }); // Show the control model info. 
- oss << " " << Colors::BLUE() << left << setw(maxModelName) << ranksOrder.at(0).first << " "; - oss << setw(12) << " " << setprecision(7) << fixed << " " << ranksOrder.at(0).second << endl; + oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " "; + oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl; for (const auto& item : ranksOrder) { auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); double pvalue = 0.0; @@ -185,15 +185,15 @@ namespace platform { auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA(); auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross; auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0"; - oss << " " << colorStatus << left << setw(maxModelName) << item.first << " "; - oss << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second; - oss << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss; - oss << " " << status << textStatus << endl; + oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " "; + oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second; + oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; + oss << " " << status << textStatus << std::endl; } - oss << color << " *************************************************************************************************************" << endl; + oss << color << " *************************************************************************************************************" << std::endl; oss << Colors::RESET(); if (output) { - cout << oss.str(); + std::cout << oss.str(); } } bool 
Statistics::friedmanTest() @@ -201,12 +201,12 @@ namespace platform { if (!fitted) { fit(); } - stringstream oss; + std::stringstream oss; // Friedman test // Calculate the Friedman statistic - oss << Colors::BLUE() << endl; - oss << "***************************************************************************************************************" << endl; - oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; + oss << Colors::BLUE() << std::endl; + oss << "***************************************************************************************************************" << std::endl; + oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl; double degreesOfFreedom = nModels - 1.0; double sumSquared = 0; for (const auto& rank : ranks) { @@ -218,21 +218,21 @@ namespace platform { boost::math::chi_squared chiSquared(degreesOfFreedom); long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); double criticalValue = quantile(chiSquared, 1 - significance); - oss << "Friedman statistic: " << friedmanQ << endl; - oss << "Critical χ2 Value for df=" << fixed << (int)degreesOfFreedom - << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; - oss << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; + oss << "Friedman statistic: " << friedmanQ << std::endl; + oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom + << " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl; + oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? 
"less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl; bool result; if (p_value < significance) { - oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; + oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl; result = true; } else { - oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; + oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl; result = false; } - oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl; + oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl; if (output) { - cout << oss.str(); + std::cout << oss.str(); } friedmanResult = { friedmanQ, criticalValue, p_value, result }; return result; @@ -245,7 +245,7 @@ namespace platform { { return holmResult; } - map>& Statistics::getRanks() + std::map>& Statistics::getRanks() { return ranksModels; } diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h index a84aed7..aee7409 100644 --- a/src/Platform/Statistics.h +++ b/src/Platform/Statistics.h @@ -5,7 +5,6 @@ #include #include -using namespace std; using json = nlohmann::json; namespace platform { @@ -21,30 +20,30 @@ namespace platform { bool reject; }; struct HolmLine { - string model; + std::string model; long double pvalue; double rank; WTL wtl; bool reject; }; struct HolmResult { - string model; - vector holmLines; + std::string model; + std::vector holmLines; }; class Statistics { public: - Statistics(const vector& models, const vector& datasets, const json& data, double significance = 0.05, bool output = true); + Statistics(const std::vector& models, const std::vector& datasets, const json& 
data, double significance = 0.05, bool output = true); bool friedmanTest(); void postHocHolmTest(bool friedmanResult); FriedmanResult& getFriedmanResult(); HolmResult& getHolmResult(); - map>& getRanks(); + std::map>& getRanks(); private: void fit(); void computeRanks(); void computeWTL(); - const vector& models; - const vector& datasets; + const std::vector& models; + const std::vector& datasets; const json& data; double significance; bool output; @@ -52,13 +51,13 @@ namespace platform { int nModels = 0; int nDatasets = 0; int controlIdx = 0; - map wtl; - map ranks; + std::map wtl; + std::map ranks; int maxModelName = 0; int maxDatasetName = 0; FriedmanResult friedmanResult; HolmResult holmResult; - map> ranksModels; + std::map> ranksModels; }; } #endif // !STATISTICS_H \ No newline at end of file diff --git a/src/Platform/Symbols.h b/src/Platform/Symbols.h index a9fa1e7..5a8c9be 100644 --- a/src/Platform/Symbols.h +++ b/src/Platform/Symbols.h @@ -1,18 +1,17 @@ #ifndef SYMBOLS_H #define SYMBOLS_H #include -using namespace std; namespace platform { class Symbols { public: - inline static const string check_mark{ "\u2714" }; - inline static const string exclamation{ "\u2757" }; - inline static const string black_star{ "\u2605" }; - inline static const string cross{ "\u2717" }; - inline static const string upward_arrow{ "\u27B6" }; - inline static const string down_arrow{ "\u27B4" }; - inline static const string equal_best{ check_mark }; - inline static const string better_best{ black_star }; + inline static const std::string check_mark{ "\u2714" }; + inline static const std::string exclamation{ "\u2757" }; + inline static const std::string black_star{ "\u2605" }; + inline static const std::string cross{ "\u2717" }; + inline static const std::string upward_arrow{ "\u27B6" }; + inline static const std::string down_arrow{ "\u27B4" }; + inline static const std::string equal_best{ check_mark }; + inline static const std::string better_best{ black_star }; }; } #endif // 
!SYMBOLS_H \ No newline at end of file diff --git a/src/Platform/Utils.h b/src/Platform/Utils.h index 6b6f599..1a08ac5 100644 --- a/src/Platform/Utils.h +++ b/src/Platform/Utils.h @@ -4,7 +4,7 @@ #include #include namespace platform { - //static vector split(const string& text, char delimiter); + //static std::vector split(const std::string& text, char delimiter); static std::vector split(const std::string& text, char delimiter) { std::vector result; diff --git a/src/Platform/b_best.cc b/src/Platform/b_best.cc index b23b3db..b559d03 100644 --- a/src/Platform/b_best.cc +++ b/src/Platform/b_best.cc @@ -4,7 +4,6 @@ #include "BestResults.h" #include "Colors.h" -using namespace std; argparse::ArgumentParser manageArguments(int argc, char** argv) { @@ -15,19 +14,19 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true); program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true); - program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const string& value) { + program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) { try { - auto k = stod(value); + auto k = std::stod(value); if (k < 0.01 || k > 0.15) { - throw runtime_error("Significance level hast to be a number in [0.01, 0.15]"); + throw std::runtime_error("Significance level hast to be a number in [0.01, 0.15]"); } return k; } - catch (const runtime_error& err) { - throw runtime_error(err.what()); + catch (const std::runtime_error& err) { + throw std::runtime_error(err.what()); } catch (...) 
{ - throw runtime_error("Number of folds must be an decimal number"); + throw std::runtime_error("Number of folds must be an decimal number"); }}); return program; } @@ -35,35 +34,35 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) int main(int argc, char** argv) { auto program = manageArguments(argc, argv); - string model, score; + std::string model, score; bool build, report, friedman, excel; double level; try { program.parse_args(argc, argv); - model = program.get("model"); - score = program.get("score"); + model = program.get("model"); + score = program.get("score"); build = program.get("build"); report = program.get("report"); friedman = program.get("friedman"); excel = program.get("excel"); level = program.get("level"); if (model == "" || score == "") { - throw runtime_error("Model and score name must be supplied"); + throw std::runtime_error("Model and score name must be supplied"); } if (friedman && model != "any") { - cerr << "Friedman test can only be used with all models" << endl; - cerr << program; + std::cerr << "Friedman test can only be used with all models" << std::endl; + std::cerr << program; exit(1); } if (!report && !build) { - cerr << "Either build, report or both, have to be selected to do anything!" << endl; - cerr << program; + std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl; + std::cerr << program; exit(1); } } - catch (const exception& err) { - cerr << err.what() << endl; - cerr << program; + catch (const std::exception& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; exit(1); } // Generate report @@ -72,8 +71,8 @@ int main(int argc, char** argv) if (model == "any") { results.buildAll(); } else { - string fileName = results.build(); - cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; + std::string fileName = results.build(); + std::cout << Colors::GREEN() << fileName << " created!" 
<< Colors::RESET() << std::endl; } } if (report) { diff --git a/src/Platform/b_list.cc b/src/Platform/b_list.cc index 581ee5f..00907aa 100644 --- a/src/Platform/b_list.cc +++ b/src/Platform/b_list.cc @@ -4,54 +4,53 @@ #include "Colors.h" #include "Datasets.h" -using namespace std; const int BALANCE_LENGTH = 75; struct separated : numpunct { char do_decimal_point() const { return ','; } char do_thousands_sep() const { return '.'; } - string do_grouping() const { return "\03"; } + std::string do_grouping() const { return "\03"; } }; -void outputBalance(const string& balance) +void outputBalance(const std::string& balance) { - auto temp = string(balance); + auto temp = std::string(balance); while (temp.size() > BALANCE_LENGTH - 1) { auto part = temp.substr(0, BALANCE_LENGTH); - cout << part << endl; - cout << setw(48) << " "; + std::cout << part << std::endl; + std::cout << setw(48) << " "; temp = temp.substr(BALANCE_LENGTH); } - cout << temp << endl; + std::cout << temp << std::endl; } int main(int argc, char** argv) { auto data = platform::Datasets(false, platform::Paths::datasets()); - locale mylocale(cout.getloc(), new separated); + locale mylocale(std::cout.getloc(), new separated); locale::global(mylocale); - cout.imbue(mylocale); - cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl; - string balanceBars = string(BALANCE_LENGTH, '='); - cout << "============================== ====== ===== === " << balanceBars << endl; + std::cout.imbue(mylocale); + std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl; + std::string balanceBars = std::string(BALANCE_LENGTH, '='); + std::cout << "============================== ====== ===== === " << balanceBars << std::endl; bool odd = true; for (const auto& dataset : data.getNames()) { auto color = odd ? 
Colors::CYAN() : Colors::BLUE(); - cout << color << setw(30) << left << dataset << " "; + std::cout << color << setw(30) << left << dataset << " "; data.loadDataset(dataset); auto nSamples = data.getNSamples(dataset); - cout << setw(6) << right << nSamples << " "; - cout << setw(5) << right << data.getFeatures(dataset).size() << " "; - cout << setw(3) << right << data.getNClasses(dataset) << " "; - stringstream oss; - string sep = ""; + std::cout << setw(6) << right << nSamples << " "; + std::cout << setw(5) << right << data.getFeatures(dataset).size() << " "; + std::cout << setw(3) << right << data.getNClasses(dataset) << " "; + std::stringstream oss; + std::string sep = ""; for (auto number : data.getClassesCounts(dataset)) { - oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; + oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; sep = " / "; } outputBalance(oss.str()); odd = !odd; } - cout << Colors::RESET() << endl; + std::cout << Colors::RESET() << std::endl; return 0; } diff --git a/src/Platform/b_main.cc b/src/Platform/b_main.cc index 033c8a1..25f206a 100644 --- a/src/Platform/b_main.cc +++ b/src/Platform/b_main.cc @@ -9,7 +9,6 @@ #include "Paths.h" -using namespace std; using json = nlohmann::json; argparse::ArgumentParser manageArguments() @@ -19,13 +18,13 @@ argparse::ArgumentParser manageArguments() program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment"); program.add_argument("-m", "--model") - .help("Model to use " + platform::Models::instance()->toString()) + .help("Model to use " + platform::Models::instance()->tostring()) .action([](const std::string& value) { - static const vector choices = platform::Models::instance()->getNames(); + static const std::vector choices = 
platform::Models::instance()->getNames(); if (find(choices.begin(), choices.end(), value) != choices.end()) { return value; } - throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); + throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring()); } ); program.add_argument("--title").default_value("").help("Experiment title"); @@ -33,19 +32,19 @@ argparse::ArgumentParser manageArguments() program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) { + program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { try { auto k = stoi(value); if (k < 2) { - throw runtime_error("Number of folds must be greater than 1"); + throw std::runtime_error("Number of folds must be greater than 1"); } return k; } catch (const runtime_error& err) { - throw runtime_error(err.what()); + throw std::runtime_error(err.what()); } catch (...) { - throw runtime_error("Number of folds must be an integer"); + throw std::runtime_error("Number of folds must be an integer"); }}); auto seed_values = env.getSeeds(); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. 
Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); @@ -54,39 +53,39 @@ argparse::ArgumentParser manageArguments() int main(int argc, char** argv) { - string file_name, model_name, title; + std::string file_name, model_name, title; json hyperparameters_json; bool discretize_dataset, stratified, saveResults, quiet; - vector seeds; - vector filesToTest; + std::vector seeds; + std::vector filesToTest; int n_folds; auto program = manageArguments(); try { program.parse_args(argc, argv); - file_name = program.get("dataset"); - model_name = program.get("model"); + file_name = program.get("dataset"); + model_name = program.get("model"); discretize_dataset = program.get("discretize"); stratified = program.get("stratified"); quiet = program.get("quiet"); n_folds = program.get("folds"); - seeds = program.get>("seeds"); - auto hyperparameters = program.get("hyperparameters"); + seeds = program.get>("seeds"); + auto hyperparameters = program.get("hyperparameters"); hyperparameters_json = json::parse(hyperparameters); - title = program.get("title"); + title = program.get("title"); if (title == "" && file_name == "") { throw runtime_error("title is mandatory if dataset is not provided"); } saveResults = program.get("save"); } catch (const exception& err) { - cerr << err.what() << endl; + cerr << err.what() << std::endl; cerr << program; exit(1); } auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); if (file_name != "") { if (!datasets.isDataset(file_name)) { - cerr << "Dataset " << file_name << " not found" << endl; + cerr << "Dataset " << file_name << " not found" << std::endl; exit(1); } if (title == "") { @@ -118,6 +117,6 @@ int main(int argc, char** argv) } if (!quiet) experiment.report(); - cout << "Done!" << endl; + std::cout << "Done!" 
<< std::endl; return 0; } diff --git a/src/Platform/b_manage.cc b/src/Platform/b_manage.cc index ef62868..d4b6fa1 100644 --- a/src/Platform/b_manage.cc +++ b/src/Platform/b_manage.cc @@ -2,7 +2,6 @@ #include #include "ManageResults.h" -using namespace std; argparse::ArgumentParser manageArguments(int argc, char** argv) { @@ -17,17 +16,17 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) program.parse_args(argc, argv); auto number = program.get("number"); if (number < 0) { - throw runtime_error("Number of results must be greater than or equal to 0"); + throw std::runtime_error("Number of results must be greater than or equal to 0"); } - auto model = program.get("model"); - auto score = program.get("score"); + auto model = program.get("model"); + auto score = program.get("score"); auto complete = program.get("complete"); auto partial = program.get("partial"); auto compare = program.get("compare"); } - catch (const exception& err) { - cerr << err.what() << endl; - cerr << program; + catch (const std::exception& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; exit(1); } return program; @@ -37,8 +36,8 @@ int main(int argc, char** argv) { auto program = manageArguments(argc, argv); int number = program.get("number"); - string model = program.get("model"); - string score = program.get("score"); + std::string model = program.get("model"); + std::string score = program.get("score"); auto complete = program.get("complete"); auto partial = program.get("partial"); auto compare = program.get("compare"); diff --git a/src/PyClassifiers/CMakeLists.txt b/src/PyClassifiers/CMakeLists.txt new file mode 100644 index 0000000..cc0f5a5 --- /dev/null +++ b/src/PyClassifiers/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) +include_directories(${BayesNet_SOURCE_DIR}/lib/Files) +include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) +include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) 
+include_directories(${BayesNet_SOURCE_DIR}/src/Platform) +add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc + KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc + Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) +target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/PyClassifiers/PyClf.h b/src/PyClassifiers/PyClf.h new file mode 100644 index 0000000..5a49dd8 --- /dev/null +++ b/src/PyClassifiers/PyClf.h @@ -0,0 +1,15 @@ +#ifndef PYCLF_H +#define PYCLF_H +#include +#include "DotEnv.h" +namespace PyClassifiers { + class PyClf { + public: + PyClf(const std::string& name); + virtual ~PyClf(); + private: + std::string name; + + }; +} /* namespace PyClassifiers */ +#endif /* PYCLF_H */ \ No newline at end of file diff --git a/src/PyClassifiers/Pyclf.cc b/src/PyClassifiers/Pyclf.cc new file mode 100644 index 0000000..d3cd7f5 --- /dev/null +++ b/src/PyClassifiers/Pyclf.cc @@ -0,0 +1,18 @@ +#include "PyClf.h" + +namespace PyClassifiers { + + PyClf::PyClf(const std::std::string& name) : name(name) + { + env = platform::DotEnv(); + + + } + + + PyClf::~PyClf() + { + + } + +} /* namespace PyClassifiers */ \ No newline at end of file diff --git a/tests/TestBayesMetrics.cc b/tests/TestBayesMetrics.cc index 97cda9f..5c34173 100644 --- a/tests/TestBayesMetrics.cc +++ b/tests/TestBayesMetrics.cc @@ -4,24 +4,23 @@ #include "BayesMetrics.h" #include "TestUtils.h" -using namespace std; TEST_CASE("Metrics Test", "[BayesNet]") { - string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); - map>> resultsKBest = { + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + map>> resultsKBest = { {"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}}, {"iris", {3, { 0, 3, 2 }} }, {"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}}, {"diabetes", {2, { 7, 1 }}} }; - map resultsMI = { + map resultsMI = { {"glass", 
0.12805398}, {"iris", 0.3158139948}, {"ecoli", 0.0089431099}, {"diabetes", 0.0345470614} }; - map, vector>> resultsMST = { + map, std::vector>> resultsMST = { { {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } }, { {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } }, { {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } }, @@ -41,7 +40,7 @@ TEST_CASE("Metrics Test", "[BayesNet]") SECTION("Test SelectKBestWeighted") { - vector kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first); + std::vector kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first); REQUIRE(kBest.size() == resultsKBest.at(file_name).first); REQUIRE(kBest == resultsKBest.at(file_name).second); } diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 51a5d27..671e961 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -2,9 +2,9 @@ #include #include #include -#include +#include #include -#include +#include #include "KDB.h" #include "TAN.h" #include "SPODE.h" @@ -18,7 +18,7 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") { - map , float> scores = { + map , float> scores = { // Diabetes {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, @@ -33,7 +33,7 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} }; - string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); auto raw = RawDatasets(file_name, false); SECTION("Test TAN classifier 
(" + file_name + ")") @@ -111,12 +111,12 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); } // for (auto scores : scores) { - // cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; + // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; // } } TEST_CASE("Models features", "[BayesNet]") { - auto graph = vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", + auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", "class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n", "class -> sepallength", "class -> sepalwidth", "class -> petallength", "class -> petalwidth", "petallength [shape=circle] \n", "petallength -> sepallength", "petalwidth [shape=circle] \n", "sepallength [shape=circle] \n", @@ -128,7 +128,7 @@ TEST_CASE("Models features", "[BayesNet]") clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); REQUIRE(clf.getNumberOfNodes() == 6); REQUIRE(clf.getNumberOfEdges() == 7); - REQUIRE(clf.show() == vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); + REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); REQUIRE(clf.graph("Test") == graph); } TEST_CASE("Get num features & num edges", "[BayesNet]") diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index db290b5..f572505 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -1,13 +1,13 @@ #include #include #include -#include +#include #include 
"TestUtils.h" #include "Network.h" -void buildModel(bayesnet::Network& net, const vector& features, const string& className) +void buildModel(bayesnet::Network& net, const std::vector& features, const std::std::string& className) { - vector> network = { {0, 1}, {0, 2}, {1, 3} }; + std::vector> network = { {0, 1}, {0, 2}, {1, 3} }; for (const auto& feature : features) { net.addNode(feature); } @@ -30,9 +30,9 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") { net.addNode("A"); net.addNode("B"); - REQUIRE(net.getFeatures() == vector{"A", "B"}); + REQUIRE(net.getFeatures() == std::vector{"A", "B"}); net.addNode("C"); - REQUIRE(net.getFeatures() == vector{"A", "B", "C"}); + REQUIRE(net.getFeatures() == std::vector{"A", "B", "C"}); } SECTION("Test get edges") { @@ -41,10 +41,10 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") net.addNode("C"); net.addEdge("A", "B"); net.addEdge("B", "C"); - REQUIRE(net.getEdges() == vector>{ {"A", "B"}, { "B", "C" } }); + REQUIRE(net.getEdges() == std::vector>{ {"A", "B"}, { "B", "C" } }); REQUIRE(net.getNumEdges() == 2); net.addEdge("A", "C"); - REQUIRE(net.getEdges() == vector>{ {"A", "B"}, { "A", "C" }, { "B", "C" } }); + REQUIRE(net.getEdges() == std::vector>{ {"A", "B"}, { "A", "C" }, { "B", "C" } }); REQUIRE(net.getNumEdges() == 3); } SECTION("Test getNodes") @@ -66,7 +66,7 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") buildModel(net, raw.featuresv, raw.classNamev); buildModel(net2, raw.featurest, raw.classNamet); buildModel(net3, raw.featurest, raw.classNamet); - vector> edges = { + std::vector> edges = { {"class", "sepallength"}, {"class", "sepalwidth"}, {"class", "petallength"}, {"class", "petalwidth" }, {"sepallength", "sepalwidth"}, {"sepallength", "petallength"}, {"sepalwidth", "petalwidth"} @@ -74,7 +74,7 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") REQUIRE(net.getEdges() == edges); REQUIRE(net2.getEdges() == edges); REQUIRE(net3.getEdges() == edges); - vector features = { "sepallength", "sepalwidth", 
"petallength", "petalwidth", "class" }; + std::vector features = { "sepallength", "sepalwidth", "petallength", "petalwidth", "class" }; REQUIRE(net.getFeatures() == features); REQUIRE(net2.getFeatures() == features); REQUIRE(net3.getFeatures() == features); @@ -84,7 +84,7 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") // Check Nodes parents & children for (const auto& feature : features) { // Parents - vector parents, parents2, parents3, children, children2, children3; + std::vector parents, parents2, parents3, children, children2, children3; auto nodeParents = nodes[feature]->getParents(); auto nodeParents2 = nodes2[feature]->getParents(); auto nodeParents3 = nodes3[feature]->getParents(); @@ -173,8 +173,8 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") // { // auto net = bayesnet::Network(); // net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv); - // vector> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} }; - // vector y_test = { 0, 1, 1, 0, 2 }; + // std::vector> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} }; + // std::vector y_test = { 0, 1, 1, 0, 2 }; // auto y_pred = net.predict(test); // REQUIRE(y_pred == y_test); // } @@ -183,7 +183,7 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]") // { // auto net = bayesnet::Network(); // net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv); - // vector> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} }; + // std::vector> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} }; // auto y_test = { 0, 1, 1, 0, 2 }; // auto y_pred = net.predict(test); // REQUIRE(y_pred == y_test); diff --git a/tests/TestFolding.cc b/tests/TestFolding.cc index 431f2d5..a7b3359 100644 --- a/tests/TestFolding.cc +++ b/tests/TestFolding.cc @@ -7,7 +7,7 @@ TEST_CASE("KFold Test", "[Platform][KFold]") { // Initialize a KFold object with k=5 and a seed of 19. 
- string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); auto raw = RawDatasets(file_name, true); int nFolds = 5; platform::KFold kfold(nFolds, raw.nSamples, 19); @@ -29,7 +29,7 @@ TEST_CASE("KFold Test", "[Platform][KFold]") } } -map counts(vector y, vector indices) +map counts(std::vector y, std::vector indices) { map result; for (auto i = 0; i < indices.size(); ++i) { @@ -40,8 +40,8 @@ map counts(vector y, vector indices) TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]") { - // Initialize a StratifiedKFold object with k=3, using the y vector, and a seed of 17. - string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + // Initialize a StratifiedKFold object with k=3, using the y std::vector, and a seed of 17. + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); int nFolds = GENERATE(3, 5, 10); auto raw = RawDatasets(file_name, true); platform::StratifiedKFold stratified_kfoldt(nFolds, raw.yt, 17); @@ -55,10 +55,10 @@ TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]") SECTION("Stratified Fold Test") { // Test each fold's size and contents. 
- auto counts = map>(); + auto counts = map>(); // Initialize the counts per Fold for (int i = 0; i < nFolds; ++i) { - counts[i] = vector(raw.classNumStates, 0); + counts[i] = std::vector(raw.classNumStates, 0); } // Check fold and compute counts of each fold for (int fold = 0; fold < nFolds; ++fold) { diff --git a/tests/TestUtils.cc b/tests/TestUtils.cc index b54be48..1a63675 100644 --- a/tests/TestUtils.cc +++ b/tests/TestUtils.cc @@ -1,19 +1,17 @@ #include "TestUtils.h" -using namespace std; -using namespace torch; class Paths { public: - static string datasets() + static std::string datasets() { return "../../data/"; } }; -pair, map> discretize(vector& X, mdlp::labels_t& y, vector features) +pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features) { - vector Xd; - map maxes; + std::vector Xd; + map maxes; auto fimdlp = mdlp::CPPFImdlp(); for (int i = 0; i < X.size(); i++) { fimdlp.fit(X[i], y); @@ -24,9 +22,9 @@ pair, map> discretize(vector discretizeDataset(vector& X, mdlp::labels_t& y) +std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y) { - vector Xd; + std::vector Xd; auto fimdlp = mdlp::CPPFImdlp(); for (int i = 0; i < X.size(); i++) { fimdlp.fit(X[i], y); @@ -36,7 +34,7 @@ vector discretizeDataset(vector& X, mdlp::label return Xd; } -bool file_exists(const string& name) +bool file_exists(const std::string& name) { if (FILE* file = fopen(name.c_str(), "r")) { fclose(file); @@ -46,30 +44,30 @@ bool file_exists(const string& name) } } -tuple, string, map>> loadDataset(const string& name, bool class_last, bool discretize_dataset) +tuple, std::string, map>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset) { auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); + handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); // Get Dataset X, y - vector& X = handler.getX(); + std::vector& X = handler.getX(); mdlp::labels_t& y 
= handler.getY(); // Get className & Features auto className = handler.getClassName(); - vector features; + std::vector features; auto attributes = handler.getAttributes(); transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); - Tensor Xd; - auto states = map>(); + torch::Tensor Xd; + auto states = map>(); if (discretize_dataset) { auto Xr = discretizeDataset(X, y); Xd = torch::zeros({ static_cast(Xr.size()), static_cast(Xr[0].size()) }, torch::kInt32); for (int i = 0; i < features.size(); ++i) { - states[features[i]] = vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); + states[features[i]] = std::vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); auto item = states.at(features[i]); iota(begin(item), end(item), 0); Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32)); } - states[className] = vector(*max_element(y.begin(), y.end()) + 1); + states[className] = std::vector(*max_element(y.begin(), y.end()) + 1); iota(begin(states.at(className)), end(states.at(className)), 0); } else { Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); @@ -80,27 +78,27 @@ tuple, string, map>> loadData return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; } -tuple>, vector, vector, string, map>> loadFile(const string& name) +tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name) { auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff"); + handler.load(Paths::datasets() + static_cast(name) + ".arff"); // Get Dataset X, y - vector& X = handler.getX(); + std::vector& X = handler.getX(); mdlp::labels_t& y = handler.getY(); // Get className & Features auto className = handler.getClassName(); - vector features; + std::vector features; auto attributes = handler.getAttributes(); transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return 
pair.first; }); // Discretize Dataset - vector Xd; - map maxes; + std::vector Xd; + map maxes; tie(Xd, maxes) = discretize(X, y, features); maxes[className] = *max_element(y.begin(), y.end()) + 1; - map> states; + map> states; for (auto feature : features) { - states[feature] = vector(maxes[feature]); + states[feature] = std::vector(maxes[feature]); } - states[className] = vector(maxes[className]); + states[className] = std::vector(maxes[className]); return { Xd, y, features, className, states }; } diff --git a/tests/TestUtils.h b/tests/TestUtils.h index 1d091a7..e6a713f 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -4,20 +4,19 @@ #include #include #include -#include +#include #include "ArffFiles.h" #include "CPPFImdlp.h" -using namespace std; -bool file_exists(const std::string& name); -pair, map> discretize(vector& X, mdlp::labels_t& y, vector features); -vector discretizeDataset(vector& X, mdlp::labels_t& y); -tuple>, vector, vector, string, map>> loadFile(const string& name); -tuple, string, map>> loadDataset(const string& name, bool class_last, bool discretize_dataset); +bool file_exists(const std::string& name); +std::pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features); +std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y); +std::tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name); +std::tuple, std::string, map>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset); class RawDatasets { public: - RawDatasets(const string& file_name, bool discretize) + RawDatasets(const std::string& file_name, bool discretize) { // Xt can be either discretized or not tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize); @@ -27,16 +26,16 @@ public: dataset = torch::cat({ Xt, yresized }, 0); nSamples = dataset.size(1); weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble); - weightsv = vector(nSamples, 1.0 / 
nSamples); + weightsv = std::vector(nSamples, 1.0 / nSamples); classNumStates = discretize ? statest.at(classNamet).size() : 0; } torch::Tensor Xt, yt, dataset, weights; - vector> Xv; - vector weightsv; - vector yv; - vector featurest, featuresv; - map> statest, statesv; - string classNamet, classNamev; + std::vector> Xv; + std::vector weightsv; + std::vector yv; + std::vector featurest, featuresv; + map> statest, statesv; + std::string classNamet, classNamev; int nSamples, classNumStates; double epsilon = 1e-5; };