diff --git a/.vscode/settings.json b/.vscode/settings.json index ca5a4e3..9159a42 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -99,7 +99,8 @@ "typeindex": "cpp", "shared_mutex": "cpp", "*.ipp": "cpp", - "cassert": "cpp" + "cassert": "cpp", + "charconv": "cpp" }, "cmake.configureOnOpen": false, "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools" diff --git a/sample/sample.cc b/sample/sample.cc index d2191f7..f5c41f7 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include "BaseClassifier.h" #include "ArffFiles.h" #include "Network.h" #include "BayesMetrics.h" @@ -143,38 +145,12 @@ int main(int argc, char** argv) states[className] = vector( maxes[className]); double score; - vector lines; - vector graph; - auto kdb = bayesnet::KDB(2); - auto aode = bayesnet::AODE(); - auto spode = bayesnet::SPODE(2); - auto tan = bayesnet::TAN(); - switch (hash_conv(model_name)) { - case "AODE"_sh: - aode.fit(Xd, y, features, className, states); - lines = aode.show(); - score = aode.score(Xd, y); - graph = aode.graph(); - break; - case "KDB"_sh: - kdb.fit(Xd, y, features, className, states); - lines = kdb.show(); - score = kdb.score(Xd, y); - graph = kdb.graph(); - break; - case "SPODE"_sh: - spode.fit(Xd, y, features, className, states); - lines = spode.show(); - score = spode.score(Xd, y); - graph = spode.graph(); - break; - case "TAN"_sh: - tan.fit(Xd, y, features, className, states); - lines = tan.show(); - score = tan.score(Xd, y); - graph = tan.graph(); - break; - } + auto classifiers = map({ { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) }, { "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() } }); + bayesnet::BaseClassifier* clf = classifiers[model_name]; + clf->fit(Xd, y, features, className, states); + score = clf->score(Xd, y); + auto lines = clf->show(); + auto graph = clf->graph(); for (auto line : lines) { cout << line << endl; } diff --git a/src/BayesNet/AODE.h b/src/BayesNet/AODE.h index a53ec8a..84386d3 100644 --- a/src/BayesNet/AODE.h +++ b/src/BayesNet/AODE.h @@ -8,7 +8,7 @@ namespace bayesnet { void train() override; public: AODE(); - vector graph(string title = "AODE"); + vector graph(string title = "AODE") override; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/BaseClassifier.h b/src/BayesNet/BaseClassifier.h index 2157805..3d69dfb 100644 --- a/src/BayesNet/BaseClassifier.h +++ b/src/BayesNet/BaseClassifier.h @@ -1,48 +1,17 @@ -#ifndef CLASSIFIERS_H -#define CLASSIFIERS_H +#ifndef BASE_H +#define BASE_H #include -#include "Network.h" -#include "BayesMetrics.h" -using namespace std; -using namespace torch; - +#include namespace bayesnet { + using namespace std; class BaseClassifier { - private: - bool fitted; - BaseClassifier& build(vector& features, string className, map>& states); - protected: - Network model; - int m, n; // m: number of samples, n: number of features - Tensor X; - vector> Xv; - Tensor y; - vector yv; - Tensor dataset; - Metrics metrics; - vector features; - string className; - map> states; - void checkFitParameters(); - virtual void train() = 0; public: - BaseClassifier(Network model); + virtual BaseClassifier& fit(vector>& X, vector& y, vector& features, string className, map>& states) = 0; + vector virtual predict(vector>& X) = 0; + float virtual score(vector>& X, vector& y) = 0; + vector virtual show() = 0; + vector virtual graph(string title = "") = 0; virtual ~BaseClassifier() = default; - BaseClassifier& fit(vector>& X, vector& y, vector& features, string className, map>& states); - void addNodes(); - int getNumberOfNodes(); - int getNumberOfEdges(); - Tensor predict(Tensor& X); - vector predict(vector>& X); - float score(Tensor& X, Tensor& y); - float score(vector>& X, vector& y); - vector show(); - virtual vector graph(string title) = 0; }; } -#endif - - - - - +#endif \ No newline at end of file diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index cbaa5ac..6433d93 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -1,2 +1,2 @@ -add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc) +add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc) target_link_libraries(BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/BaseClassifier.cc b/src/BayesNet/Classifier.cc similarity index 80% rename from src/BayesNet/BaseClassifier.cc rename to src/BayesNet/Classifier.cc index 0f0adba..b4e3abc 100644 --- a/src/BayesNet/BaseClassifier.cc +++ b/src/BayesNet/Classifier.cc @@ -1,12 +1,12 @@ -#include "BaseClassifier.h" +#include "Classifier.h" #include "bayesnetUtils.h" namespace bayesnet { using namespace std; using namespace torch; - BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} - BaseClassifier& BaseClassifier::build(vector& features, string className, map>& states) + Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} + Classifier& Classifier::build(vector& features, string className, map>& states) { dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); this->features = features; @@ -20,7 +20,7 @@ namespace bayesnet { fitted = true; return *this; } - BaseClassifier& BaseClassifier::fit(vector>& X, vector& y, vector& features, string className, map>& states) + Classifier& Classifier::fit(vector>& X, vector& y, vector& features, string className, map>& states) { this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt64); Xv = X; @@ -31,7 +31,7 @@ namespace bayesnet { yv = y; return build(features, className, states); } - void BaseClassifier::checkFitParameters() + void Classifier::checkFitParameters() { auto sizes = X.sizes(); m = sizes[0]; @@ -52,7 +52,7 @@ namespace bayesnet { } } - Tensor BaseClassifier::predict(Tensor& X) + Tensor Classifier::predict(Tensor& X) { if (!fitted) { throw logic_error("Classifier has not been fitted"); @@ -68,7 +68,7 @@ namespace bayesnet { auto ypred = torch::tensor(yp, torch::kInt64); return ypred; } - vector BaseClassifier::predict(vector>& X) + vector Classifier::predict(vector>& X) { if (!fitted) { throw logic_error("Classifier has not been fitted"); @@ -82,7 +82,7 @@ namespace bayesnet { auto yp = model.predict(Xd); return yp; } - float BaseClassifier::score(Tensor& X, Tensor& y) + float Classifier::score(Tensor& X, Tensor& y) { if (!fitted) { throw logic_error("Classifier has not been fitted"); @@ -90,7 +90,7 @@ namespace bayesnet { Tensor y_pred = predict(X); return (y_pred == y).sum().item() / y.size(0); } - float BaseClassifier::score(vector>& X, vector& y) + float Classifier::score(vector>& X, vector& y) { if (!fitted) { throw logic_error("Classifier has not been fitted"); @@ -103,11 +103,11 @@ namespace bayesnet { } return model.score(Xd, y); } - vector BaseClassifier::show() + vector Classifier::show() { return model.show(); } - void BaseClassifier::addNodes() + void Classifier::addNodes() { // Add all nodes to the network for (auto feature : features) { @@ -115,12 +115,12 @@ namespace bayesnet { } model.addNode(className, states[className].size()); } - int BaseClassifier::getNumberOfNodes() + int Classifier::getNumberOfNodes() { // Features does not include class return fitted ? model.getFeatures().size() + 1 : 0; } - int BaseClassifier::getNumberOfEdges() + int Classifier::getNumberOfEdges() { return fitted ? model.getEdges().size() : 0; } diff --git a/src/BayesNet/Classifier.h b/src/BayesNet/Classifier.h new file mode 100644 index 0000000..0a44b02 --- /dev/null +++ b/src/BayesNet/Classifier.h @@ -0,0 +1,48 @@ +#ifndef CLASSIFIER_H +#define CLASSIFIER_H +#include +#include "BaseClassifier.h" +#include "Network.h" +#include "BayesMetrics.h" +using namespace std; +using namespace torch; + +namespace bayesnet { + class Classifier : public BaseClassifier { + private: + bool fitted; + Classifier& build(vector& features, string className, map>& states); + protected: + Network model; + int m, n; // m: number of samples, n: number of features + Tensor X; + vector> Xv; + Tensor y; + vector yv; + Tensor dataset; + Metrics metrics; + vector features; + string className; + map> states; + void checkFitParameters(); + virtual void train() = 0; + public: + Classifier(Network model); + virtual ~Classifier() = default; + Classifier& fit(vector>& X, vector& y, vector& features, string className, map>& states); + void addNodes(); + int getNumberOfNodes(); + int getNumberOfEdges(); + Tensor predict(Tensor& X); + vector predict(vector>& X); + float score(Tensor& X, Tensor& y); + float score(vector>& X, vector& y); + vector show(); + }; +} +#endif + + + + + diff --git a/src/BayesNet/Ensemble.h b/src/BayesNet/Ensemble.h index 389e3a1..3ea9ad3 100644 --- a/src/BayesNet/Ensemble.h +++ b/src/BayesNet/Ensemble.h @@ -1,20 +1,20 @@ #ifndef ENSEMBLE_H #define ENSEMBLE_H #include -#include "BaseClassifier.h" +#include "Classifier.h" #include "BayesMetrics.h" #include "bayesnetUtils.h" using namespace std; using namespace torch; namespace bayesnet { - class Ensemble { + class Ensemble : public BaseClassifier { private: bool fitted; long n_models; Ensemble& build(vector& features, string className, map>& states); protected: - vector> models; + vector> models; int m, n; // m: number of samples, n: number of features Tensor X; vector> Xv; @@ -30,13 +30,13 @@ namespace bayesnet { public: Ensemble(); virtual ~Ensemble() = default; - Ensemble& fit(vector>& X, vector& y, vector& features, string className, map>& states); + Ensemble& fit(vector>& X, vector& y, vector& features, string className, map>& states) override; Tensor predict(Tensor& X); - vector predict(vector>& X); + vector predict(vector>& X) override; float score(Tensor& X, Tensor& y); - float score(vector>& X, vector& y); - vector show(); - vector graph(string title); + float score(vector>& X, vector& y) override; + vector show() override; + vector graph(string title) override; }; } #endif diff --git a/src/BayesNet/KDB.cc b/src/BayesNet/KDB.cc index f1023f6..32f7184 100644 --- a/src/BayesNet/KDB.cc +++ b/src/BayesNet/KDB.cc @@ -4,7 +4,7 @@ namespace bayesnet { using namespace std; using namespace torch; - KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {} + KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} void KDB::train() { /* diff --git a/src/BayesNet/KDB.h b/src/BayesNet/KDB.h index 6d0fa7a..9683955 100644 --- a/src/BayesNet/KDB.h +++ b/src/BayesNet/KDB.h @@ -1,11 +1,11 @@ #ifndef KDB_H #define KDB_H -#include "BaseClassifier.h" +#include "Classifier.h" #include "bayesnetUtils.h" namespace bayesnet { using namespace std; using namespace torch; - class KDB : public BaseClassifier { + class KDB : public Classifier { private: int k; float theta; diff --git a/src/BayesNet/SPODE.cc b/src/BayesNet/SPODE.cc index dc661e7..68ff0b9 100644 --- a/src/BayesNet/SPODE.cc +++ b/src/BayesNet/SPODE.cc @@ -2,7 +2,7 @@ namespace bayesnet { - SPODE::SPODE(int root) : BaseClassifier(Network()), root(root) {} + SPODE::SPODE(int root) : Classifier(Network()), root(root) {} void SPODE::train() { diff --git a/src/BayesNet/SPODE.h b/src/BayesNet/SPODE.h index dae600b..668bbca 100644 --- a/src/BayesNet/SPODE.h +++ b/src/BayesNet/SPODE.h @@ -1,8 +1,8 @@ #ifndef SPODE_H #define SPODE_H -#include "BaseClassifier.h" +#include "Classifier.h" namespace bayesnet { - class SPODE : public BaseClassifier { + class SPODE : public Classifier { private: int root; protected: diff --git a/src/BayesNet/TAN.cc b/src/BayesNet/TAN.cc index fb0e533..dc3d4cd 100644 --- a/src/BayesNet/TAN.cc +++ b/src/BayesNet/TAN.cc @@ -4,7 +4,7 @@ namespace bayesnet { using namespace std; using namespace torch; - TAN::TAN() : BaseClassifier(Network()) {} + TAN::TAN() : Classifier(Network()) {} void TAN::train() { diff --git a/src/BayesNet/TAN.h b/src/BayesNet/TAN.h index f438d91..11e7421 100644 --- a/src/BayesNet/TAN.h +++ b/src/BayesNet/TAN.h @@ -1,10 +1,10 @@ #ifndef TAN_H #define TAN_H -#include "BaseClassifier.h" +#include "Classifier.h" namespace bayesnet { using namespace std; using namespace torch; - class TAN : public BaseClassifier { + class TAN : public Classifier { private: protected: void train() override; diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index d2191f7..40d1da4 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -11,12 +11,11 @@ #include "SPODE.h" #include "AODE.h" #include "TAN.h" +#include "platformUtils.h" using namespace std; -const string PATH = "../../data/"; - inline constexpr auto hash_conv(const std::string_view sv) { unsigned long hash{ 5381 }; @@ -31,31 +30,6 @@ inline constexpr auto operator"" _sh(const char* str, size_t len) return hash_conv(std::string_view{ str, len }); } -pair, map> discretize(vector& X, mdlp::labels_t& y, vector features) -{ - vectorXd; - map maxes; - - auto fimdlp = mdlp::CPPFImdlp(); - for (int i = 0; i < X.size(); i++) { - fimdlp.fit(X[i], y); - mdlp::labels_t& xd = fimdlp.transform(X[i]); - maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1; - Xd.push_back(xd); - } - return { Xd, maxes }; -} - -bool file_exists(const std::string& name) -{ - if (FILE* file = fopen(name.c_str(), "r")) { - fclose(file); - return true; - } else { - return false; - } -} - int main(int argc, char** argv) { map datasets = { diff --git a/src/Platform/Folding.cc b/src/Platform/Folding.cc index 7186a14..9d9f009 100644 --- a/src/Platform/Folding.cc +++ b/src/Platform/Folding.cc @@ -63,7 +63,7 @@ StratifiedKFold::StratifiedKFold(int k, const vector& y, int seed) : class_indices[label].erase(class_indices[label].begin(), it); } while (remainder_samples_to_take > 0) { - int fold = (rand() % static_cast(k)); + int fold = (arc4random() % static_cast(k)); if (stratified_indices[fold].size() == fold_size) { continue; }