diff --git a/sample/main.cc b/sample/main.cc index d7302b1..d496a29 100644 --- a/sample/main.cc +++ b/sample/main.cc @@ -8,6 +8,8 @@ #include "Metrics.hpp" #include "CPPFImdlp.h" #include "KDB.h" +#include "SPODE.h" +#include "AODE.h" using namespace std; @@ -249,13 +251,14 @@ int main(int argc, char** argv) // long m2 = features.size() + 1; // auto matrix2 = torch::from_blob(conditional2.data(), { m, m }); // cout << matrix2 << endl; - cout << "****************** KDB ******************" << endl; + cout << "****************** Preparing ******************" << endl; map> states; for (auto feature : features) { states[feature] = vector(maxes[feature]); } states[className] = vector( maxes[className]); + cout << "****************** KDB ******************" << endl; auto kdb = bayesnet::KDB(2); kdb.fit(Xd, y, features, className, states); for (auto line : kdb.show()) { @@ -263,5 +266,21 @@ int main(int argc, char** argv) } cout << "Score: " << kdb.score(Xd, y) << endl; cout << "****************** KDB ******************" << endl; + cout << "****************** SPODE ******************" << endl; + auto spode = bayesnet::SPODE(2); + spode.fit(Xd, y, features, className, states); + for (auto line : spode.show()) { + cout << line << endl; + } + cout << "Score: " << spode.score(Xd, y) << endl; + cout << "****************** SPODE ******************" << endl; + cout << "****************** AODE ******************" << endl; + auto aode = bayesnet::AODE(); + aode.fit(Xd, y, features, className, states); + for (auto line : aode.show()) { + cout << line << endl; + } + cout << "Score: " << aode.score(Xd, y) << endl; + cout << "****************** AODE ******************" << endl; return 0; } \ No newline at end of file diff --git a/src/AODE.cc b/src/AODE.cc index 63188bb..ef17c77 100644 --- a/src/AODE.cc +++ b/src/AODE.cc @@ -1,16 +1,12 @@ #include "AODE.h" namespace bayesnet { - - AODE::AODE() : Ensemble() - { - models = vector(); - } + AODE::AODE() : Ensemble() {} void AODE::train() { + models.clear(); for (int i = 0; i < features.size(); ++i) { - SPODE model = SPODE(i); - models.push_back(model); + models.push_back(std::make_unique(i)); } } } \ No newline at end of file diff --git a/src/BaseClassifier.cc b/src/BaseClassifier.cc index 74b7035..7cc3814 100644 --- a/src/BaseClassifier.cc +++ b/src/BaseClassifier.cc @@ -69,6 +69,20 @@ namespace bayesnet { auto ypred = torch::tensor(yp, torch::kInt64); return ypred; } + vector BaseClassifier::predict(vector>& X) + { + if (!fitted) { + throw logic_error("Classifier has not been fitted"); + } + auto m_ = X[0].size(); + auto n_ = X.size(); + vector> Xd(n_, vector(m_, 0)); + for (auto i = 0; i < n_; i++) { + Xd[i] = vector(X[i].begin(), X[i].end()); + } + auto yp = model.predict(Xd); + return yp; + } float BaseClassifier::score(Tensor& X, Tensor& y) { if (!fitted) { diff --git a/src/BaseClassifier.h b/src/BaseClassifier.h index 929772e..301f847 100644 --- a/src/BaseClassifier.h +++ b/src/BaseClassifier.h @@ -31,6 +31,7 @@ namespace bayesnet { BaseClassifier& fit(vector>& X, vector& y, vector& features, string className, map>& states); void addNodes(); Tensor predict(Tensor& X); + vector predict(vector>& X); float score(Tensor& X, Tensor& y); float score(vector>& X, vector& y); vector show(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5897385..8bb813a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,2 +1,2 @@ -add_library(BayesNet utils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc) +add_library(BayesNet utils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc) target_link_libraries(BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Ensemble.cc b/src/Ensemble.cc index f5f8412..c37d2a0 100644 --- a/src/Ensemble.cc +++ b/src/Ensemble.cc @@ -4,10 +4,10 @@ namespace bayesnet { using namespace std; using namespace torch; - Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()) {} + Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {} Ensemble& Ensemble::build(vector& features, string className, map>& states) { - dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); + dataset = cat({ X, y.view({y.size(0), 1}) }, 1); this->features = features; this->className = className; this->states = states; @@ -18,34 +18,35 @@ namespace bayesnet { // Train models n_models = models.size(); for (auto i = 0; i < n_models; ++i) { - models[i].fit(X, y, features, className, states); + models[i]->fit(Xv, yv, features, className, states); } + fitted = true; return *this; } - Ensemble& Ensemble::fit(Tensor& X, Tensor& y, vector& features, string className, map>& states) - { - this->X = X; - this->y = y; - auto sizes = X.sizes(); - m = sizes[0]; - n = sizes[1]; - return build(features, className, states); - } Ensemble& Ensemble::fit(vector>& X, vector& y, vector& features, string className, map>& states) { this->X = torch::zeros({ static_cast(X[0].size()), static_cast(X.size()) }, kInt64); + Xv = X; for (int i = 0; i < X.size(); ++i) { this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64)); } this->y = torch::tensor(y, kInt64); + yv = y; return build(features, className, states); } Tensor Ensemble::predict(Tensor& X) { - Tensor y_pred = torch::zeros({ X.size(0), n_models }, torch::kInt64); - for (auto i = 0; i < n_models; ++i) { - y_pred.index_put_({ "...", i }, models[i].predict(X)); + if (!fitted) { + throw logic_error("Ensemble has not been fitted"); } + Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64); + for (auto i = 0; i < n_models; ++i) { + y_pred.index_put_({ "...", i }, models[i]->predict(X)); + } + return torch::tensor(voting(y_pred)); + } + vector Ensemble::voting(Tensor& y_pred) + { auto y_pred_ = y_pred.accessor(); vector y_pred_final; for (int i = 0; i < y_pred.size(0); ++i) { @@ -56,18 +57,45 @@ namespace bayesnet { auto indices = argsort(votes); y_pred_final.push_back(indices[0]); } - return torch::tensor(y_pred_final, torch::kInt64); + return y_pred_final; } - float Ensemble::score(Tensor& X, Tensor& y) + vector Ensemble::predict(vector>& X) { - Tensor y_pred = predict(X); - return (y_pred == y).sum().item() / y.size(0); + if (!fitted) { + throw logic_error("Ensemble has not been fitted"); + } + long m_ = X[0].size(); + long n_ = X.size(); + vector> Xd(n_, vector(m_, 0)); + for (auto i = 0; i < n_; i++) { + Xd[i] = vector(X[i].begin(), X[i].end()); + } + Tensor y_pred = torch::zeros({ m_, n_models }, kInt64); + for (auto i = 0; i < n_models; ++i) { + y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64)); + } + return voting(y_pred); + } + float Ensemble::score(vector>& X, vector& y) + { + if (!fitted) { + throw logic_error("Ensemble has not been fitted"); + } + auto y_pred = predict(X); + int correct = 0; + for (int i = 0; i < y_pred.size(); ++i) { + if (y_pred[i] == y[i]) { + correct++; + } + } + return (double)correct / y_pred.size(); + } vector Ensemble::show() { vector result; for (auto i = 0; i < n_models; ++i) { - auto res = models[i].show(); + auto res = models[i]->show(); result.insert(result.end(), res.begin(), res.end()); } return result; diff --git a/src/Ensemble.h b/src/Ensemble.h index 6950f4b..118f5e0 100644 --- a/src/Ensemble.h +++ b/src/Ensemble.h @@ -10,26 +10,31 @@ using namespace torch; namespace bayesnet { class Ensemble { private: + bool fitted; long n_models; Ensemble& build(vector& features, string className, map>& states); protected: - vector models; + vector> models; int m, n; // m: number of samples, n: number of features Tensor X; + vector> Xv; Tensor y; + vector yv; Tensor dataset; Metrics metrics; vector features; string className; map> states; void virtual train() = 0; + vector voting(Tensor& y_pred); public: Ensemble(); virtual ~Ensemble() = default; - Ensemble& fit(Tensor& X, Tensor& y, vector& features, string className, map>& states); Ensemble& fit(vector>& X, vector& y, vector& features, string className, map>& states); Tensor predict(Tensor& X); + vector predict(vector>& X); float score(Tensor& X, Tensor& y); + float score(vector>& X, vector& y); vector show(); }; }