Complete SPODE & AODE

This commit is contained in:
Ricardo Montañana Gómez 2023-07-15 01:59:30 +02:00
parent db6908acd0
commit e311c27d43
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
7 changed files with 94 additions and 31 deletions

View File

@ -8,6 +8,8 @@
#include "Metrics.hpp" #include "Metrics.hpp"
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include "KDB.h" #include "KDB.h"
#include "SPODE.h"
#include "AODE.h"
using namespace std; using namespace std;
@ -249,13 +251,14 @@ int main(int argc, char** argv)
// long m2 = features.size() + 1; // long m2 = features.size() + 1;
// auto matrix2 = torch::from_blob(conditional2.data(), { m, m }); // auto matrix2 = torch::from_blob(conditional2.data(), { m, m });
// cout << matrix2 << endl; // cout << matrix2 << endl;
cout << "****************** KDB ******************" << endl; cout << "****************** Preparing ******************" << endl;
map<string, vector<int>> states; map<string, vector<int>> states;
for (auto feature : features) { for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]); states[feature] = vector<int>(maxes[feature]);
} }
states[className] = vector<int>( states[className] = vector<int>(
maxes[className]); maxes[className]);
cout << "****************** KDB ******************" << endl;
auto kdb = bayesnet::KDB(2); auto kdb = bayesnet::KDB(2);
kdb.fit(Xd, y, features, className, states); kdb.fit(Xd, y, features, className, states);
for (auto line : kdb.show()) { for (auto line : kdb.show()) {
@ -263,5 +266,21 @@ int main(int argc, char** argv)
} }
cout << "Score: " << kdb.score(Xd, y) << endl; cout << "Score: " << kdb.score(Xd, y) << endl;
cout << "****************** KDB ******************" << endl; cout << "****************** KDB ******************" << endl;
cout << "****************** SPODE ******************" << endl;
auto spode = bayesnet::SPODE(2);
spode.fit(Xd, y, features, className, states);
for (auto line : spode.show()) {
cout << line << endl;
}
cout << "Score: " << spode.score(Xd, y) << endl;
cout << "****************** SPODE ******************" << endl;
cout << "****************** AODE ******************" << endl;
auto aode = bayesnet::AODE();
aode.fit(Xd, y, features, className, states);
for (auto line : aode.show()) {
cout << line << endl;
}
cout << "Score: " << aode.score(Xd, y) << endl;
cout << "****************** AODE ******************" << endl;
return 0; return 0;
} }

View File

@ -1,16 +1,12 @@
#include "AODE.h" #include "AODE.h"
namespace bayesnet { namespace bayesnet {
AODE::AODE() : Ensemble() {}
AODE::AODE() : Ensemble()
{
models = vector<SPODE>();
}
void AODE::train() void AODE::train()
{ {
models.clear();
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
SPODE model = SPODE(i); models.push_back(std::make_unique<SPODE>(i));
models.push_back(model);
} }
} }
} }

View File

@ -69,6 +69,20 @@ namespace bayesnet {
auto ypred = torch::tensor(yp, torch::kInt64); auto ypred = torch::tensor(yp, torch::kInt64);
return ypred; return ypred;
} }
/// Predict labels for data supplied as nested integer vectors.
///
/// @param X  Input data as a vector of integer vectors. Presumably laid out
///           the same way as the X passed to fit() — TODO confirm against callers.
/// @return   The value produced by model.predict() for a copy of X.
/// @throws logic_error       if the classifier has not been fitted.
/// @throws invalid_argument  if X is empty.
vector<int> BaseClassifier::predict(vector<vector<int>>& X)
{
    if (!fitted) {
        throw logic_error("Classifier has not been fitted");
    }
    // The previous implementation read X[0] unconditionally, which is
    // undefined behavior for an empty container; reject that input explicitly.
    if (X.empty()) {
        throw invalid_argument("X must not be empty");
    }
    // Give the model its own copy so the caller's data is never handed over
    // directly. The copy is built in a single pass rather than zero-filling a
    // matrix and then overwriting every row.
    vector<vector<int>> Xd(X.begin(), X.end());
    return model.predict(Xd);
}
float BaseClassifier::score(Tensor& X, Tensor& y) float BaseClassifier::score(Tensor& X, Tensor& y)
{ {
if (!fitted) { if (!fitted) {

View File

@ -31,6 +31,7 @@ namespace bayesnet {
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states); BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
void addNodes(); void addNodes();
Tensor predict(Tensor& X); Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y); float score(Tensor& X, Tensor& y);
float score(vector<vector<int>>& X, vector<int>& y); float score(vector<vector<int>>& X, vector<int>& y);
vector<string> show(); vector<string> show();

View File

@ -1,2 +1,2 @@
add_library(BayesNet utils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc) add_library(BayesNet utils.cc Network.cc Node.cc Metrics.cc BaseClassifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc)
target_link_libraries(BayesNet "${TORCH_LIBRARIES}") target_link_libraries(BayesNet "${TORCH_LIBRARIES}")

View File

@ -4,10 +4,10 @@ namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()) {} Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states) Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
{ {
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); dataset = cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features; this->features = features;
this->className = className; this->className = className;
this->states = states; this->states = states;
@ -18,34 +18,35 @@ namespace bayesnet {
// Train models // Train models
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
models[i].fit(X, y, features, className, states); models[i]->fit(Xv, yv, features, className, states);
} }
fitted = true;
return *this; return *this;
} }
Ensemble& Ensemble::fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = X;
this->y = y;
auto sizes = X.sizes();
m = sizes[0];
n = sizes[1];
return build(features, className, states);
}
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64); this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
Xv = X;
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64)); this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
} }
this->y = torch::tensor(y, kInt64); this->y = torch::tensor(y, kInt64);
yv = y;
return build(features, className, states); return build(features, className, states);
} }
Tensor Ensemble::predict(Tensor& X) Tensor Ensemble::predict(Tensor& X)
{ {
Tensor y_pred = torch::zeros({ X.size(0), n_models }, torch::kInt64); if (!fitted) {
for (auto i = 0; i < n_models; ++i) { throw logic_error("Ensemble has not been fitted");
y_pred.index_put_({ "...", i }, models[i].predict(X));
} }
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt64);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, models[i]->predict(X));
}
return torch::tensor(voting(y_pred));
}
vector<int> Ensemble::voting(Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int64_t, 2>(); auto y_pred_ = y_pred.accessor<int64_t, 2>();
vector<int> y_pred_final; vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
@ -56,18 +57,45 @@ namespace bayesnet {
auto indices = argsort(votes); auto indices = argsort(votes);
y_pred_final.push_back(indices[0]); y_pred_final.push_back(indices[0]);
} }
return torch::tensor(y_pred_final, torch::kInt64); return y_pred_final;
} }
float Ensemble::score(Tensor& X, Tensor& y) vector<int> Ensemble::predict(vector<vector<int>>& X)
{ {
Tensor y_pred = predict(X); if (!fitted) {
return (y_pred == y).sum().item<float>() / y.size(0); throw logic_error("Ensemble has not been fitted");
}
long m_ = X[0].size();
long n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
Tensor y_pred = torch::zeros({ m_, n_models }, kInt64);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt64));
}
return voting(y_pred);
}
float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == y[i]) {
correct++;
}
}
return (double)correct / y_pred.size();
} }
vector<string> Ensemble::show() vector<string> Ensemble::show()
{ {
vector<string> result; vector<string> result;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
auto res = models[i].show(); auto res = models[i]->show();
result.insert(result.end(), res.begin(), res.end()); result.insert(result.end(), res.begin(), res.end());
} }
return result; return result;

View File

@ -10,26 +10,31 @@ using namespace torch;
namespace bayesnet { namespace bayesnet {
class Ensemble { class Ensemble {
private: private:
bool fitted;
long n_models; long n_models;
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected: protected:
vector<BaseClassifier> models; vector<unique_ptr<BaseClassifier>> models;
int m, n; // m: number of samples, n: number of features int m, n; // m: number of samples, n: number of features
Tensor X; Tensor X;
vector<vector<int>> Xv;
Tensor y; Tensor y;
vector<int> yv;
Tensor dataset; Tensor dataset;
Metrics metrics; Metrics metrics;
vector<string> features; vector<string> features;
string className; string className;
map<string, vector<int>> states; map<string, vector<int>> states;
void virtual train() = 0; void virtual train() = 0;
vector<int> voting(Tensor& y_pred);
public: public:
Ensemble(); Ensemble();
virtual ~Ensemble() = default; virtual ~Ensemble() = default;
Ensemble& fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states);
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
Tensor predict(Tensor& X); Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y); float score(Tensor& X, Tensor& y);
float score(vector<vector<int>>& X, vector<int>& y);
vector<string> show(); vector<string> show();
}; };
} }