Compare commits

...

5 Commits

20 changed files with 206 additions and 80 deletions

4
.vscode/launch.json vendored
View File

@@ -25,14 +25,14 @@
"program": "${workspaceFolder}/build/src/Platform/main", "program": "${workspaceFolder}/build/src/Platform/main",
"args": [ "args": [
"-m", "-m",
"TANNew", "AODELd",
"-p", "-p",
"/Users/rmontanana/Code/discretizbench/datasets", "/Users/rmontanana/Code/discretizbench/datasets",
"--stratified", "--stratified",
"-d", "-d",
"iris" "iris"
], ],
"cwd": "${workspaceFolder}/build/src/Platform", "cwd": "/Users/rmontanana/Code/discretizbench",
}, },
{ {
"name": "Build & debug active file", "name": "Build & debug active file",

View File

@@ -17,6 +17,11 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
build: ## Build the main and BayesNetSample build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -j 32 cmake --build build -t main -t BayesNetSample -j 32
# Removes the coverage/profiling data files (*.gcda) found under the current directory.
# `-exec rm -f {} +` is used instead of piping into `xargs rm`: with no matching files
# GNU xargs would still invoke `rm` without operands and make the target fail.
clean: ## Clean the debug info
	@echo ">>> Cleaning Debug BayesNet ...";
	find . -name "*.gcda" -exec rm -f {} +
	@echo ">>> Done";
debug: ## Build a debug version of the project debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet ..."; @echo ">>> Building Debug BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi

34
src/BayesNet/AODELd.cc Normal file
View File

@@ -0,0 +1,34 @@
#include "AODELd.h"
namespace bayesnet {
using namespace std;
// Default constructor: initialises the Ensemble base first and then hands the
// ensemble's Xv, yv, features and className members to the Proposal base.
AODELd::AODELd()
    : Ensemble(),
      Proposal(Ensemble::Xv, Ensemble::yv, features, className)
{
}
// Fits the ensemble from tensors.
// Copies the feature names, class name and states into the members, rebuilds the
// list of member models with train(), fits every model with the arguments received
// here, and finally records the number of models and marks the ensemble as fitted.
// Returns *this so calls can be chained.
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
    // train() reads `features`, so the members must be set before it runs
    features = features_;
    className = className_;
    states = states_;
    train();
    // Each member model is fitted with the caller's arguments, not with the copies above
    for (size_t idx = 0; idx < models.size(); ++idx) {
        models[idx]->fit(X_, y_, features_, className_, states_);
    }
    n_models = models.size();
    fitted = true;
    return *this;
}
void AODELd::train()
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i));
}
}
// Predicts the class of the samples in X by delegating to Ensemble::predict.
Tensor AODELd::predict(Tensor& X)
{
    Tensor y_pred = Ensemble::predict(X);
    return y_pred;
}
// Returns the graph description produced by Ensemble::graph for the given name.
vector<string> AODELd::graph(const string& name)
{
    vector<string> lines = Ensemble::graph(name);
    return lines;
}
}

20
src/BayesNet/AODELd.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef AODELD_H
#define AODELD_H
#include "Ensemble.h"
#include "Proposal.h"
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
// Ensemble classifier that also derives from Proposal; its member models are
// SPODELd instances, one per feature (see AODELd::train).
class AODELd : public Ensemble, public Proposal {
public:
// Initialises both bases; Proposal receives the ensemble's Xv, yv, features and className members.
AODELd();
virtual ~AODELd() = default;
// Stores features/className/states, rebuilds the models with train() and fits each of them
// with the tensors received. Returns *this.
AODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to Ensemble::graph.
// NOTE(review): the default name is "AODE" rather than "AODELd" - confirm this is intended.
vector<string> graph(const string& name = "AODE") override;
// Forwards to Ensemble::predict.
Tensor predict(Tensor& X) override;
// Clears `models` and creates one SPODELd per entry of `features`.
void train() override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !AODELD_H

View File

@@ -1,4 +1,5 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANNew.cc KDBNew.cc Mst.cc Proposal.cc) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc)
target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}") target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}")

View File

@@ -3,24 +3,24 @@
namespace bayesnet { namespace bayesnet {
using namespace torch; using namespace torch;
Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {} Ensemble::Ensemble() : n_models(0), metrics(Metrics()), fitted(false) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states) Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
{ {
dataset = cat({ X, y.view({y.size(0), 1}) }, 1); Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
this->features = features; this->features = features;
this->className = className; this->className = className;
this->states = states; this->states = states;
auto n_classes = states[className].size(); auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes); metrics = Metrics(samples, features, className, n_classes);
// Build models // Build models
train(); train();
// Train models // Train models
n_models = models.size(); n_models = models.size();
auto Xt = torch::transpose(X, 0, 1);
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
if (Xv == vector<vector<int>>()) { if (Xv.empty()) {
// fit with tensors // fit with tensors
models[i]->fit(Xt, y, features, className, states); models[i]->fit(X, y, features, className, states);
} else { } else {
// fit with vectors // fit with vectors
models[i]->fit(Xv, yv, features, className, states); models[i]->fit(Xv, yv, features, className, states);
@@ -29,9 +29,16 @@ namespace bayesnet {
fitted = true; fitted = true;
return *this; return *this;
} }
// Rebuilds the tensor X from the vector-of-vectors Xv.
// X gets shape { Xv.size(), Xv[0].size() } and dtype kInt32, and row i of X is filled
// with the contents of Xv[i].
// An empty Xv produces an empty { 0, 0 } tensor instead of reading Xv[0] out of bounds.
// NOTE(review): rows of Xv are presumably features and columns samples - confirm against callers.
void Ensemble::generateTensorXFromVector()
{
    // Sizes are converted once; this also keeps the loop free of signed/unsigned comparisons
    const int n_rows = static_cast<int>(Xv.size());
    const int n_cols = Xv.empty() ? 0 : static_cast<int>(Xv[0].size());
    X = torch::zeros({ n_rows, n_cols }, kInt32);
    for (int i = 0; i < n_rows; ++i) {
        // "..." selects every column of row i
        X.index_put_({ i, "..." }, torch::tensor(Xv[i], kInt32));
    }
}
Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = torch::transpose(X, 0, 1); this->X = X;
this->y = y; this->y = y;
Xv = vector<vector<int>>(); Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
@@ -39,11 +46,8 @@ namespace bayesnet {
} }
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X; Xv = X;
for (int i = 0; i < X.size(); ++i) { generateTensorXFromVector();
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
}
this->y = torch::tensor(y, kInt32); this->y = torch::tensor(y, kInt32);
yv = y; yv = y;
return build(features, className, states); return build(features, className, states);
@@ -53,10 +57,11 @@ namespace bayesnet {
auto y_pred_ = y_pred.accessor<int, 2>(); auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final; vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(states[className].size(), 0); vector<float> votes(y_pred.size(1), 0);
for (int j = 0; j < y_pred.size(1); ++j) { for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1; votes[y_pred_[i][j]] += 1;
} }
// argsort in descending order
auto indices = argsort(votes); auto indices = argsort(votes);
y_pred_final.push_back(indices[0]); y_pred_final.push_back(indices[0]);
} }
@@ -70,13 +75,12 @@ namespace bayesnet {
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32); Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
//Create a threadpool //Create a threadpool
auto threads{ vector<thread>() }; auto threads{ vector<thread>() };
auto lock = mutex(); mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(thread([&, i]() { threads.push_back(thread([&, i]() {
auto ypredict = models[i]->predict(X); auto ypredict = models[i]->predict(X);
lock.lock(); lock_guard<mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict); y_pred.index_put_({ "...", i }, ypredict);
lock.unlock();
})); }));
} }
for (auto& thread : threads) { for (auto& thread : threads) {

View File

@@ -10,23 +10,23 @@ using namespace torch;
namespace bayesnet { namespace bayesnet {
class Ensemble : public BaseClassifier { class Ensemble : public BaseClassifier {
private: private:
bool fitted;
long n_models;
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected: protected:
unsigned n_models;
bool fitted;
vector<unique_ptr<Classifier>> models; vector<unique_ptr<Classifier>> models;
int m, n; // m: number of samples, n: number of features
Tensor X; Tensor X;
vector<vector<int>> Xv; vector<vector<int>> Xv;
Tensor y; Tensor y;
vector<int> yv; vector<int> yv;
Tensor dataset; Tensor samples;
Metrics metrics; Metrics metrics;
vector<string> features; vector<string> features;
string className; string className;
map<string, vector<int>> states; map<string, vector<int>> states;
void virtual train() = 0; void virtual train() = 0;
vector<int> voting(Tensor& y_pred); vector<int> voting(Tensor& y_pred);
void generateTensorXFromVector();
public: public:
Ensemble(); Ensemble();
virtual ~Ensemble() = default; virtual ~Ensemble() = default;

View File

@@ -1,9 +1,9 @@
#include "KDBNew.h" #include "KDBLd.h"
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
KDBNew::KDBNew(int k) : KDB(k), Proposal(KDB::Xv, KDB::yv, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(KDB::Xv, KDB::yv, features, className) {}
KDBNew& KDBNew::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... // This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_; features = features_;
@@ -23,12 +23,12 @@ namespace bayesnet {
model.fit(KDB::Xv, KDB::yv, features, className); model.fit(KDB::Xv, KDB::yv, features, className);
return *this; return *this;
} }
Tensor KDBNew::predict(Tensor& X) Tensor KDBLd::predict(Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return KDB::predict(Xt); return KDB::predict(Xt);
} }
vector<string> KDBNew::graph(const string& name) vector<string> KDBLd::graph(const string& name)
{ {
return KDB::graph(name); return KDB::graph(name);
} }

19
src/BayesNet/KDBLd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef KDBLD_H
#define KDBLD_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
// KDB classifier that also derives from Proposal.
// Replaces the former KDBNew class (same interface, constructor now explicit).
class KDBLd : public KDB, public Proposal {
private:
public:
// k is forwarded to the KDB base constructor; Proposal receives KDB's Xv, yv, features and className.
explicit KDBLd(int k);
virtual ~KDBLd() = default;
// Fits the classifier from tensors and returns *this.
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to KDB::graph.
vector<string> graph(const string& name = "KDB") override;
// Passes X through prepareX and then calls KDB::predict on the result.
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H

View File

@@ -1,19 +0,0 @@
#ifndef KDBNEW_H
#define KDBNEW_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
// KDB classifier that also derives from Proposal.
class KDBNew : public KDB, public Proposal {
private:
public:
// k is forwarded to the KDB base constructor; Proposal receives KDB's Xv, yv, features and className.
// NOTE(review): single-argument constructor is not explicit, so an int converts implicitly.
KDBNew(int k);
virtual ~KDBNew() = default;
// Fits the classifier from tensors and returns *this.
KDBNew& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to KDB::graph.
vector<string> graph(const string& name = "KDB") override;
// Passes X through prepareX and then calls KDB::predict on the result.
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !KDBNEW_H

35
src/BayesNet/SPODELd.cc Normal file
View File

@@ -0,0 +1,35 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
// Builds a SPODELd whose SPODE base is constructed with `root`; the Proposal base
// is given SPODE's Xv and yv together with the features and className members.
SPODELd::SPODELd(int root)
    : SPODE(root),
      Proposal(SPODE::Xv, SPODE::yv, features, className)
{
}
// Fits the classifier from tensors using local discretization.
// X_ is kept in Xf and y_ in y; features_ and className_ are copied into the members.
// The steps below are order dependent: discretize, fit the base SPODE, refine the
// discretization with the fitted model, rebuild X/samples and fit the model again.
// NOTE(review): states_ is never read in this function - the `states` member is what
// is passed to the helpers. Confirm this is intended.
// Returns *this.
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization or fit_float...
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y);
generateTensorXFromVector();
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(SPODE::Xv, SPODE::yv, features, className, states);
localDiscretizationProposal(states, model);
// Xv may have changed in the previous step, so the tensor X is regenerated from it
generateTensorXFromVector();
// y is reshaped to a single row and appended below X (concatenation along dim 0)
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
// Final fit of the network with the vectors produced above
model.fit(SPODE::Xv, SPODE::yv, features, className);
return *this;
}
// Predicts the class of the samples in X: the input is first transformed with
// prepareX and the result is handed to SPODE::predict.
Tensor SPODELd::predict(Tensor& X)
{
    auto prepared = prepareX(X);
    return SPODE::predict(prepared);
}
// Returns the graph description produced by SPODE::graph for the given name.
vector<string> SPODELd::graph(const string& name)
{
    vector<string> lines = SPODE::graph(name);
    return lines;
}
}

19
src/BayesNet/SPODELd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef SPODELD_H
#define SPODELD_H
#include "SPODE.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
// SPODE classifier that also derives from Proposal and is fitted with local discretization.
class SPODELd : public SPODE, public Proposal {
private:
public:
// root is forwarded to the SPODE base constructor; Proposal receives SPODE's Xv, yv, features and className.
explicit SPODELd(int root);
virtual ~SPODELd() = default;
// Fits the classifier from tensors and returns *this.
// NOTE(review): the definition does not read the `states` argument - confirm this is intended.
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to SPODE::graph.
vector<string> graph(const string& name = "SPODE") override;
// Passes X through prepareX and then calls SPODE::predict on the result.
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !SPODELD_H

View File

@@ -1,9 +1,9 @@
#include "TANNew.h" #include "TANLd.h"
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
TANNew::TANNew() : TAN(), Proposal(TAN::Xv, TAN::yv, features, className) {} TANLd::TANLd() : TAN(), Proposal(TAN::Xv, TAN::yv, features, className) {}
TANNew& TANNew::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... // This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_; features = features_;
@@ -23,12 +23,12 @@ namespace bayesnet {
model.fit(TAN::Xv, TAN::yv, features, className); model.fit(TAN::Xv, TAN::yv, features, className);
return *this; return *this;
} }
Tensor TANNew::predict(Tensor& X) Tensor TANLd::predict(Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return TAN::predict(Xt); return TAN::predict(Xt);
} }
vector<string> TANNew::graph(const string& name) vector<string> TANLd::graph(const string& name)
{ {
return TAN::graph(name); return TAN::graph(name);
} }

19
src/BayesNet/TANLd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef TANLD_H
#define TANLD_H
#include "TAN.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
// TAN classifier that also derives from Proposal.
// Replaces the former TANNew class (same interface).
class TANLd : public TAN, public Proposal {
private:
public:
// Default-constructs the TAN base; Proposal receives TAN's Xv, yv, features and className.
TANLd();
virtual ~TANLd() = default;
// Fits the classifier from tensors and returns *this.
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to TAN::graph.
vector<string> graph(const string& name = "TAN") override;
// Passes X through prepareX and then calls TAN::predict on the result.
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !TANLD_H

View File

@@ -1,19 +0,0 @@
#ifndef TANNEW_H
#define TANNEW_H
#include "TAN.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
// TAN classifier that also derives from Proposal.
class TANNew : public TAN, public Proposal {
private:
public:
// Default-constructs the TAN base; Proposal receives TAN's Xv, yv, features and className.
TANNew();
virtual ~TANNew() = default;
// Fits the classifier from tensors and returns *this.
TANNew& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Forwards to TAN::graph.
vector<string> graph(const string& name = "TAN") override;
// Passes X through prepareX and then calls TAN::predict on the result.
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}
#endif // !TANNEW_H

View File

@@ -3,6 +3,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
// Return the indices in descending order
vector<int> argsort(vector<float>& nums) vector<int> argsort(vector<float>& nums)
{ {
int n = nums.size(); int n = nums.size();

View File

@@ -6,8 +6,10 @@
#include "TAN.h" #include "TAN.h"
#include "KDB.h" #include "KDB.h"
#include "SPODE.h" #include "SPODE.h"
#include "TANNew.h" #include "TANLd.h"
#include "KDBNew.h" #include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
namespace platform { namespace platform {
class Models { class Models {
private: private:

View File

@@ -48,9 +48,9 @@ namespace platform {
cout << setw(6) << right << r["samples"].get<int>() << " "; cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " "; cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " "; cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(7) << right << r["nodes"].get<float>() << " "; cout << setw(7) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(7) << right << r["leaves"].get<float>() << " "; cout << setw(7) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(7) << right << r["depth"].get<float>() << " "; cout << setw(7) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get<double>() << " "; cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get<double>() << " ";
cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get<double>() << " "; cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get<double>() << " ";
cout << " " << r["hyperparameters"].get<string>(); cout << " " << r["hyperparameters"].get<string>();

View File

@@ -102,9 +102,10 @@ int main(int argc, char** argv)
/* /*
* Begin Processing * Begin Processing
*/ */
auto env = platform::DotEnv();
auto experiment = platform::Experiment(); auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0"); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
for (auto seed : seeds) { for (auto seed : seeds) {
experiment.addRandomSeed(seed); experiment.addRandomSeed(seed);

View File

@@ -2,14 +2,18 @@
#define MODEL_REGISTER_H #define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN", static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTN("TANNew", static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANNew();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE", static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB", static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKN("KDBNew", static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBNew(2);}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE", static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
#endif #endif