const functions

This commit is contained in:
Ricardo Montañana Gómez 2023-08-08 01:53:41 +02:00
parent ef1bffcac3
commit 323444b74a
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
27 changed files with 109 additions and 87 deletions

3
.vscode/launch.json vendored
View File

@ -25,8 +25,7 @@
"program": "${workspaceFolder}/build/src/Platform/main",
"args": [
"-m",
"AODE",
"--discretize",
"AODELd",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"--stratified",

View File

@ -9,7 +9,7 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODE>(i));
}
}
// Returns the graph description of this ensemble by forwarding `title` to Ensemble::graph.
// NOTE(review): two signature lines follow — presumably the diff view showing the old
// (non-const) and the new (const) declaration; confirm that only the const one is current.
vector<string> AODE::graph(const string& title)
vector<string> AODE::graph(const string& title) const
{
return Ensemble::graph(title);
}

View File

@ -9,7 +9,7 @@ namespace bayesnet {
public:
AODE();
virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") override;
vector<string> graph(const string& title = "AODE") const override;
};
}
#endif

View File

@ -1,37 +1,46 @@
#include "AODELd.h"
#include "Models.h"
namespace bayesnet {
using namespace std;
// Default constructor: default-constructs the Ensemble base and passes dataset, features and
// className (taken by reference by Proposal's constructor) to the Proposal base.
AODELd::AODELd()
    : Ensemble(),
      Proposal(dataset, features, className)
{
}
// Fits the ensemble from the feature tensor X_ and the label tensor y_.
// Stores features_/className_/states_, keeps X_ and y_ in Xf and y, runs the local
// discretization and then delegates to Ensemble::fit. Returns *this.
// NOTE(review): this body seems to interleave pre- and post-change lines of the diff
// (explicit buildModel()/trainModel() calls next to Ensemble::fit) — confirm which are current.
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization or fit_float...
features = features_;
className = className_;
states = states_;
buildModel();
trainModel();
n_models = models.size();
fitted = true;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y);
// We have discretized the input data
// Now fit the ensemble itself on the discretized dataset through Ensemble::fit.
// NOTE(review): the previous wording referred to TAN::fit, presumably copied from TANLd.
Ensemble::fit(dataset, features, className, states);
return *this;
}
// Rebuilds the list of sub-models: clears `models`, appends entries while iterating over the
// feature indices, then refreshes n_models from models.size().
// NOTE(review): both push_back lines are present; presumably the diff view shows the old
// (make_unique<SPODELd>(i)) and the new (Models factory) line — confirm only one applies.
void AODELd::buildModel()
{
models.clear();
// Trace written to stdout. TODO(review): looks like leftover debugging output — confirm.
cout << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaah!" << endl;
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i));
models.push_back(Models::instance().create("SPODELd"));
// Calls test() on the i-th model right after insertion.
models[i]->test();
}
n_models = models.size();
}
void AODELd::trainModel()
{
cout << "dataset: " << dataset.sizes() << endl;
cout << "features: " << features.size() << endl;
cout << "className: " << className << endl;
cout << "states: " << states.size() << endl;
for (const auto& model : models) {
model->fit(dataset, features, className, states);
model->test();
}
}
// Returns, unchanged, whatever Ensemble::predict produces for X.
Tensor AODELd::predict(Tensor& X)
{
    Tensor predictions = Ensemble::predict(X);
    return predictions;
}
// Returns the graph description of this ensemble by forwarding `name` to Ensemble::graph.
// NOTE(review): the non-const and the const signature are both listed — presumably the old
// and the new line of the diff; confirm that only the const one is current.
vector<string> AODELd::graph(const string& name)
vector<string> AODELd::graph(const string& name) const
{
return Ensemble::graph(name);
}

View File

@ -7,15 +7,14 @@
namespace bayesnet {
using namespace std;
// Classifier that derives from both Ensemble and Proposal.
// NOTE(review): several members appear twice (fit, graph, private:/protected:); presumably the
// diff view lists the old and the new declaration together — confirm against the real header.
class AODELd : public Ensemble, public Proposal {
private:
protected:
// Overrides of the base-class training/building hooks.
void trainModel() override;
void buildModel() override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) override;
virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Graph description; the title defaults to "AODE".
vector<string> graph(const string& name = "AODE") override;
Tensor predict(Tensor& X) override;
vector<string> graph(const string& name = "AODE") const override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};
}

View File

@ -5,6 +5,8 @@
namespace bayesnet {
using namespace std;
class BaseClassifier {
protected:
virtual void trainModel() = 0;
public:
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
@ -16,14 +18,14 @@ namespace bayesnet {
vector<int> virtual predict(vector<vector<int>>& X) = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes() = 0;
int virtual getNumberOfEdges() = 0;
int virtual getNumberOfStates() = 0;
vector<string> virtual show() = 0;
vector<string> virtual graph(const string& title = "") = 0;
int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.1.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt() = 0;
void virtual dump_cpt()const = 0;
};
}
#endif

View File

@ -1,5 +1,7 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc)
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}")

View File

@ -112,7 +112,7 @@ namespace bayesnet {
}
return model.score(X, y);
}
// Returns the textual description produced by the underlying network (model.show()).
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> Classifier::show()
vector<string> Classifier::show() const
{
return model.show();
}
@ -124,16 +124,16 @@ namespace bayesnet {
}
model.addNode(className);
}
// Number of nodes of the fitted model: the feature count plus one for the class node;
// returns 0 while the classifier has not been fitted.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
int Classifier::getNumberOfNodes()
int Classifier::getNumberOfNodes() const
{
// Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0;
}
// Number of edges of the fitted model; returns 0 while the classifier has not been fitted.
// NOTE(review): two signatures and two return statements are shown — presumably the old
// (getEdges().size()) and the new (getNumEdges()) line of the diff; confirm which is current.
int Classifier::getNumberOfEdges()
int Classifier::getNumberOfEdges() const
{
return fitted ? model.getEdges().size() : 0;
return fitted ? model.getNumEdges() : 0;
}
// Returns model.getStates() for a fitted classifier and 0 otherwise.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
int Classifier::getNumberOfStates()
int Classifier::getNumberOfStates() const
{
return fitted ? model.getStates() : 0;
}
@ -141,7 +141,7 @@ namespace bayesnet {
{
return model.topological_sort();
}
// Delegates to the underlying network's dump_cpt().
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
void Classifier::dump_cpt()
void Classifier::dump_cpt() const
{
model.dump_cpt();
}

View File

@ -23,7 +23,7 @@ namespace bayesnet {
map<string, vector<int>> states;
void checkFitParameters();
virtual void buildModel() = 0;
virtual void trainModel();
void trainModel() override;
public:
Classifier(Network model);
virtual ~Classifier() = default;
@ -31,16 +31,16 @@ namespace bayesnet {
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override;
void addNodes();
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() override;
vector<string> topological_order() override;
void dump_cpt() override;
vector<string> show() const override;
vector<string> topological_order() override;
void dump_cpt() const override;
};
}
#endif

View File

@ -94,7 +94,7 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
vector<string> Ensemble::show()
vector<string> Ensemble::show() const
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
@ -103,7 +103,7 @@ namespace bayesnet {
}
return result;
}
vector<string> Ensemble::graph(const string& title)
vector<string> Ensemble::graph(const string& title) const
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
@ -112,7 +112,7 @@ namespace bayesnet {
}
return result;
}
int Ensemble::getNumberOfNodes()
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
@ -120,7 +120,7 @@ namespace bayesnet {
}
return nodes;
}
int Ensemble::getNumberOfEdges()
int Ensemble::getNumberOfEdges() const
{
int edges = 0;
for (auto i = 0; i < n_models; ++i) {
@ -128,7 +128,7 @@ namespace bayesnet {
}
return edges;
}
int Ensemble::getNumberOfStates()
int Ensemble::getNumberOfStates() const
{
int nstates = 0;
for (auto i = 0; i < n_models; ++i) {

View File

@ -23,16 +23,16 @@ namespace bayesnet {
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
vector<string> show() override;
vector<string> graph(const string& title) override;
vector<string> topological_order() override
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
vector<string> show() const override;
vector<string> graph(const string& title) const override;
// This class reports no topological order: the result is always an empty vector.
vector<string> topological_order() override
{
    return {};
}
void dump_cpt() override
void dump_cpt() const override
{
}
};

View File

@ -79,7 +79,7 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
vector<string> KDB::graph(const string& title)
vector<string> KDB::graph(const string& title) const
{
string header{ title };
if (title == "KDB") {

View File

@ -15,7 +15,7 @@ namespace bayesnet {
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
vector<string> graph(const string& name = "KDB") override;
vector<string> graph(const string& name = "KDB") const override;
};
}
#endif

View File

@ -23,7 +23,7 @@ namespace bayesnet {
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
// Returns the graph description by forwarding `name` to KDB::graph.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> KDBLd::graph(const string& name)
vector<string> KDBLd::graph(const string& name) const
{
return KDB::graph(name);
}

View File

@ -11,7 +11,7 @@ namespace bayesnet {
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") override;
vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
};

View File

@ -43,15 +43,15 @@ namespace bayesnet {
}
nodes[name] = std::make_unique<Node>(name);
}
// Returns a copy of the network's `features` vector.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> Network::getFeatures()
vector<string> Network::getFeatures() const
{
return features;
}
// Returns the stored classNumStates value.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
int Network::getClassNumStates()
int Network::getClassNumStates() const
{
return classNumStates;
}
int Network::getStates()
int Network::getStates() const
{
int result = 0;
for (auto& node : nodes) {
@ -59,7 +59,7 @@ namespace bayesnet {
}
return result;
}
// Returns a copy of the stored class name.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
string Network::getClassName()
string Network::getClassName() const
{
return className;
}
@ -343,7 +343,7 @@ namespace bayesnet {
transform(result.begin(), result.end(), result.begin(), [sum](double& value) { return value / sum; });
return result;
}
vector<string> Network::show()
vector<string> Network::show() const
{
vector<string> result;
// Draw the network
@ -356,7 +356,7 @@ namespace bayesnet {
}
return result;
}
vector<string> Network::graph(const string& title)
vector<string> Network::graph(const string& title) const
{
auto output = vector<string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
@ -370,7 +370,7 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
vector<pair<string, string>> Network::getEdges()
vector<pair<string, string>> Network::getEdges() const
{
auto edges = vector<pair<string, string>>();
for (const auto& node : nodes) {
@ -382,6 +382,10 @@ namespace bayesnet {
}
return edges;
}
// Number of edges in the network, obtained as the size of the list built by getEdges().
int Network::getNumEdges() const
{
    const auto edgeList = getEdges();
    return static_cast<int>(edgeList.size());
}
vector<string> Network::topological_sort()
{
/* Check if all the parents of every node are before the node */
@ -420,7 +424,7 @@ namespace bayesnet {
}
return result;
}
void Network::dump_cpt()
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;

View File

@ -37,11 +37,12 @@ namespace bayesnet {
void addNode(const string&);
void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures();
int getStates();
vector<pair<string, string>> getEdges();
int getClassNumStates();
string getClassName();
vector<string> getFeatures() const;
int getStates() const;
vector<pair<string, string>> getEdges() const;
int getNumEdges() const;
int getClassNumStates() const;
string getClassName() const;
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
void fit(const torch::Tensor&, const torch::Tensor&, const vector<string>&, const string&);
void fit(const torch::Tensor&, const vector<string>&, const string&);
@ -54,10 +55,10 @@ namespace bayesnet {
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&);
vector<string> topological_sort();
vector<string> show();
vector<string> graph(const string& title); // Returns a vector of strings representing the graph in graphviz format
vector<string> show() const;
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize();
void dump_cpt();
void dump_cpt() const;
inline string version() { return "0.1.0"; }
};
}

View File

@ -2,7 +2,7 @@
#include "ArffFiles.h"
namespace bayesnet {
// Initialises pDataset, pFeatures and pClassName from the caller's dataset_, features_ and className_.
// NOTE(review): two constructor lines are shown — presumably the old one (which also
// initialised m and n from the dataset_ sizes) and the new one without them; confirm.
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_), m(dataset_.size(1)), n(dataset_.size(0) - 1) {}
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal()
{
for (auto& [key, value] : discretizers) {
@ -32,9 +32,9 @@ namespace bayesnet {
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents(indices.size());
vector<string> yJoinParents(Xf.size(1));
for (auto idx : indices) {
for (int i = 0; i < n; ++i) {
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
}
}
@ -64,10 +64,13 @@ namespace bayesnet {
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
}
model.fit(pDataset, pFeatures, pClassName);
}
}
void Proposal::fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y)
{
int m = Xf.size(1);
int n = Xf.size(0);
pDataset = torch::zeros({ n + 1, m }, kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)

View File

@ -19,7 +19,6 @@ namespace bayesnet {
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers;
int m, n;
private:
torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures;

View File

@ -17,7 +17,7 @@ namespace bayesnet {
}
}
}
// Returns the graph description produced by the underlying network (model.graph(name)).
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> SPODE::graph(const string& name)
vector<string> SPODE::graph(const string& name) const
{
return model.graph(name);
}

View File

@ -11,7 +11,7 @@ namespace bayesnet {
public:
explicit SPODE(int root);
virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") override;
vector<string> graph(const string& name = "SPODE") const override;
};
}
#endif

View File

@ -2,10 +2,11 @@
namespace bayesnet {
using namespace std;
// Constructs the SPODE base with `root` and the Proposal base with dataset, features and className.
// NOTE(review): two constructor lines are shown; the second additionally prints
// "SPODELd constructor" to stdout — presumably a temporary debug trace in the new version.
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) { cout << "SPODELd constructor" << endl; }
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization or fit_float...
cout << "YOOOOOOOOOOOOOOOOOOOo" << endl;
features = features_;
className = className_;
Xf = X_;
@ -16,7 +17,6 @@ namespace bayesnet {
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
localDiscretizationProposal(states, model);
//model.fit(SPODE::Xv, SPODE::yv, features, className);
return *this;
}
Tensor SPODELd::predict(Tensor& X)
@ -24,7 +24,11 @@ namespace bayesnet {
auto Xt = prepareX(X);
return SPODE::predict(Xt);
}
vector<string> SPODELd::graph(const string& name)
// Writes the fixed line "SPODELd test" to stdout (and flushes it); has no other effect.
void SPODELd::test()
{
    std::cout << "SPODELd test" << std::endl;
}
// Returns, unchanged, the graph description that SPODE::graph builds for `name`.
vector<string> SPODELd::graph(const string& name) const
{
    vector<string> description = SPODE::graph(name);
    return description;
}

View File

@ -6,12 +6,12 @@
namespace bayesnet {
using namespace std;
// Classifier that derives from both SPODE and Proposal.
// NOTE(review): graph() and the private:/public: labels appear twice; presumably the diff
// view lists old and new lines together — confirm against the real header.
class SPODELd : public SPODE, public Proposal {
private:
public:
// NOTE(review): looks like a temporary debugging hook — confirm before relying on it.
void test();
explicit SPODELd(int root);
virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
// Graph description; the title defaults to "SPODE".
vector<string> graph(const string& name = "SPODE") override;
vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override;
// Version string of this classifier.
static inline string version() { return "0.0.1"; };
};

View File

@ -34,7 +34,7 @@ namespace bayesnet {
model.addEdge(className, feature);
}
}
// Returns the graph description produced by the underlying network (model.graph(title)).
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> TAN::graph(const string& title)
vector<string> TAN::graph(const string& title) const
{
return model.graph(title);
}

View File

@ -11,7 +11,7 @@ namespace bayesnet {
public:
TAN();
virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") override;
vector<string> graph(const string& name = "TAN") const override;
};
}
#endif

View File

@ -16,15 +16,15 @@ namespace bayesnet {
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states);
localDiscretizationProposal(states, model);
//model.fit(dataset, features, className);
return *this;
}
// Passes X through prepareX and returns TAN::predict of the prepared tensor.
Tensor TANLd::predict(Tensor& X)
{
    // Kept as a named local: it is handed on to TAN::predict as an lvalue.
    auto prepared = prepareX(X);
    Tensor predictions = TAN::predict(prepared);
    return predictions;
}
// Returns the graph description by forwarding `name` to TAN::graph.
// NOTE(review): old (non-const) and new (const) signature lines are both shown — confirm.
vector<string> TANLd::graph(const string& name)
vector<string> TANLd::graph(const string& name) const
{
return TAN::graph(name);
}

View File

@ -11,7 +11,7 @@ namespace bayesnet {
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") override;
vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
};