Compare commits


2 Commits

SHA1 Message Date
506ef34c6f Add report output to main 2023-08-05 20:29:05 +02:00
7f45495837 Refactor New classifiers to extract predict 2023-08-05 18:39:48 +02:00
12 changed files with 135 additions and 53 deletions

KDB.cc

@@ -35,7 +35,6 @@ namespace bayesnet {
}
// 2. Compute class conditional mutual information I(Xi;Xj|C), for each
auto conditionalEdgeWeights = metrics.conditionalEdge();
cout << "Conditional edge weights: " << conditionalEdgeWeights << endl;
// 3. Let the used variable list, S, be empty.
vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
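
For reference, the class-conditional mutual information named in step 2 is the standard information-theoretic quantity (textbook definition, not something spelled out in this diff), which metrics.conditionalEdge() apparently evaluates for every feature pair:

I(X_i; X_j \mid C) = \sum_{x_i, x_j, c} p(x_i, x_j, c) \log \frac{p(x_i, x_j \mid c)}{p(x_i \mid c)\, p(x_j \mid c)}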

KDBNew.cc

@@ -10,15 +10,12 @@ namespace bayesnet {
className = className_;
Xf = X_;
y = y_;
model.initialize();
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y);
generateTensorXFromVector();
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
cout << "KDBNew: Fitting model" << endl;
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(KDB::Xv, KDB::yv, features, className, states);
cout << "KDBNew: Model fitted" << endl;
localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
@@ -26,20 +23,10 @@ namespace bayesnet {
model.fit(KDB::Xv, KDB::yv, features, className);
return *this;
}
void KDBNew::train()
{
KDB::train();
}
Tensor KDBNew::predict(Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[features[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}
cout << "KDBNew Xtd: " << Xtd.sizes() << endl;
return KDB::predict(Xtd);
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
vector<string> KDBNew::graph(const string& name)
{

KDBNew.h

@@ -13,7 +13,6 @@ namespace bayesnet {
KDBNew& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") override;
Tensor predict(Tensor& X) override;
void train() override;
static inline string version() { return "0.0.1"; };
};
}

Proposal.cc

@@ -47,25 +47,25 @@ namespace bayesnet {
//
//
//
auto tmp = discretizers[feature]->transform(xvf);
Xv[index] = tmp;
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[feature] = xStates;
// auto tmp = discretizers[feature]->transform(xvf);
// Xv[index] = tmp;
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[feature] = xStates;
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
Xv[index] = discretizers[pFeatures[index]]->transform(Xt);
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
}
}
// if (upgrade) {
// // Discretize again X (only the affected indices) with the new fitted discretizers
// for (auto index : indicesToReDiscretize) {
// auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
// auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
// Xv[index] = discretizers[pFeatures[index]]->transform(Xt);
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[pFeatures[index]] = xStates;
// }
// }
}
void Proposal::fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y)
{
@@ -89,4 +89,14 @@ namespace bayesnet {
iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates;
}
torch::Tensor Proposal::prepareX(torch::Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}
return Xtd;
}
}
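
With prepareX in place, every *New classifier's predict override collapses to the same two lines: discretize the continuous input with the already-fitted discretizers, then delegate to the base classifier. A minimal sketch of the pattern (SomeNew and SomeBase are placeholder names, not classes from this diff):

// Hypothetical classifier following the pattern of KDBNew::predict above
// and TANNew::predict below.
Tensor SomeNew::predict(Tensor& X)
{
    auto Xt = prepareX(X);        // row-wise discretization via Proposal::prepareX
    return SomeBase::predict(Xt); // classification on the discretized tensor
}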

Proposal.h

@@ -5,6 +5,7 @@
#include <torch/torch.h>
#include "Network.h"
#include "CPPFImdlp.h"
#include "Classifier.h"
namespace bayesnet {
class Proposal {
@@ -12,6 +13,7 @@ namespace bayesnet {
Proposal(vector<vector<int>>& Xv_, vector<int>& yv_, vector<string>& features_, string& className_);
virtual ~Proposal();
protected:
torch::Tensor prepareX(torch::Tensor& X);
void localDiscretizationProposal(map<string, vector<int>>& states, Network& model);
void fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor

TANNew.cc

@@ -15,9 +15,7 @@ namespace bayesnet {
generateTensorXFromVector();
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
cout << "TANNew: Fitting model" << endl;
TAN::fit(TAN::Xv, TAN::yv, features, className, states);
cout << "TANNew: Model fitted" << endl;
localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
@@ -27,14 +25,8 @@ namespace bayesnet {
}
Tensor TANNew::predict(Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[features[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}
cout << "TANNew Xtd: " << Xtd.sizes() << endl;
return TAN::predict(Xtd);
auto Xt = prepareX(X);
return TAN::predict(Xt);
}
vector<string> TANNew::graph(const string& name)
{

src/Platform/CMakeLists.txt

@@ -4,5 +4,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

src/Platform/Experiment.cc

@@ -1,6 +1,7 @@
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "Report.h"
namespace platform {
using json = nlohmann::json;
@@ -86,6 +87,13 @@ namespace platform {
file.close();
}
void Experiment::report()
{
json data = build_json();
Report report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
@@ -146,11 +154,6 @@ namespace platform {
auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush;
clf->fit(X_train, y_train, features, className, states);
cout << endl;
auto lines = clf->show();
for (auto line : lines) {
cout << line << endl;
}
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();

src/Platform/Experiment.h

@@ -108,6 +108,7 @@ namespace platform {
void cross_validation(const string& path, const string& fileName);
void go(vector<string> filesToProcess, const string& path);
void show();
void report();
};
}
#endif

src/Platform/Report.cc (new file, 66 lines)

@@ -0,0 +1,66 @@
#include "Report.h"
namespace platform {
string headerLine(const string& text)
{
int n = MAXL - text.length() - 3;
return "* " + text + string(n, ' ') + "*\n";
}
string Report::fromVector(const string& key)
{
string result = "";
for (auto& item : data[key]) {
result += to_string(item) + ", ";
}
return "[" + result.substr(0, result.length() - 2) + "]";
}
string fVector(const json& data)
{
string result = "";
for (const auto& item : data) {
result += to_string(item) + ", ";
}
return "[" + result.substr(0, result.length() - 2) + "]";
}
void Report::show()
{
header();
body();
}
void Report::header()
{
cout << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
cout << headerLine("Execution took " + to_string(data["duration"].get<float>()) + " seconds, " + to_string(data["duration"].get<float>() / 3600) + " hours, on " + data["platform"].get<string>());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
}
void Report::body()
{
cout << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "============================== ====== ===== === ======= ======= ======= =============== ================= ===============" << endl;
for (const auto& r : data["results"]) {
cout << setw(30) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(7) << right << r["nodes"].get<float>() << " ";
cout << setw(7) << right << r["leaves"].get<float>() << " ";
cout << setw(7) << right << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get<double>() << " ";
cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get<double>() << " ";
cout << " " << r["hyperparameters"].get<string>();
cout << endl;
cout << string(MAXL, '*') << endl;
cout << headerLine("Train scores: " + fVector(r["scores_train"]));
cout << headerLine("Test scores: " + fVector(r["scores_test"]));
cout << headerLine("Train times: " + fVector(r["times_train"]));
cout << headerLine("Test times: " + fVector(r["times_test"]));
cout << string(MAXL, '*') << endl;
}
}
}

src/Platform/Report.h (new file, 23 lines)

@@ -0,0 +1,23 @@
#ifndef REPORT_H
#define REPORT_H
#include <string>
#include <iostream>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
const int MAXL = 121;
namespace platform {
using namespace std;
class Report {
public:
explicit Report(json data_) { data = data_; };
virtual ~Report() = default;
void show();
private:
void header();
void body();
string fromVector(const string& key);
json data;
};
};
#endif
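
A minimal stand-alone sketch of driving Report, assuming only the json fields that header() reads; all values here are made up for illustration, and in the real flow Experiment::report() passes in the output of build_json():

#include "Report.h"
// json is the nlohmann::json alias declared in Report.h.
int main()
{
    json data;
    data["model"] = "TANNew";        // hypothetical values
    data["version"] = "0.0.1";
    data["folds"] = 5;
    data["seeds"] = { 271, 314 };
    data["date"] = "2023-08-05";
    data["time"] = "20:29:05";
    data["title"] = "Smoke test";
    data["stratified"] = true;
    data["duration"] = 42.0;
    data["platform"] = "linux";
    data["score_name"] = "accuracy";
    data["results"] = json::array(); // empty: body() prints only the column headers
    platform::Report report(data);
    report.show();
    return 0;
}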

src/Platform/main.cc

@@ -116,7 +116,7 @@ int main(int argc, char** argv)
if (saveResults)
experiment.save(PATH_RESULTS);
else
experiment.show();
experiment.report();
cout << "Done!" << endl;
return 0;
}