Compare commits

...

31 Commits

Author SHA1 Message Date
1a534888d6 Fix report format 2023-08-19 23:30:44 +02:00
59ffd179f4 Fix report format 2023-08-19 21:26:48 +02:00
9972738deb Add list datasets and add locale format 2023-08-19 19:05:16 +02:00
bafcb26bb6 Add manage to build target 2023-08-18 13:43:53 +02:00
2d7999d5f2 Add manage to release targets 2023-08-18 13:43:13 +02:00
a6bb22dfb5 Complete first BoostAODE 2023-08-18 11:50:34 +02:00
704dc937be Remove FeatureSel, add SelectKBest to BayesMetrics 2023-08-16 19:05:18 +02:00
a3e665eed6 make weights double 2023-08-16 12:46:09 +02:00
918a7b4180 Remove unneeded output 2023-08-16 12:36:38 +02:00
80b20f35b4 Fix weights mistakes in computation 2023-08-16 12:32:51 +02:00
4d4780c1d5 Add BoostAODE model based on AODE 2023-08-15 16:16:04 +02:00
fa612c531e Complete Adding weights to Models 2023-08-15 15:59:56 +02:00
24b68f9ae2 Add weigths as parameter 2023-08-15 15:04:56 +02:00
a062ebf445 Merge pull request 'reports' (#4) from reports into boostAode
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/4
2023-08-14 16:58:48 +00:00
2a3fc9aa45 Add colors and enhace input control 2023-08-14 17:03:06 +02:00
55d21294d5 Add class Paths and enhance input 2023-08-14 00:40:31 +02:00
3691cb4a61 Add totals and filter by scoreName and model 2023-08-13 18:13:00 +02:00
054567c65a Add sorting capacity 2023-08-13 17:10:18 +02:00
2729b92f06 Summary list 2023-08-13 16:19:17 +02:00
f26ea1f0ac Add weights to BayesMetrics 2023-08-13 12:56:06 +02:00
af0419c9da First approx with const 1 weights 2023-08-13 00:59:02 +02:00
90c92e5c56 Merge pull request 'Add states as result in Proposal methods' (#3) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/3
2023-08-12 14:16:55 +00:00
182b52a887 Add states as result in Proposal methods 2023-08-12 16:16:17 +02:00
6679b90a82 Merge pull request 'optimize_memory' (#2) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/2
2023-08-12 14:15:03 +00:00
405887f833 Solved Ld poor results 2023-08-12 11:49:18 +02:00
3a85481a5a Redo pass states to Network Fit needed in crossval
fix mistake in headerline (report)
2023-08-12 11:10:53 +02:00
0ad5505c16 Spodeld working with poor accuracy 2023-08-10 02:06:18 +02:00
323444b74a const functions 2023-08-08 01:53:41 +02:00
ef1bffcac3 Fixed normal classifiers 2023-08-07 13:50:11 +02:00
06db8f51ce Refactor library and models to lighten data stored
Refactro Ensemble to inherit from Classifier insted of BaseClassifier
2023-08-07 12:49:37 +02:00
e74565ba01 update clang-tidy 2023-08-07 00:44:12 +02:00
58 changed files with 1157 additions and 443 deletions

View File

@@ -13,5 +13,4 @@ HeaderFilterRegex: 'src/*'
AnalyzeTemporaryDtors: false AnalyzeTemporaryDtors: false
WarningsAsErrors: '' WarningsAsErrors: ''
FormatStyle: file FormatStyle: file
FormatStyleOptions: ''
... ...

22
.vscode/launch.json vendored
View File

@@ -25,15 +25,35 @@
"program": "${workspaceFolder}/build/src/Platform/main", "program": "${workspaceFolder}/build/src/Platform/main",
"args": [ "args": [
"-m", "-m",
"AODELd", "BoostAODE",
"-p", "-p",
"/Users/rmontanana/Code/discretizbench/datasets", "/Users/rmontanana/Code/discretizbench/datasets",
"--discretize",
"--stratified", "--stratified",
"-d", "-d",
"iris" "iris"
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "/Users/rmontanana/Code/discretizbench",
}, },
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build/src/Platform/manage",
"args": [
"-n",
"20"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build/src/Platform/list",
"args": [],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{ {
"name": "Build & debug active file", "name": "Build & debug active file",
"type": "cppdbg", "type": "cppdbg",

23
.vscode/tasks.json vendored
View File

@@ -32,6 +32,29 @@
], ],
"group": "build", "group": "build",
"detail": "Task generated by Debugger." "detail": "Task generated by Debugger."
},
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
} }
] ]
} }

View File

@@ -15,7 +15,7 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the main and BayesNetSample build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -j 32 cmake --build build -t main -t BayesNetSample -t manage -t list -j 32
clean: ## Clean the debug info clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ..."; @echo ">>> Cleaning Debug BayesNet ...";
@@ -35,7 +35,7 @@ release: ## Build a Release version of the project
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build; @mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -j 32; cmake --build build -t main -t BayesNetSample -t manage -t list -j 32;
@echo ">>> Done"; @echo ">>> Done";
test: ## Run tests test: ## Run tests

View File

@@ -1,2 +1 @@
add_library(ArffFiles ArffFiles.cc) add_library(ArffFiles ArffFiles.cc)
#target_link_libraries(BayesNet "${TORCH_LIBRARIES}")

View File

@@ -141,43 +141,58 @@ int main(int argc, char** argv)
/* /*
* Begin Processing * Begin Processing
*/ */
auto handler = ArffFiles(); auto ypred = torch::tensor({ 1,2,3,2,2,3,4,5,2,1 });
handler.load(complete_file_name, class_last); auto y = torch::tensor({ 0,0,0,0,2,3,4,0,0,0 });
// Get Dataset X, y auto weights = torch::ones({ 10 }, kDouble);
vector<mdlp::samples_t>& X = handler.getX(); auto mask = ypred == y;
mdlp::labels_t& y = handler.getY(); cout << "ypred:" << ypred << endl;
// Get className & Features cout << "y:" << y << endl;
auto className = handler.getClassName(); cout << "weights:" << weights << endl;
vector<string> features; cout << "mask:" << mask << endl;
auto attributes = handler.getAttributes(); double value_to_add = 0.5;
transform(attributes.begin(), attributes.end(), back_inserter(features), weights += mask.to(torch::kDouble) * value_to_add;
[](const pair<string, string>& item) { return item.first; }); cout << "New weights:" << weights << endl;
// Discretize Dataset auto masked_weights = weights * mask.to(weights.dtype());
auto [Xd, maxes] = discretize(X, y, features); double sum_of_weights = masked_weights.sum().item<double>();
maxes[className] = *max_element(y.begin(), y.end()) + 1; cout << "Sum of weights: " << sum_of_weights << endl;
map<string, vector<int>> states; //weights.index_put_({ mask }, weights + 10);
for (auto feature : features) { // auto handler = ArffFiles();
states[feature] = vector<int>(maxes[feature]); // handler.load(complete_file_name, class_last);
} // // Get Dataset X, y
states[className] = vector<int>(maxes[className]); // vector<mdlp::samples_t>& X = handler.getX();
auto clf = platform::Models::instance()->create(model_name); // mdlp::labels_t& y = handler.getY();
clf->fit(Xd, y, features, className, states); // // Get className & Features
if (dump_cpt) { // auto className = handler.getClassName();
cout << "--- CPT Tables ---" << endl; // vector<string> features;
clf->dump_cpt(); // auto attributes = handler.getAttributes();
} // transform(attributes.begin(), attributes.end(), back_inserter(features),
auto lines = clf->show(); // [](const pair<string, string>& item) { return item.first; });
for (auto line : lines) { // // Discretize Dataset
cout << line << endl; // auto [Xd, maxes] = discretize(X, y, features);
} // maxes[className] = *max_element(y.begin(), y.end()) + 1;
cout << "--- Topological Order ---" << endl; // map<string, vector<int>> states;
auto order = clf->topological_order(); // for (auto feature : features) {
for (auto name : order) { // states[feature] = vector<int>(maxes[feature]);
cout << name << ", "; // }
} // states[className] = vector<int>(maxes[className]);
cout << "end." << endl; // auto clf = platform::Models::instance()->create(model_name);
auto score = clf->score(Xd, y); // clf->fit(Xd, y, features, className, states);
cout << "Score: " << score << endl; // if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt();
// }
// auto lines = clf->show();
// for (auto line : lines) {
// cout << line << endl;
// }
// cout << "--- Topological Order ---" << endl;
// auto order = clf->topological_order();
// for (auto name : order) {
// cout << name << ", ";
// }
// cout << "end." << endl;
// auto score = clf->score(Xd, y);
// cout << "Score: " << score << endl;
// auto graph = clf->graph(); // auto graph = clf->graph();
// auto dot_file = model_name + "_" + file_name; // auto dot_file = model_name + "_" + file_name;
// ofstream file(dot_file + ".dot"); // ofstream file(dot_file + ".dot");

View File

@@ -2,14 +2,16 @@
namespace bayesnet { namespace bayesnet {
AODE::AODE() : Ensemble() {} AODE::AODE() : Ensemble() {}
void AODE::train() void AODE::buildModel(const torch::Tensor& weights)
{ {
models.clear(); models.clear();
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i)); models.push_back(std::make_unique<SPODE>(i));
} }
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
} }
vector<string> AODE::graph(const string& title) vector<string> AODE::graph(const string& title) const
{ {
return Ensemble::graph(title); return Ensemble::graph(title);
} }

View File

@@ -5,11 +5,11 @@
namespace bayesnet { namespace bayesnet {
class AODE : public Ensemble { class AODE : public Ensemble {
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
AODE(); AODE();
virtual ~AODE() {}; virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") override; vector<string> graph(const string& title = "AODE") const override;
}; };
} }
#endif #endif

View File

@@ -1,33 +1,39 @@
#include "AODELd.h" #include "AODELd.h"
#include "Models.h"
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(Ensemble::Xv, Ensemble::yv, features, className) {} AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_; features = features_;
className = className_; className = className_;
states = states_; Xf = X_;
train(); y = y_;
for (const auto& model : models) { // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
model->fit(X_, y_, features_, className_, states_); states = fit_local_discretization(y);
} // We have discretized the input data
n_models = models.size(); // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
fitted = true; Ensemble::fit(dataset, features, className, states);
return *this; return *this;
} }
void AODELd::train() void AODELd::buildModel(const torch::Tensor& weights)
{ {
models.clear(); models.clear();
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i)); models.push_back(std::make_unique<SPODELd>(i));
} }
n_models = models.size();
} }
Tensor AODELd::predict(Tensor& X) void AODELd::trainModel(const torch::Tensor& weights)
{ {
return Ensemble::predict(X); for (const auto& model : models) {
model->fit(Xf, y, features, className, states);
}
} }
vector<string> AODELd::graph(const string& name) vector<string> AODELd::graph(const string& name) const
{ {
return Ensemble::graph(name); return Ensemble::graph(name);
} }

View File

@@ -7,13 +7,14 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
class AODELd : public Ensemble, public Proposal { class AODELd : public Ensemble, public Proposal {
protected:
void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override;
public: public:
AODELd(); AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) override;
virtual ~AODELd() = default; virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; vector<string> graph(const string& name = "AODE") const override;
vector<string> graph(const string& name = "AODE") override;
Tensor predict(Tensor& X) override;
void train() override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };
} }

View File

@@ -5,24 +5,28 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
class BaseClassifier { class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public: public:
// X is nxm vector, y is nx1 vector // X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor // X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0; virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0; vector<int> virtual predict(vector<vector<int>>& X) = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0; float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes() = 0; int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges() = 0; int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() = 0; int virtual getNumberOfStates() const = 0;
vector<string> virtual show() = 0; vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") = 0; vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.1.0"; }; const string inline getVersion() const { return "0.1.0"; };
vector<string> virtual topological_order() = 0; vector<string> virtual topological_order() = 0;
void virtual dump_cpt() = 0; void virtual dump_cpt()const = 0;
}; };
} }
#endif #endif

View File

@@ -2,7 +2,7 @@
#include "Mst.h" #include "Mst.h"
namespace bayesnet { namespace bayesnet {
//samples is nxm tensor used to fit the model //samples is nxm tensor used to fit the model
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates) Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
: samples(samples) : samples(samples)
, features(features) , features(features)
, className(className) , className(className)
@@ -21,6 +21,31 @@ namespace bayesnet {
} }
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
} }
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k)
{
auto n = samples.size(0) - 1;
if (k == 0) {
k = n;
}
// compute scores
scoresKBest.reserve(n);
auto label = samples.index({ -1, "..." });
for (int i = 0; i < n; ++i) {
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
featuresKBest.push_back(i);
}
// sort & reduce scores and features
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] > scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
featuresKBest.resize(k);
scoresKBest.resize(k);
return featuresKBest;
}
vector<double> Metrics::getScoresKBest() const
{
return scoresKBest;
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source) vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{ {
vector<pair<string, string>> result; vector<pair<string, string>> result;
@@ -32,17 +57,18 @@ namespace bayesnet {
} }
return result; return result;
} }
torch::Tensor Metrics::conditionalEdge() torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{ {
auto result = vector<double>(); auto result = vector<double>();
auto source = vector<string>(features); auto source = vector<string>(features);
source.push_back(className); source.push_back(className);
auto combinations = doCombinations(source); auto combinations = doCombinations(source);
double totalWeight = weights.sum().item<double>();
// Compute class prior // Compute class prior
auto margin = torch::zeros({ classNumStates }); auto margin = torch::zeros({ classNumStates }, torch::kFloat);
for (int value = 0; value < classNumStates; ++value) { for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ -1, "..." }) == value; auto mask = samples.index({ -1, "..." }) == value;
margin[value] = mask.sum().item<float>() / samples.size(1); margin[value] = mask.sum().item<double>() / samples.size(1);
} }
for (auto [first, second] : combinations) { for (auto [first, second] : combinations) {
int index_first = find(features.begin(), features.end(), first) - features.begin(); int index_first = find(features.begin(), features.end(), first) - features.begin();
@@ -52,8 +78,9 @@ namespace bayesnet {
auto mask = samples.index({ -1, "..." }) == value; auto mask = samples.index({ -1, "..." }) == value;
auto first_dataset = samples.index({ index_first, mask }); auto first_dataset = samples.index({ index_first, mask });
auto second_dataset = samples.index({ index_second, mask }); auto second_dataset = samples.index({ index_second, mask });
auto mi = mutualInformation(first_dataset, second_dataset); auto weights_dataset = weights.index({ mask });
auto pb = margin[value].item<float>(); auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
auto pb = margin[value].item<double>();
accumulated += pb * mi; accumulated += pb * mi;
} }
result.push_back(accumulated); result.push_back(accumulated);
@@ -70,31 +97,32 @@ namespace bayesnet {
return matrix; return matrix;
} }
// To use in Python // To use in Python
vector<float> Metrics::conditionalEdgeWeights() vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_)
{ {
auto matrix = conditionalEdge(); const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel()); std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v; return v;
} }
double Metrics::entropy(torch::Tensor& feature) double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{ {
torch::Tensor counts = feature.bincount(); torch::Tensor counts = feature.bincount(weights);
int totalWeight = counts.sum().item<int>(); double totalWeight = counts.sum().item<double>();
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
torch::Tensor logProbs = torch::log(probs); torch::Tensor logProbs = torch::log(probs);
torch::Tensor entropy = -probs * logProbs; torch::Tensor entropy = -probs * logProbs;
return entropy.nansum().item<double>(); return entropy.nansum().item<double>();
} }
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x) // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature) double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{ {
int numSamples = firstFeature.sizes()[0]; int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount(); torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts; unordered_map<int, unordered_map<int, double>> jointCounts;
double totalWeight = 0; double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) { for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1; jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
totalWeight += 1; totalWeight += weights[i].item<float>();
} }
if (totalWeight == 0) if (totalWeight == 0)
return 0; return 0;
@@ -115,16 +143,16 @@ namespace bayesnet {
return entropyValue; return entropyValue;
} }
// I(X;Y) = H(Y) - H(Y|X) // I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature) double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{ {
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature); return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
} }
/* /*
Compute the maximum spanning tree considering the weights as distances Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using and the indices of the weights as nodes of this square matrix using
Kruskal algorithm Kruskal algorithm
*/ */
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root) vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root)
{ {
auto mst = MST(features, weights, root); auto mst = MST(features, weights, root);
return mst.maximumSpanningTree(); return mst.maximumSpanningTree();

View File

@@ -12,17 +12,21 @@ namespace bayesnet {
vector<string> features; vector<string> features;
string className; string className;
int classNumStates = 0; int classNumStates = 0;
vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features
double entropy(const Tensor& feature, const Tensor& weights);
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&);
public: public:
Metrics() = default; Metrics() = default;
Metrics(Tensor&, vector<string>&, string&, int); Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int); Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
double entropy(Tensor&); vector<int> SelectKBestWeighted(const torch::Tensor& weights, unsigned k = 0);
double conditionalEntropy(Tensor&, Tensor&); vector<double> getScoresKBest() const;
double mutualInformation(Tensor&, Tensor&); double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<float> conditionalEdgeWeights(); // To use in Python vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
Tensor conditionalEdge(); Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&); vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
}; };
} }
#endif #endif

82
src/BayesNet/BoostAODE.cc Normal file
View File

@@ -0,0 +1,82 @@
#include "BoostAODE.h"
#include "BayesMetrics.h"
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
models.clear();
n_models = 0;
int max_models = .1 * n > 10 ? .1 * n : n;
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
auto y_ = dataset.index({ -1, "..." });
bool exitCondition = false;
bool repeatSparent = false;
vector<int> featuresUsed;
// Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features
// n_models == max_models
int numClasses = states[className].size();
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
auto feature = featureSelection[0];
unique_ptr<Classifier> model;
if (!repeatSparent) {
if (n_models == 0) {
models.resize(n); // Resize for n==nfeatures SPODEs
significanceModels.resize(n);
}
bool found = false;
for (int i = 0; i < featureSelection.size(); ++i) {
if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) {
continue;
}
found = true;
feature = i;
featuresUsed.push_back(feature);
n_models++;
break;
}
if (!found) {
exitCondition = true;
continue;
}
}
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_);
// Step 3.1: Compute the classifier amout of say
auto mask_wrong = ypred != y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double wrongWeights = masked_weights.sum().item<double>();
double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote
if (!repeatSparent) {
models[feature] = std::move(model);
significanceModels[feature] = significance;
} else {
models.push_back(std::move(model));
significanceModels.push_back(significance);
n_models++;
}
exitCondition = n_models == max_models;
}
weights.copy_(weights_);
}
vector<string> BoostAODE::graph(const string& title) const
{
return Ensemble::graph(title);
}
}

16
src/BayesNet/BoostAODE.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
class BoostAODE : public Ensemble {
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
public:
BoostAODE();
virtual ~BoostAODE() {};
vector<string> graph(const string& title = "BoostAODE") const override;
};
}
#endif

View File

@@ -1,5 +1,8 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc) KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}") Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -5,63 +5,74 @@ namespace bayesnet {
using namespace torch; using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
this->features = features; this->features = features;
this->className = className; this->className = className;
this->states = states; this->states = states;
m = dataset.size(1);
n = dataset.size(0) - 1;
checkFitParameters(); checkFitParameters();
auto n_classes = states[className].size(); auto n_classes = states[className].size();
metrics = Metrics(samples, features, className, n_classes); metrics = Metrics(dataset, features, className, n_classes);
model.initialize(); model.initialize();
train(); buildModel(weights);
if (Xv.empty()) { trainModel(weights);
// fit with tensors
model.fit(X, y, features, className);
} else {
// fit with vectors
model.fit(Xv, yv, features, className);
}
fitted = true; fitted = true;
return *this; return *this;
} }
void Classifier::buildDataset(Tensor& ytmp)
{
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
dataset = torch::cat({ dataset, yresized }, 0);
}
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
void Classifier::trainModel(const torch::Tensor& weights)
{
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
this->X = X; dataset = X;
this->y = y; buildDataset(y);
Xv = vector<vector<int>>(); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); return build(features, className, states, weights);
return build(features, className, states);
}
void Classifier::generateTensorXFromVector()
{
X = torch::zeros({ static_cast<int>(Xv.size()), static_cast<int>(Xv[0].size()) }, kInt32);
for (int i = 0; i < Xv.size(); ++i) {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], kInt32));
}
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{ {
Xv = X; dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
generateTensorXFromVector(); for (int i = 0; i < X.size(); ++i) {
this->y = torch::tensor(y, kInt32); dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32));
yv = y; }
return build(features, className, states); auto ytmp = torch::tensor(y, kInt32);
buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
} }
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {
auto sizes = X.sizes();
m = sizes[1];
n = sizes[0];
if (m != y.size(0)) {
throw invalid_argument("X and y must have the same number of samples");
}
if (n != features.size()) { if (n != features.size()) {
throw invalid_argument("X and features must have the same number of features"); throw invalid_argument("X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
} }
if (states.find(className) == states.end()) { if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states"); throw invalid_argument("className not found in states");
@@ -108,7 +119,7 @@ namespace bayesnet {
} }
return model.score(X, y); return model.score(X, y);
} }
vector<string> Classifier::show() vector<string> Classifier::show() const
{ {
return model.show(); return model.show();
} }
@@ -120,16 +131,16 @@ namespace bayesnet {
} }
model.addNode(className); model.addNode(className);
} }
int Classifier::getNumberOfNodes() int Classifier::getNumberOfNodes() const
{ {
// Features does not include class // Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0; return fitted ? model.getFeatures().size() + 1 : 0;
} }
int Classifier::getNumberOfEdges() int Classifier::getNumberOfEdges() const
{ {
return fitted ? model.getEdges().size() : 0; return fitted ? model.getNumEdges() : 0;
} }
int Classifier::getNumberOfStates() int Classifier::getNumberOfStates() const
{ {
return fitted ? model.getStates() : 0; return fitted ? model.getStates() : 0;
} }
@@ -137,9 +148,8 @@ namespace bayesnet {
{ {
return model.topological_sort(); return model.topological_sort();
} }
void Classifier::dump_cpt() void Classifier::dump_cpt() const
{ {
model.dump_cpt(); model.dump_cpt();
} }
} }

View File

@@ -10,39 +10,38 @@ using namespace torch;
namespace bayesnet { namespace bayesnet {
class Classifier : public BaseClassifier { class Classifier : public BaseClassifier {
private: private:
bool fitted; void buildDataset(torch::Tensor& y);
Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states); Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights);
protected: protected:
Network model; bool fitted;
int m, n; // m: number of samples, n: number of features int m, n; // m: number of samples, n: number of features
Tensor X; // nxm tensor Network model;
vector<vector<int>> Xv; // nxm vector
Tensor y;
vector<int> yv;
Tensor samples; // (n+1)xm tensor
Metrics metrics; Metrics metrics;
vector<string> features; vector<string> features;
string className; string className;
map<string, vector<int>> states; map<string, vector<int>> states;
Tensor dataset; // (n+1)xm tensor
void checkFitParameters(); void checkFitParameters();
void generateTensorXFromVector(); virtual void buildModel(const torch::Tensor& weights) = 0;
virtual void train() = 0; void trainModel(const torch::Tensor& weights) override;
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
void addNodes(); void addNodes();
int getNumberOfNodes() override; int getNumberOfNodes() const override;
int getNumberOfEdges() override; int getNumberOfEdges() const override;
int getNumberOfStates() override; int getNumberOfStates() const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override; vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() override; vector<string> show() const override;
vector<string> topological_order() override; vector<string> topological_order() override;
void dump_cpt() override; void dump_cpt() const override;
}; };
} }
#endif #endif

View File

@@ -3,63 +3,24 @@
namespace bayesnet { namespace bayesnet {
using namespace torch; using namespace torch;
Ensemble::Ensemble() : n_models(0), metrics(Metrics()), fitted(false) {} Ensemble::Ensemble() : Classifier(Network()) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
void Ensemble::trainModel(const torch::Tensor& weights)
{ {
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
this->features = features;
this->className = className;
this->states = states;
auto n_classes = states[className].size();
metrics = Metrics(samples, features, className, n_classes);
// Build models
train();
// Train models
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
if (Xv.empty()) { // fit with vectors
// fit with tensors models[i]->fit(dataset, features, className, states);
models[i]->fit(X, y, features, className, states);
} else {
// fit with vectors
models[i]->fit(Xv, yv, features, className, states);
}
} }
fitted = true;
return *this;
}
void Ensemble::generateTensorXFromVector()
{
X = torch::zeros({ static_cast<int>(Xv.size()), static_cast<int>(Xv[0].size()) }, kInt32);
for (int i = 0; i < Xv.size(); ++i) {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], kInt32));
}
}
Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = X;
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
return build(features, className, states);
}
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
Xv = X;
generateTensorXFromVector();
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
} }
vector<int> Ensemble::voting(Tensor& y_pred) vector<int> Ensemble::voting(Tensor& y_pred)
{ {
auto y_pred_ = y_pred.accessor<int, 2>(); auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final; vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) { for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(y_pred.size(1), 0); vector<double> votes(y_pred.size(1), 0);
for (int j = 0; j < y_pred.size(1); ++j) { for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1; votes[y_pred_[i][j]] += significanceModels[j];
} }
// argsort in descending order // argsort in descending order
auto indices = argsort(votes); auto indices = argsort(votes);
@@ -132,9 +93,8 @@ namespace bayesnet {
} }
} }
return (double)correct / y_pred.size(); return (double)correct / y_pred.size();
} }
vector<string> Ensemble::show() vector<string> Ensemble::show() const
{ {
auto result = vector<string>(); auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -143,7 +103,7 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<string> Ensemble::graph(const string& title) vector<string> Ensemble::graph(const string& title) const
{ {
auto result = vector<string>(); auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -152,7 +112,7 @@ namespace bayesnet {
} }
return result; return result;
} }
int Ensemble::getNumberOfNodes() int Ensemble::getNumberOfNodes() const
{ {
int nodes = 0; int nodes = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -160,7 +120,7 @@ namespace bayesnet {
} }
return nodes; return nodes;
} }
int Ensemble::getNumberOfEdges() int Ensemble::getNumberOfEdges() const
{ {
int edges = 0; int edges = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
@@ -168,7 +128,7 @@ namespace bayesnet {
} }
return edges; return edges;
} }
int Ensemble::getNumberOfStates() int Ensemble::getNumberOfStates() const
{ {
int nstates = 0; int nstates = 0;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {

View File

@@ -8,44 +8,32 @@ using namespace std;
using namespace torch; using namespace torch;
namespace bayesnet { namespace bayesnet {
class Ensemble : public BaseClassifier { class Ensemble : public Classifier {
private: private:
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states); Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected: protected:
unsigned n_models; unsigned n_models;
bool fitted;
vector<unique_ptr<Classifier>> models; vector<unique_ptr<Classifier>> models;
Tensor X; vector<double> significanceModels;
vector<vector<int>> Xv; void trainModel(const torch::Tensor& weights) override;
Tensor y;
vector<int> yv;
Tensor samples;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
void virtual train() = 0;
vector<int> voting(Tensor& y_pred); vector<int> voting(Tensor& y_pred);
void generateTensorXFromVector();
public: public:
Ensemble(); Ensemble();
virtual ~Ensemble() = default; virtual ~Ensemble() = default;
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Ensemble& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override; vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override; float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override; float score(vector<vector<int>>& X, vector<int>& y) override;
int getNumberOfNodes() override; int getNumberOfNodes() const override;
int getNumberOfEdges() override; int getNumberOfEdges() const override;
int getNumberOfStates() override; int getNumberOfStates() const override;
vector<string> show() override; vector<string> show() const override;
vector<string> graph(const string& title) override; vector<string> graph(const string& title) const override;
vector<string> topological_order() override vector<string> topological_order() override
{ {
return vector<string>(); return vector<string>();
} }
void dump_cpt() override void dump_cpt() const override
{ {
} }
}; };

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
using namespace torch; using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::train() void KDB::buildModel(const torch::Tensor& weights)
{ {
/* /*
1. For each feature Xi, compute mutual information, I(X;C), 1. For each feature Xi, compute mutual information, I(X;C),
@@ -28,13 +28,14 @@ namespace bayesnet {
// 1. For each feature Xi, compute mutual information, I(X;C), // 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class. // where C is the class.
addNodes(); addNodes();
vector <float> mi; const Tensor& y = dataset.index({ -1, "..." });
vector<double> mi;
for (auto i = 0; i < features.size(); i++) { for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = X.index({ i, "..." }); Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y)); mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
} }
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each // 2. Compute class conditional mutual information I(Xi;XjIC), f or each
auto conditionalEdgeWeights = metrics.conditionalEdge(); auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty. // 3. Let the used variable list, S, be empty.
vector<int> S; vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single // 4. Let the DAG network being constructed, BN, begin with a single
@@ -78,7 +79,7 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0; exit_cond = num == n_edges || candidates.size(0) == 0;
} }
} }
vector<string> KDB::graph(const string& title) vector<string> KDB::graph(const string& title) const
{ {
string header{ title }; string header{ title };
if (title == "KDB") { if (title == "KDB") {

View File

@@ -1,5 +1,6 @@
#ifndef KDB_H #ifndef KDB_H
#define KDB_H #define KDB_H
#include <torch/torch.h>
#include "Classifier.h" #include "Classifier.h"
#include "bayesnetUtils.h" #include "bayesnetUtils.h"
namespace bayesnet { namespace bayesnet {
@@ -11,11 +12,11 @@ namespace bayesnet {
float theta; float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights); void add_m_edges(int idx, vector<int>& S, Tensor& weights);
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {}; virtual ~KDB() {};
vector<string> graph(const string& name = "KDB") override; vector<string> graph(const string& name = "KDB") const override;
}; };
} }
#endif #endif

View File

@@ -2,7 +2,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(KDB::Xv, KDB::yv, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... // This first part should go in a Classifier method called fit_local_discretization o fit_float...
@@ -11,16 +11,11 @@ namespace bayesnet {
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y); states = fit_local_discretization(y);
generateTensorXFromVector();
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(KDB::Xv, KDB::yv, features, className, states); KDB::fit(dataset, features, className, states);
localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
model.fit(KDB::Xv, KDB::yv, features, className);
return *this; return *this;
} }
Tensor KDBLd::predict(Tensor& X) Tensor KDBLd::predict(Tensor& X)
@@ -28,7 +23,7 @@ namespace bayesnet {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return KDB::predict(Xt); return KDB::predict(Xt);
} }
vector<string> KDBLd::graph(const string& name) vector<string> KDBLd::graph(const string& name) const
{ {
return KDB::graph(name); return KDB::graph(name);
} }

View File

@@ -11,7 +11,7 @@ namespace bayesnet {
explicit KDBLd(int k); explicit KDBLd(int k);
virtual ~KDBLd() = default; virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") override; vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };

View File

@@ -94,7 +94,7 @@ namespace bayesnet {
return result; return result;
} }
MST::MST(vector<string>& features, Tensor& weights, int root) : features(features), weights(weights), root(root) {} MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree() vector<pair<int, int>> MST::maximumSpanningTree()
{ {
auto num_features = features.size(); auto num_features = features.size();

View File

@@ -13,7 +13,7 @@ namespace bayesnet {
int root = 0; int root = 0;
public: public:
MST() = default; MST() = default;
MST(vector<string>& features, Tensor& weights, int root); MST(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree(); vector<pair<int, int>> maximumSpanningTree();
}; };
class Graph { class Graph {

View File

@@ -5,7 +5,6 @@
namespace bayesnet { namespace bayesnet {
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false) {} Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false) {}
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {} Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other. Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted) getmaxThreads()), fitted(other.fitted)
{ {
@@ -20,7 +19,6 @@ namespace bayesnet {
classNumStates = 0; classNumStates = 0;
fitted = false; fitted = false;
nodes.clear(); nodes.clear();
dataset.clear();
samples = torch::Tensor(); samples = torch::Tensor();
} }
float Network::getmaxThreads() float Network::getmaxThreads()
@@ -44,15 +42,15 @@ namespace bayesnet {
} }
nodes[name] = std::make_unique<Node>(name); nodes[name] = std::make_unique<Node>(name);
} }
vector<string> Network::getFeatures() vector<string> Network::getFeatures() const
{ {
return features; return features;
} }
int Network::getClassNumStates() int Network::getClassNumStates() const
{ {
return classNumStates; return classNumStates;
} }
int Network::getStates() int Network::getStates() const
{ {
int result = 0; int result = 0;
for (auto& node : nodes) { for (auto& node : nodes) {
@@ -60,7 +58,7 @@ namespace bayesnet {
} }
return result; return result;
} }
string Network::getClassName() string Network::getClassName() const
{ {
return className; return className;
} }
@@ -105,8 +103,11 @@ namespace bayesnet {
{ {
return nodes; return nodes;
} }
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className) void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
if (weights.size(0) != n_samples) {
throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) { if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")"); throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
} }
@@ -123,50 +124,56 @@ namespace bayesnet {
if (find(features.begin(), features.end(), feature) == features.end()) { if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features"); throw invalid_argument("Feature " + feature + " not found in Network::features");
} }
if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states");
}
} }
} }
void Network::setStates() void Network::setStates(const map<string, vector<int>>& states)
{ {
// Set states to every Node in the network // Set states to every Node in the network
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
nodes[features[i]]->setNumStates(static_cast<int>(torch::max(samples.index({ i, "..." })).item<int>()) + 1); nodes[features[i]]->setNumStates(states.at(features[i]).size());
} }
classNumStates = nodes[className]->getNumStates(); classNumStates = nodes[className]->getNumStates();
} }
// X comes in nxm, where n is the number of features and m the number of samples // X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className) void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{ {
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className); checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className; this->className = className;
dataset.clear();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0); samples = torch::cat({ X , ytmp }, 0);
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." }); auto row_feature = X.index({ i, "..." });
dataset[featureNames[i]] = vector<int>(row_feature.data_ptr<int>(), row_feature.data_ptr<int>() + row_feature.size(0));;
} }
dataset[className] = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); completeFit(states, weights);
completeFit(); }
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights);
} }
// input_data comes in nxm, where n is the number of features and m the number of samples // input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className) void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{ {
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className); const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
this->className = className; this->className = className;
dataset.clear(); // Build tensor of samples (nxm) (n+1 because of the class)
// Build dataset & tensor of samples (nxm) (n+1 because of the class)
samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32); samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
dataset[featureNames[i]] = input_data[i];
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
} }
dataset[className] = labels;
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(); completeFit(states, weights);
} }
void Network::completeFit() void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights)
{ {
setStates(); setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads); int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) { if (maxThreadsRunning < 1) {
maxThreadsRunning = 1; maxThreadsRunning = 1;
@@ -179,7 +186,7 @@ namespace bayesnet {
while (nextNodeIndex < nodes.size()) { while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx); unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; }); cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() { threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() {
while (true) { while (true) {
unique_lock<mutex> lock(mtx); unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) { if (nextNodeIndex >= nodes.size()) {
@@ -188,7 +195,7 @@ namespace bayesnet {
auto& pair = *std::next(nodes.begin(), nextNodeIndex); auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex; ++nextNodeIndex;
lock.unlock(); lock.unlock();
pair.second->computeCPT(dataset, laplaceSmoothing); pair.second->computeCPT(samples, features, laplaceSmoothing, weights);
lock.lock(); lock.lock();
nodes[pair.first] = std::move(pair.second); nodes[pair.first] = std::move(pair.second);
lock.unlock(); lock.unlock();
@@ -212,7 +219,7 @@ namespace bayesnet {
torch::Tensor result; torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) { for (int i = 0; i < samples.size(1); ++i) {
auto sample = samples.index({ "...", i }); const Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample); auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64); auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); // result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
@@ -328,22 +335,22 @@ namespace bayesnet {
mutex mtx; mutex mtx;
for (int i = 0; i < classNumStates; ++i) { for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() { threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = map<string, int>(evidence); auto completeEvidence = map<string, int>(evidence);
completeEvidence[getClassName()] = i; completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence); double factor = computeFactor(completeEvidence);
lock_guard<mutex> lock(mtx); lock_guard<mutex> lock(mtx);
result[i] = factor; result[i] = factor;
}); });
} }
for (auto& thread : threads) { for (auto& thread : threads) {
thread.join(); thread.join();
} }
// Normalize result // Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0); double sum = accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](double& value) { return value / sum; }); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result; return result;
} }
vector<string> Network::show() vector<string> Network::show() const
{ {
vector<string> result; vector<string> result;
// Draw the network // Draw the network
@@ -356,7 +363,7 @@ namespace bayesnet {
} }
return result; return result;
} }
vector<string> Network::graph(const string& title) vector<string> Network::graph(const string& title) const
{ {
auto output = vector<string>(); auto output = vector<string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet "; auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
@@ -370,7 +377,7 @@ namespace bayesnet {
output.push_back("}\n"); output.push_back("}\n");
return output; return output;
} }
vector<pair<string, string>> Network::getEdges() vector<pair<string, string>> Network::getEdges() const
{ {
auto edges = vector<pair<string, string>>(); auto edges = vector<pair<string, string>>();
for (const auto& node : nodes) { for (const auto& node : nodes) {
@@ -382,6 +389,10 @@ namespace bayesnet {
} }
return edges; return edges;
} }
int Network::getNumEdges() const
{
return getEdges().size();
}
vector<string> Network::topological_sort() vector<string> Network::topological_sort()
{ {
/* Check if al the fathers of every node are before the node */ /* Check if al the fathers of every node are before the node */
@@ -420,10 +431,11 @@ namespace bayesnet {
} }
return result; return result;
} }
void Network::dump_cpt() void Network::dump_cpt() const
{ {
for (auto& node : nodes) { for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl; cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
cout << node.second->getCPT() << endl;
} }
} }
} }

View File

@@ -8,29 +8,23 @@ namespace bayesnet {
class Network { class Network {
private: private:
map<string, unique_ptr<Node>> nodes; map<string, unique_ptr<Node>> nodes;
map<string, vector<int>> dataset;
bool fitted; bool fitted;
float maxThreads = 0.95; float maxThreads = 0.95;
int classNumStates; int classNumStates;
vector<string> features; // Including class vector<string> features; // Including classname
string className; string className;
int laplaceSmoothing = 1; double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&); bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&); vector<double> predict_sample(const vector<int>&);
vector<double> predict_sample(const torch::Tensor&); vector<double> predict_sample(const torch::Tensor&);
vector<double> exactInference(map<string, int>&); vector<double> exactInference(map<string, int>&);
double computeFactor(map<string, int>&); double computeFactor(map<string, int>&);
double mutual_info(torch::Tensor&, torch::Tensor&); void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights);
double entropy(torch::Tensor&); void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights);
double conditionalEntropy(torch::Tensor&, torch::Tensor&); void setStates(const map<string, vector<int>>&);
double mutualInformation(torch::Tensor&, torch::Tensor&);
void completeFit();
void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className);
void setStates();
public: public:
Network(); Network();
explicit Network(float, int);
explicit Network(float); explicit Network(float);
explicit Network(Network&); explicit Network(Network&);
torch::Tensor& getSamples(); torch::Tensor& getSamples();
@@ -38,26 +32,26 @@ namespace bayesnet {
void addNode(const string&); void addNode(const string&);
void addEdge(const string&, const string&); void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes(); map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures(); vector<string> getFeatures() const;
int getStates(); int getStates() const;
vector<pair<string, string>> getEdges(); vector<pair<string, string>> getEdges() const;
int getClassNumStates(); int getNumEdges() const;
string getClassName(); int getClassNumStates() const;
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&); string getClassName() const;
void fit(torch::Tensor&, torch::Tensor&, const vector<string>&, const string&); void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
//Computes the conditional edge weight of variable index u and v conditioned on class_node
torch::Tensor conditionalEdgeWeight();
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&); double score(const vector<vector<int>>&, const vector<int>&);
vector<string> topological_sort(); vector<string> topological_sort();
vector<string> show(); vector<string> show() const;
vector<string> graph(const string& title); // Returns a vector of strings representing the graph in graphviz format vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize(); void initialize();
void dump_cpt(); void dump_cpt() const;
inline string version() { return "0.1.0"; } inline string version() { return "0.1.0"; }
}; };
} }

View File

@@ -84,7 +84,7 @@ namespace bayesnet {
} }
return result; return result;
} }
void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing) void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{ {
dimensions.clear(); dimensions.clear();
// Get dimensions of the CPT // Get dimensions of the CPT
@@ -94,12 +94,24 @@ namespace bayesnet {
// Create a tensor of zeros with the dimensions of the CPT // Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
// Fill table with counts // Fill table with counts
for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) { auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates; torch::List<c10::optional<torch::Tensor>> coordinates;
coordinates.push_back(torch::tensor(dataset[name][n_sample])); coordinates.push_back(dataset.index({ name_index, n_sample }));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&dataset, &n_sample](const auto& parent) { return torch::tensor(dataset[parent->getName()][n_sample]); }); for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
}
// Increment the count of the corresponding coordinate // Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1); cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
} }
// Normalize the counts // Normalize the counts
cpTable = cpTable / cpTable.sum(0); cpTable = cpTable / cpTable.sum(0);

View File

@@ -26,7 +26,7 @@ namespace bayesnet {
vector<Node*>& getParents(); vector<Node*>& getParents();
vector<Node*>& getChildren(); vector<Node*>& getChildren();
torch::Tensor& getCPT(); torch::Tensor& getCPT();
void computeCPT(map<string, vector<int>>&, const int); void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const; int getNumStates() const;
void setNumStates(int); void setNumStates(int);
unsigned minFill(); unsigned minFill();

View File

@@ -2,21 +2,21 @@
#include "ArffFiles.h" #include "ArffFiles.h"
namespace bayesnet { namespace bayesnet {
Proposal::Proposal(vector<vector<int>>& Xv_, vector<int>& yv_, vector<string>& features_, string& className_) : Xv(Xv_), yv(yv_), pFeatures(features_), pClassName(className_) {} Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal() Proposal::~Proposal()
{ {
for (auto& [key, value] : discretizers) { for (auto& [key, value] : discretizers) {
delete value; delete value;
} }
} }
void Proposal::localDiscretizationProposal(map<string, vector<int>>& states, Network& model) map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model)
{ {
// order of local discretization is important. no good 0, 1, 2... // order of local discretization is important. no good 0, 1, 2...
// although we rediscretize features after the local discretization of every feature // although we rediscretize features after the local discretization of every feature
auto order = model.topological_sort(); auto order = model.topological_sort();
auto& nodes = model.getNodes(); auto& nodes = model.getNodes();
map<string, vector<int>> states = oldStates;
vector<int> indicesToReDiscretize; vector<int> indicesToReDiscretize;
auto n_samples = Xf.size(1);
bool upgrade = false; // Flag to check if we need to upgrade the model bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) { for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents(); auto nodeParents = nodes[feature]->getParents();
@@ -30,13 +30,13 @@ namespace bayesnet {
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end()); parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents // Get the indices of the parents
vector<int> indices; vector<int> indices;
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); }); transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y) // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents; vector<string> yJoinParents(Xf.size(1));
transform(yv.begin(), yv.end(), back_inserter(yJoinParents), [&](const auto& p) {return to_string(p); });
for (auto idx : indices) { for (auto idx : indices) {
for (int i = 0; i < n_samples; ++i) { for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(Xv[idx][i]); yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
} }
} }
auto arff = ArffFiles(); auto arff = ArffFiles();
@@ -59,26 +59,32 @@ namespace bayesnet {
for (auto index : indicesToReDiscretize) { for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>(); auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
Xv[index] = discretizers[pFeatures[index]]->transform(Xt); pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1); auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0); iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node //Update new states of the feature/node
states[pFeatures[index]] = xStates; states[pFeatures[index]] = xStates;
} }
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states);
} }
return states;
} }
void Proposal::fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y) map<string, vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{ {
// Sharing Xv and yv with Classifier // Discretize the continuous input data and build pDataset (Classifier::dataset)
Xv = vector<vector<int>>(); int m = Xf.size(1);
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); int n = Xf.size(0);
map<string, vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row) // discretize input data by feature(row)
for (int i = 0; i < pFeatures.size(); ++i) { for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp(); auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>(); auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv); discretizer->fit(Xt, yv);
Xv.push_back(discretizer->transform(Xt)); pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1); auto xStates = vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0); iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates; states[pFeatures[i]] = xStates;
@@ -88,6 +94,8 @@ namespace bayesnet {
auto yStates = vector<int>(n_classes); auto yStates = vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0); iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates; states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y);
return states;
} }
torch::Tensor Proposal::prepareX(torch::Tensor& X) torch::Tensor Proposal::prepareX(torch::Tensor& X)
{ {

View File

@@ -10,20 +10,20 @@
namespace bayesnet { namespace bayesnet {
class Proposal { class Proposal {
public: public:
Proposal(vector<vector<int>>& Xv_, vector<int>& yv_, vector<string>& features_, string& className_); Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_);
virtual ~Proposal(); virtual ~Proposal();
protected: protected:
torch::Tensor prepareX(torch::Tensor& X); torch::Tensor prepareX(torch::Tensor& X);
void localDiscretizationProposal(map<string, vector<int>>& states, Network& model); map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model);
void fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y); map<string, vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers; map<string, mdlp::CPPFImdlp*> discretizers;
private: private:
torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures; vector<string>& pFeatures;
string& pClassName; string& pClassName;
vector<vector<int>>& Xv; // X discrete nxm vector
vector<int>& yv;
}; };
} }
#endif #endif

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
SPODE::SPODE(int root) : Classifier(Network()), root(root) {} SPODE::SPODE(int root) : Classifier(Network()), root(root) {}
void SPODE::train() void SPODE::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
addNodes(); addNodes();
@@ -17,7 +17,7 @@ namespace bayesnet {
} }
} }
} }
vector<string> SPODE::graph(const string& name) vector<string> SPODE::graph(const string& name) const
{ {
return model.graph(name); return model.graph(name);
} }

View File

@@ -7,11 +7,11 @@ namespace bayesnet {
private: private:
int root; int root;
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() {}; virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") override; vector<string> graph(const string& name = "SPODE") const override;
}; };
} }
#endif #endif

View File

@@ -2,7 +2,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(SPODE::Xv, SPODE::yv, features, className) {} SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... // This first part should go in a Classifier method called fit_local_discretization o fit_float...
@@ -11,24 +11,35 @@ namespace bayesnet {
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y); states = fit_local_discretization(y);
generateTensorXFromVector();
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(SPODE::Xv, SPODE::yv, features, className, states); SPODE::fit(dataset, features, className, states);
localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
model.fit(SPODE::Xv, SPODE::yv, features, className);
return *this; return *this;
} }
SPODELd& SPODELd::fit(torch::Tensor& dataset, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone();
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
features = features_;
className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor SPODELd::predict(Tensor& X) Tensor SPODELd::predict(Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return SPODE::predict(Xt); return SPODE::predict(Xt);
} }
vector<string> SPODELd::graph(const string& name) vector<string> SPODELd::graph(const string& name) const
{ {
return SPODE::graph(name); return SPODE::graph(name);
} }

View File

@@ -6,12 +6,12 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
class SPODELd : public SPODE, public Proposal { class SPODELd : public SPODE, public Proposal {
private:
public: public:
explicit SPODELd(int root); explicit SPODELd(int root);
virtual ~SPODELd() = default; virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") override; SPODELd& fit(torch::Tensor& dataset, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };

View File

@@ -5,25 +5,25 @@ namespace bayesnet {
TAN::TAN() : Classifier(Network()) {} TAN::TAN() : Classifier(Network()) {}
void TAN::train() void TAN::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
addNodes(); addNodes();
// 1. Compute mutual information between each feature and the class and set the root node // 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class // as the highest mutual information with the class
auto mi = vector <pair<int, float >>(); auto mi = vector <pair<int, float >>();
Tensor class_dataset = samples.index({ -1, "..." }); Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) { for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = samples.index({ i, "..." }); Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset); auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value }); mi.push_back({ i, mi_value });
} }
sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first; auto root = mi[mi.size() - 1].first;
// 2. Compute mutual information between each feature and the class // 2. Compute mutual information between each feature and the class
auto weights = metrics.conditionalEdge(); auto weights_matrix = metrics.conditionalEdge(weights);
// 3. Compute the maximum spanning tree // 3. Compute the maximum spanning tree
auto mst = metrics.maximumSpanningTree(features, weights, root); auto mst = metrics.maximumSpanningTree(features, weights_matrix, root);
// 4. Add edges from the maximum spanning tree to the model // 4. Add edges from the maximum spanning tree to the model
for (auto i = 0; i < mst.size(); ++i) { for (auto i = 0; i < mst.size(); ++i) {
auto [from, to] = mst[i]; auto [from, to] = mst[i];
@@ -34,7 +34,7 @@ namespace bayesnet {
model.addEdge(className, feature); model.addEdge(className, feature);
} }
} }
vector<string> TAN::graph(const string& title) vector<string> TAN::graph(const string& title) const
{ {
return model.graph(title); return model.graph(title);
} }

View File

@@ -7,11 +7,11 @@ namespace bayesnet {
class TAN : public Classifier { class TAN : public Classifier {
private: private:
protected: protected:
void train() override; void buildModel(const torch::Tensor& weights) override;
public: public:
TAN(); TAN();
virtual ~TAN() {}; virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") override; vector<string> graph(const string& name = "TAN") const override;
}; };
} }
#endif #endif

View File

@@ -2,7 +2,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
TANLd::TANLd() : TAN(), Proposal(TAN::Xv, TAN::yv, features, className) {} TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
{ {
// This first part should go in a Classifier method called fit_local_discretization o fit_float... // This first part should go in a Classifier method called fit_local_discretization o fit_float...
@@ -11,24 +11,20 @@ namespace bayesnet {
Xf = X_; Xf = X_;
y = y_; y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y); states = fit_local_discretization(y);
generateTensorXFromVector();
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(TAN::Xv, TAN::yv, features, className, states); TAN::fit(dataset, features, className, states);
localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
model.fit(TAN::Xv, TAN::yv, features, className);
return *this; return *this;
} }
Tensor TANLd::predict(Tensor& X) Tensor TANLd::predict(Tensor& X)
{ {
auto Xt = prepareX(X); auto Xt = prepareX(X);
return TAN::predict(Xt); return TAN::predict(Xt);
} }
vector<string> TANLd::graph(const string& name) vector<string> TANLd::graph(const string& name) const
{ {
return TAN::graph(name); return TAN::graph(name);
} }

View File

@@ -11,7 +11,7 @@ namespace bayesnet {
TANLd(); TANLd();
virtual ~TANLd() = default; virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override; TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") override; vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override; Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; }; static inline string version() { return "0.0.1"; };
}; };

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
// Return the indices in descending order // Return the indices in descending order
vector<int> argsort(vector<float>& nums) vector<int> argsort(vector<double>& nums)
{ {
int n = nums.size(); int n = nums.size();
vector<int> indices(n); vector<int> indices(n);

View File

@@ -5,7 +5,7 @@
namespace bayesnet { namespace bayesnet {
using namespace std; using namespace std;
using namespace torch; using namespace torch;
vector<int> argsort(vector<float>& nums); vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor); vector<vector<int>> tensorToVector(Tensor& tensor);
} }
#endif //BAYESNET_UTILS_H #endif //BAYESNET_UTILS_H

10
src/Platform/BestResult.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef BESTRESULT_H
#define BESTRESULT_H
#include <string>
class BestResult {
public:
static std::string title() { return "STree_default (linear-ovo)"; }
static double score() { return 22.109799; }
static std::string scoreName() { return "accuracy"; }
};
#endif

View File

@@ -5,4 +5,8 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") add_executable(manage manage.cc Results.cc Report.cc)
add_executable(list list.cc platformUtils Datasets.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(manage "${TORCH_LIBRARIES}")
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

14
src/Platform/Colors.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef COLORS_H
#define COLORS_H
class Colors {
public:
static std::string MAGENTA() { return "\033[1;35m"; }
static std::string BLUE() { return "\033[1;34m"; }
static std::string CYAN() { return "\033[1;36m"; }
static std::string GREEN() { return "\033[1;32m"; }
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -24,75 +24,110 @@ namespace platform {
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result; return result;
} }
vector<string> Datasets::getFeatures(string name) vector<string> Datasets::getFeatures(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getFeatures(); return datasets.at(name)->getFeatures();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Datasets::getStates(string name) map<string, vector<int>> Datasets::getStates(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getStates(); return datasets.at(name)->getStates();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
string Datasets::getClassName(string name) void Datasets::loadDataset(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getClassName(); return;
} else {
datasets.at(name)->load();
}
}
string Datasets::getClassName(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Datasets::getNSamples(string name) int Datasets::getNSamples(const string& name) const
{ {
if (datasets[name]->isLoaded()) { if (datasets.at(name)->isLoaded()) {
return datasets[name]->getNSamples(); return datasets.at(name)->getNSamples();
} else { } else {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name) int Datasets::getNClasses(const string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
vector<int> Datasets::getClassesCounts(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectors(); return datasets[name]->getVectors();
} }
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name) pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getVectorsDiscretized(); return datasets[name]->getVectorsDiscretized();
} }
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name) pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name)
{ {
if (!datasets[name]->isLoaded()) { if (!datasets[name]->isLoaded()) {
datasets[name]->load(); datasets[name]->load();
} }
return datasets[name]->getTensors(); return datasets[name]->getTensors();
} }
bool Datasets::isDataset(const string& name) bool Datasets::isDataset(const string& name) const
{ {
return datasets.find(name) != datasets.end(); return datasets.find(name) != datasets.end();
} }
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{ {
} }
string Dataset::getName() string Dataset::getName() const
{ {
return name; return name;
} }
string Dataset::getClassName() string Dataset::getClassName() const
{ {
return className; return className;
} }
vector<string> Dataset::getFeatures() vector<string> Dataset::getFeatures() const
{ {
if (loaded) { if (loaded) {
return features; return features;
@@ -100,7 +135,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNFeatures() int Dataset::getNFeatures() const
{ {
if (loaded) { if (loaded) {
return n_features; return n_features;
@@ -108,7 +143,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
int Dataset::getNSamples() int Dataset::getNSamples() const
{ {
if (loaded) { if (loaded) {
return n_samples; return n_samples;
@@ -116,7 +151,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded."); throw invalid_argument("Dataset not loaded.");
} }
} }
map<string, vector<int>> Dataset::getStates() map<string, vector<int>> Dataset::getStates() const
{ {
if (loaded) { if (loaded) {
return states; return states;

View File

@@ -29,15 +29,15 @@ namespace platform {
public: public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&); explicit Dataset(const Dataset&);
string getName(); string getName() const;
string getClassName(); string getClassName() const;
vector<string> getFeatures(); vector<string> getFeatures() const;
map<string, vector<int>> getStates(); map<string, vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors(); pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(); pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors(); pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures(); int getNFeatures() const;
int getNSamples(); int getNSamples() const;
void load(); void load();
const bool inline isLoaded() const { return loaded; }; const bool inline isLoaded() const { return loaded; };
}; };
@@ -51,14 +51,17 @@ namespace platform {
public: public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); }; explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames(); vector<string> getNames();
vector<string> getFeatures(string name); vector<string> getFeatures(const string& name) const;
int getNSamples(string name); int getNSamples(const string& name) const;
string getClassName(string name); string getClassName(const string& name) const;
map<string, vector<int>> getStates(string name); int getNClasses(const string& name);
pair<vector<vector<float>>&, vector<int>&> getVectors(string name); vector<int> getClassesCounts(const string& name) const;
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name); map<string, vector<int>> getStates(const string& name) const;
pair<torch::Tensor&, torch::Tensor&> getTensors(string name); pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name);
bool isDataset(const string& name); pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name);
bool isDataset(const string& name) const;
void loadDataset(const string& name) const;
}; };
}; };

View File

@@ -10,6 +10,7 @@
#include "KDBLd.h" #include "KDBLd.h"
#include "SPODELd.h" #include "SPODELd.h"
#include "AODELd.h" #include "AODELd.h"
#include "BoostAODE.h"
namespace platform { namespace platform {
class Models { class Models {
private: private:

11
src/Platform/Paths.h Normal file
View File

@@ -0,0 +1,11 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
namespace platform {
class Paths {
public:
static std::string datasets() { return "datasets/"; }
static std::string results() { return "results/"; }
};
}
#endif

View File

@@ -1,66 +1,115 @@
#include <sstream>
#include <locale>
#include "Report.h" #include "Report.h"
#include "BestResult.h"
namespace platform { namespace platform {
string headerLine(const string& text) string headerLine(const string& text)
{ {
int n = MAXL - text.length() - 3; int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n;
return "* " + text + string(n, ' ') + "*\n"; return "* " + text + string(n, ' ') + "*\n";
} }
string Report::fromVector(const string& key) string Report::fromVector(const string& key)
{ {
string result = ""; stringstream oss;
string sep = "";
oss << "[";
for (auto& item : data[key]) { for (auto& item : data[key]) {
result += to_string(item) + ", "; oss << sep << item.get<double>();
sep = ", ";
} }
return "[" + result.substr(0, result.length() - 2) + "]"; oss << "]";
return oss.str();
} }
string fVector(const json& data) string fVector(const string& title, const json& data, const int width, const int precision)
{ {
string result = ""; stringstream oss;
string sep = "";
oss << title << "[";
for (const auto& item : data) { for (const auto& item : data) {
result += to_string(item) + ", "; oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>();
sep = ", ";
} }
return "[" + result.substr(0, result.length() - 2) + "]"; oss << "]";
return oss.str();
} }
void Report::show() void Report::show()
{ {
header(); header();
body(); body();
footer();
} }
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
void Report::header() void Report::header()
{ {
cout << string(MAXL, '*') << endl; locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>()); cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>()); cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False")); cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
cout << headerLine("Execution took " + to_string(data["duration"].get<float>()) + " seconds, " + to_string(data["duration"].get<float>() / 3600) + " hours, on " + data["platform"].get<string>()); oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
cout << headerLine(oss.str());
cout << headerLine("Score is " + data["score_name"].get<string>()); cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl; cout << string(MAXL, '*') << endl;
cout << endl; cout << endl;
} }
void Report::body() void Report::body()
{ {
cout << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "============================== ====== ===== === ======= ======= ======= =============== ================= ===============" << endl; cout << "============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl;
json lastResult;
totalScore = 0;
bool odd = true;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
cout << setw(30) << left << r["dataset"].get<string>() << " "; auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " "; cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " "; cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " "; cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["nodes"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["leaves"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["depth"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get<double>() << " "; cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>() << " ";
cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get<double>() << " "; cout << setw(11) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
cout << " " << r["hyperparameters"].get<string>(); try {
cout << r["hyperparameters"].get<string>();
}
catch (const exception& err) {
cout << r["hyperparameters"];
}
cout << endl; cout << endl;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1) {
cout << string(MAXL, '*') << endl; cout << string(MAXL, '*') << endl;
cout << headerLine("Train scores: " + fVector(r["scores_train"])); cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine("Test scores: " + fVector(r["scores_test"])); cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine("Train times: " + fVector(r["times_train"])); cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine("Test times: " + fVector(r["times_test"])); cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl; cout << string(MAXL, '*') << endl;
} }
} }
void Report::footer()
{
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) {
stringstream oss;
oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score();
cout << headerLine(oss.str());
}
cout << string(MAXL, '*') << endl << Colors::RESET();
}
} }

View File

@@ -3,9 +3,10 @@
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "Colors.h"
using json = nlohmann::json; using json = nlohmann::json;
const int MAXL = 121; const int MAXL = 128;
namespace platform { namespace platform {
using namespace std; using namespace std;
class Report { class Report {
@@ -16,8 +17,10 @@ namespace platform {
private: private:
void header(); void header();
void body(); void body();
void footer();
string fromVector(const string& key); string fromVector(const string& key);
json data; json data;
double totalScore; // Total score of all results in a report
}; };
}; };
#endif #endif

239
src/Platform/Results.cc Normal file
View File

@@ -0,0 +1,239 @@
#include <filesystem>
#include "platformUtils.h"
#include "Results.h"
#include "Report.h"
#include "BestResult.h"
#include "Colors.h"
namespace platform {
Result::Result(const string& path, const string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
if (scoreName == BestResult::scoreName()) {
score /= BestResult::score();
}
title = data["title"];
duration = data["duration"];
model = data["model"];
}
json Result::load() const
{
ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
void Results::load()
{
using std::filesystem::directory_iterator;
for (const auto& file : directory_iterator(path)) {
auto filename = file.path().filename().string();
if (filename.find(".json") != string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename);
bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName())
addResult = false;
if (addResult)
files.push_back(result);
}
}
}
string Result::to_string() const
{
stringstream oss;
oss << date << " ";
oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " ";
return oss.str();
}
void Results::show() const
{
cout << Colors::GREEN() << "Results found: " << files.size() << endl;
cout << "-------------------" << endl;
auto i = 0;
cout << " # Date Model Score Name Score Duration Title" << endl;
cout << "=== ========== ============ =========== =========== ========= =============================================================" << endl;
bool odd = true;
for (const auto& result : files) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << result.to_string() << endl;
if (i == max && max != 0) {
break;
}
odd = !odd;
}
}
int Results::getIndex(const string& intent) const
{
string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
cout << color << "Choose result to " << intent << " (cancel=-1): ";
string line;
getline(cin, line);
int index = stoi(line);
if (index >= -1 && index < static_cast<int>(files.size())) {
return index;
}
cout << "Invalid index" << endl;
return -1;
}
void Results::report(const int index) const
{
cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
auto data = files.at(index).load();
Report report(data);
report.show();
}
void Results::menu()
{
char option;
int index;
bool finished = false;
string filename, line, options = "qldhsr";
while (!finished) {
cout << Colors::RESET() << "Choose option (quit='q', list='l', delete='d', hide='h', sort='s', report='r'): ";
getline(cin, line);
if (line.size() == 0)
continue;
if (options.find(line[0]) != string::npos) {
if (line.size() > 1) {
cout << "Invalid option" << endl;
continue;
}
option = line[0];
} else {
index = stoi(line);
if (index >= 0 && index < files.size()) {
report(index);
} else {
cout << "Invalid option" << endl;
}
continue;
}
switch (option) {
case 'q':
finished = true;
break;
case 'l':
show();
break;
case 'd':
index = getIndex("delete");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Deleting " << filename << endl;
remove((path + "/" + filename).c_str());
files.erase(files.begin() + index);
cout << "File: " + filename + " deleted!" << endl;
show();
break;
case 'h':
index = getIndex("hide");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Hiding " << filename << endl;
rename((path + "/" + filename).c_str(), (path + "/." + filename).c_str());
files.erase(files.begin() + index);
show();
menu();
break;
case 's':
sortList();
show();
break;
case 'r':
index = getIndex("report");
if (index == -1)
break;
report(index);
break;
default:
cout << "Invalid option" << endl;
}
}
}
void Results::sortList()
{
cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
string line;
char option;
getline(cin, line);
if (line.size() == 0)
return;
if (line.size() > 1) {
cout << "Invalid option" << endl;
return;
}
option = line[0];
switch (option) {
case 'd':
sortDate();
break;
case 's':
sortScore();
break;
case 'u':
sortDuration();
break;
case 'm':
sortModel();
break;
default:
cout << "Invalid option" << endl;
}
}
void Results::sortDate()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDate() > b.getDate();
});
}
void Results::sortModel()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getModel() > b.getModel();
});
}
void Results::sortDuration()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDuration() > b.getDuration();
});
}
void Results::sortScore()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getScore() > b.getScore();
});
}
void Results::manage()
{
if (files.size() == 0) {
cout << "No results found!" << endl;
exit(0);
}
show();
menu();
cout << "Done!" << endl;
}
}

56
src/Platform/Results.h Normal file
View File

@@ -0,0 +1,56 @@
#ifndef RESULTS_H
#define RESULTS_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using namespace std;
using json = nlohmann::json;
class Result {
public:
Result(const string& path, const string& filename);
json load() const;
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
private:
string path;
string filename;
string date;
double score;
string title;
double duration;
string model;
string scoreName;
};
class Results {
public:
Results(const string& path, const int max, const string& model, const string& score) : path(path), max(max), model(model), scoreName(score) { load(); };
void manage();
private:
string path;
int max;
string model;
string scoreName;
vector<Result> files;
void load(); // Loads the list of results
void show() const;
void report(const int index) const;
int getIndex(const string& intent) const;
void menu();
void sortList();
void sortDate();
void sortScore();
void sortModel();
void sortDuration();
};
};
#endif

57
src/Platform/list.cc Normal file
View File

@@ -0,0 +1,57 @@
#include <iostream>
#include <locale>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
using namespace std;
const int BALANCE_LENGTH = 75;
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
void outputBalance(const string& balance)
{
auto temp = string(balance);
while (temp.size() > BALANCE_LENGTH - 1) {
auto part = temp.substr(0, BALANCE_LENGTH);
cout << part << endl;
cout << setw(48) << " ";
temp = temp.substr(BALANCE_LENGTH);
}
cout << temp << endl;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(platform::Paths().datasets(), false);
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl;
string balanceBars = string(BALANCE_LENGTH, '=');
cout << "============================== ====== ===== === " << balanceBars << endl;
bool odd = true;
for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
cout << setw(6) << right << nSamples << " ";
cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
cout << setw(3) << right << data.getNClasses(dataset) << " ";
stringstream oss;
string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
odd = !odd;
}
cout << Colors::RESET() << endl;
return 0;
}

View File

@@ -6,20 +6,19 @@
#include "DotEnv.h" #include "DotEnv.h"
#include "Models.h" #include "Models.h"
#include "modelRegister.h" #include "modelRegister.h"
#include "Paths.h"
using namespace std; using namespace std;
const string PATH_RESULTS = "results";
const string PATH_DATASETS = "datasets";
argparse::ArgumentParser manageArguments(int argc, char** argv) argparse::ArgumentParser manageArguments(int argc, char** argv)
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
argparse::ArgumentParser program("BayesNetSample"); argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("-p", "--path") program.add_argument("-p", "--path")
.help("folder where the data files are located, default") .help("folder where the data files are located, default")
.default_value(string{ PATH_DATASETS } .default_value(string{ platform::Paths::datasets() });
);
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) { .action([](const std::string& value) {
@@ -104,7 +103,7 @@ int main(int argc, char** argv)
*/ */
auto env = platform::DotEnv(); auto env = platform::DotEnv();
auto experiment = platform::Experiment(); auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0"); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
for (auto seed : seeds) { for (auto seed : seeds) {
@@ -115,7 +114,7 @@ int main(int argc, char** argv)
experiment.go(filesToTest, path); experiment.go(filesToTest, path);
experiment.setDuration(timer.getDuration()); experiment.setDuration(timer.getDuration());
if (saveResults) if (saveResults)
experiment.save(PATH_RESULTS); experiment.save(platform::Paths::results());
else else
experiment.report(); experiment.report();
cout << "Done!" << endl; cout << "Done!" << endl;

41
src/Platform/manage.cc Normal file
View File

@@ -0,0 +1,41 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "platformUtils.h"
#include "Paths.h"
#include "Results.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("manage");
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
try {
program.parse_args(argc, argv);
auto number = program.get<int>("number");
if (number < 0) {
throw runtime_error("Number of results must be greater than or equal to 0");
}
auto model = program.get<string>("model");
auto score = program.get<string>("score");
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
auto number = program.get<int>("number");
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto results = platform::Results(platform::Paths::results(), number, model, score);
results.manage();
return 0;
}

View File

@@ -16,4 +16,6 @@ static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd", static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
#endif #endif

View File

@@ -1,4 +1,5 @@
#include "platformUtils.h" #include "platformUtils.h"
#include "Paths.h"
using namespace torch; using namespace torch;
@@ -85,7 +86,7 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name) tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
{ {
auto handler = ArffFiles(); auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff"); handler.load(platform::Paths::datasets() + static_cast<string>(name) + ".arff");
// Get Dataset X, y // Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX(); vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY(); mdlp::labels_t& y = handler.getY();