Add status to classifier and Experiment
This commit is contained in:
parent 64fc7bd9dd
commit 5a7c8f1818
sample/sample.cc (386 lines changed)
@@ -58,180 +58,226 @@ pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vect
 int main(int argc, char** argv)
 {
-    map<string, bool> datasets = {
-        {"diabetes", true},
-        {"ecoli", true},
-        {"glass", true},
-        {"iris", true},
-        {"kdd_JapaneseVowels", false},
-        {"letter", true},
-        {"liver-disorders", true},
-        {"mfeat-factors", true},
-    };
-    auto valid_datasets = vector<string>();
-    transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
-        [](const pair<string, bool>& pair) { return pair.first; });
-    argparse::ArgumentParser program("BayesNetSample");
-    program.add_argument("-d", "--dataset")
-        .help("Dataset file name")
-        .action([valid_datasets](const std::string& value) {
-        if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
-            return value;
-        }
-        throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
-            }
-    );
-    program.add_argument("-p", "--path")
-        .help(" folder where the data files are located, default")
-        .default_value(string{ PATH }
-    );
-    program.add_argument("-m", "--model")
-        .help("Model to use " + platform::Models::instance()->toString())
-        .action([](const std::string& value) {
-        static const vector<string> choices = platform::Models::instance()->getNames();
-        if (find(choices.begin(), choices.end(), value) != choices.end()) {
-            return value;
-        }
-        throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
-            }
-    );
-    program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
-    program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
-    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
-    program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
-    program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
-        try {
-            auto k = stoi(value);
-            if (k < 2) {
-                throw runtime_error("Number of folds must be greater than 1");
-            }
-            return k;
-        }
-        catch (const runtime_error& err) {
-            throw runtime_error(err.what());
-        }
-        catch (...) {
-            throw runtime_error("Number of folds must be an integer");
-        }});
-    program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
-    bool class_last, stratified, tensors, dump_cpt;
-    string model_name, file_name, path, complete_file_name;
-    int nFolds, seed;
-    try {
-        program.parse_args(argc, argv);
-        file_name = program.get<string>("dataset");
-        path = program.get<string>("path");
-        model_name = program.get<string>("model");
-        complete_file_name = path + file_name + ".arff";
-        stratified = program.get<bool>("stratified");
-        tensors = program.get<bool>("tensors");
-        nFolds = program.get<int>("folds");
-        seed = program.get<int>("seed");
-        dump_cpt = program.get<bool>("dumpcpt");
-        class_last = datasets[file_name];
-        if (!file_exists(complete_file_name)) {
-            throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
-        }
-    }
-    catch (const exception& err) {
-        cerr << err.what() << endl;
-        cerr << program;
-        exit(1);
-    }
+    torch::Tensor weights_ = torch::full({ 10 }, 1.0 / 10, torch::kFloat64);
+    torch::Tensor y_ = torch::tensor({ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }, torch::kInt32);
+    torch::Tensor ypred = torch::tensor({ 1, 1, 1, 0, 0, 1, 1, 1, 1, 0 }, torch::kInt32);
+    cout << "Initial weights_: " << endl;
+    for (int i = 0; i < 10; i++) {
+        cout << weights_.index({ i }).item<double>() << ", ";
+    }
+    cout << "end." << endl;
+    cout << "y_: " << endl;
+    for (int i = 0; i < 10; i++) {
+        cout << y_.index({ i }).item<int>() << ", ";
+    }
+    cout << "end." << endl;
+    cout << "ypred: " << endl;
+    for (int i = 0; i < 10; i++) {
+        cout << ypred.index({ i }).item<int>() << ", ";
+    }
+    cout << "end." << endl;
+    auto mask_wrong = ypred != y_;
+    auto mask_right = ypred == y_;
+    auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
+    double epsilon_t = masked_weights.sum().item<double>();
+    cout << "epsilon_t: " << epsilon_t << endl;
+    double wt = (1 - epsilon_t) / epsilon_t;
+    cout << "wt: " << wt << endl;
+    double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
+    cout << "alpha_t: " << alpha_t << endl;
+    // Step 3.2: Update weights for next classifier
+    // Step 3.2.1: Update weights of wrong samples
+    cout << "exp(alpha_t): " << exp(alpha_t) << endl;
+    cout << "exp(-alpha_t): " << exp(-alpha_t) << endl;
+    weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
+    // Step 3.2.2: Update weights of right samples
+    weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
+    // Step 3.3: Normalise the weights
+    double totalWeights = torch::sum(weights_).item<double>();
+    cout << "totalWeights: " << totalWeights << endl;
+    cout << "Before normalization: " << endl;
+    for (int i = 0; i < 10; i++) {
+        cout << weights_.index({ i }).item<double>() << endl;
+    }
+    weights_ = weights_ / totalWeights;
+    cout << "After normalization: " << endl;
+    for (int i = 0; i < 10; i++) {
+        cout << weights_.index({ i }).item<double>() << endl;
+    }
+    // map<string, bool> datasets = {
+    //     {"diabetes", true},
+    //     {"ecoli", true},
+    //     {"glass", true},
+    //     {"iris", true},
+    //     {"kdd_JapaneseVowels", false},
+    //     {"letter", true},
+    //     {"liver-disorders", true},
+    //     {"mfeat-factors", true},
+    // };
+    // auto valid_datasets = vector<string>();
+    // transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
+    //     [](const pair<string, bool>& pair) { return pair.first; });
+    // argparse::ArgumentParser program("BayesNetSample");
+    // program.add_argument("-d", "--dataset")
+    //     .help("Dataset file name")
+    //     .action([valid_datasets](const std::string& value) {
+    //     if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
+    //         return value;
+    //     }
+    //     throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
+    //         }
+    // );
+    // program.add_argument("-p", "--path")
+    //     .help(" folder where the data files are located, default")
+    //     .default_value(string{ PATH }
+    // );
+    // program.add_argument("-m", "--model")
+    //     .help("Model to use " + platform::Models::instance()->toString())
+    //     .action([](const std::string& value) {
+    //     static const vector<string> choices = platform::Models::instance()->getNames();
+    //     if (find(choices.begin(), choices.end(), value) != choices.end()) {
+    //         return value;
+    //     }
+    //     throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
+    //         }
+    // );
+    // program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
+    // program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
+    // program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
+    // program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
+    // program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
+    //     try {
+    //         auto k = stoi(value);
+    //         if (k < 2) {
+    //             throw runtime_error("Number of folds must be greater than 1");
+    //         }
+    //         return k;
+    //     }
+    //     catch (const runtime_error& err) {
+    //         throw runtime_error(err.what());
+    //     }
+    //     catch (...) {
+    //         throw runtime_error("Number of folds must be an integer");
+    //     }});
+    // program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
+    // bool class_last, stratified, tensors, dump_cpt;
+    // string model_name, file_name, path, complete_file_name;
+    // int nFolds, seed;
+    // try {
+    //     program.parse_args(argc, argv);
+    //     file_name = program.get<string>("dataset");
+    //     path = program.get<string>("path");
+    //     model_name = program.get<string>("model");
+    //     complete_file_name = path + file_name + ".arff";
+    //     stratified = program.get<bool>("stratified");
+    //     tensors = program.get<bool>("tensors");
+    //     nFolds = program.get<int>("folds");
+    //     seed = program.get<int>("seed");
+    //     dump_cpt = program.get<bool>("dumpcpt");
+    //     class_last = datasets[file_name];
+    //     if (!file_exists(complete_file_name)) {
+    //         throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
+    //     }
+    // }
+    // catch (const exception& err) {
+    //     cerr << err.what() << endl;
+    //     cerr << program;
+    //     exit(1);
+    // }
+
 /*
 * Begin Processing
 */
-    auto handler = ArffFiles();
+    // auto handler = ArffFiles();
-    handler.load(complete_file_name, class_last);
+    // handler.load(complete_file_name, class_last);
-    // Get Dataset X, y
+    // // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
+    // vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
+    // mdlp::labels_t& y = handler.getY();
-    // Get className & Features
+    // // Get className & Features
-    auto className = handler.getClassName();
+    // auto className = handler.getClassName();
-    vector<string> features;
+    // vector<string> features;
-    auto attributes = handler.getAttributes();
+    // auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features),
+    // transform(attributes.begin(), attributes.end(), back_inserter(features),
-        [](const pair<string, string>& item) { return item.first; });
+    //     [](const pair<string, string>& item) { return item.first; });
-    // Discretize Dataset
+    // // Discretize Dataset
-    auto [Xd, maxes] = discretize(X, y, features);
+    // auto [Xd, maxes] = discretize(X, y, features);
-    maxes[className] = *max_element(y.begin(), y.end()) + 1;
+    // maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    map<string, vector<int>> states;
+    // map<string, vector<int>> states;
-    for (auto feature : features) {
+    // for (auto feature : features) {
-        states[feature] = vector<int>(maxes[feature]);
+    //     states[feature] = vector<int>(maxes[feature]);
-    }
+    // }
-    states[className] = vector<int>(maxes[className]);
+    // states[className] = vector<int>(maxes[className]);
-    auto clf = platform::Models::instance()->create(model_name);
+    // auto clf = platform::Models::instance()->create(model_name);
-    clf->fit(Xd, y, features, className, states);
+    // clf->fit(Xd, y, features, className, states);
-    if (dump_cpt) {
+    // if (dump_cpt) {
-        cout << "--- CPT Tables ---" << endl;
+    //     cout << "--- CPT Tables ---" << endl;
-        clf->dump_cpt();
+    //     clf->dump_cpt();
-    }
+    // }
-    auto lines = clf->show();
+    // auto lines = clf->show();
-    for (auto line : lines) {
+    // for (auto line : lines) {
-        cout << line << endl;
+    //     cout << line << endl;
-    }
+    // }
-    cout << "--- Topological Order ---" << endl;
+    // cout << "--- Topological Order ---" << endl;
-    auto order = clf->topological_order();
+    // auto order = clf->topological_order();
-    for (auto name : order) {
+    // for (auto name : order) {
-        cout << name << ", ";
+    //     cout << name << ", ";
-    }
+    // }
-    cout << "end." << endl;
+    // cout << "end." << endl;
-    auto score = clf->score(Xd, y);
+    // auto score = clf->score(Xd, y);
-    cout << "Score: " << score << endl;
+    // cout << "Score: " << score << endl;
-    auto graph = clf->graph();
+    // auto graph = clf->graph();
-    auto dot_file = model_name + "_" + file_name;
+    // auto dot_file = model_name + "_" + file_name;
-    ofstream file(dot_file + ".dot");
+    // ofstream file(dot_file + ".dot");
-    file << graph;
+    // file << graph;
-    file.close();
+    // file.close();
-    cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
+    // cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
-    cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
+    // cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
-    string stratified_string = stratified ? " Stratified" : "";
+    // string stratified_string = stratified ? " Stratified" : "";
-    cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
+    // cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
-    cout << "==========================================" << endl;
+    // cout << "==========================================" << endl;
-    torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
+    // torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
-    torch::Tensor yt = torch::tensor(y, torch::kInt32);
+    // torch::Tensor yt = torch::tensor(y, torch::kInt32);
-    for (int i = 0; i < features.size(); ++i) {
+    // for (int i = 0; i < features.size(); ++i) {
-        Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
+    //     Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
-    }
+    // }
-    float total_score = 0, total_score_train = 0, score_train, score_test;
+    // float total_score = 0, total_score_train = 0, score_train, score_test;
-    platform::Fold* fold;
+    // platform::Fold* fold;
-    if (stratified)
+    // if (stratified)
-        fold = new platform::StratifiedKFold(nFolds, y, seed);
+    //     fold = new platform::StratifiedKFold(nFolds, y, seed);
-    else
+    // else
-        fold = new platform::KFold(nFolds, y.size(), seed);
+    //     fold = new platform::KFold(nFolds, y.size(), seed);
-    for (auto i = 0; i < nFolds; ++i) {
+    // for (auto i = 0; i < nFolds; ++i) {
-        auto [train, test] = fold->getFold(i);
+    //     auto [train, test] = fold->getFold(i);
-        cout << "Fold: " << i + 1 << endl;
+    //     cout << "Fold: " << i + 1 << endl;
-        if (tensors) {
+    //     if (tensors) {
-            auto ttrain = torch::tensor(train, torch::kInt64);
+    //         auto ttrain = torch::tensor(train, torch::kInt64);
-            auto ttest = torch::tensor(test, torch::kInt64);
+    //         auto ttest = torch::tensor(test, torch::kInt64);
-            torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
+    //         torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
-            torch::Tensor ytraint = yt.index({ ttrain });
+    //         torch::Tensor ytraint = yt.index({ ttrain });
-            torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
+    //         torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
-            torch::Tensor ytestt = yt.index({ ttest });
+    //         torch::Tensor ytestt = yt.index({ ttest });
-            clf->fit(Xtraint, ytraint, features, className, states);
+    //         clf->fit(Xtraint, ytraint, features, className, states);
-            auto temp = clf->predict(Xtraint);
+    //         auto temp = clf->predict(Xtraint);
-            score_train = clf->score(Xtraint, ytraint);
+    //         score_train = clf->score(Xtraint, ytraint);
-            score_test = clf->score(Xtestt, ytestt);
+    //         score_test = clf->score(Xtestt, ytestt);
-        } else {
+    //     } else {
-            auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
+    //         auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
-            auto [Xtest, ytest] = extract_indices(test, Xd, y);
+    //         auto [Xtest, ytest] = extract_indices(test, Xd, y);
-            clf->fit(Xtrain, ytrain, features, className, states);
+    //         clf->fit(Xtrain, ytrain, features, className, states);
-            score_train = clf->score(Xtrain, ytrain);
+    //         score_train = clf->score(Xtrain, ytrain);
-            score_test = clf->score(Xtest, ytest);
+    //         score_test = clf->score(Xtest, ytest);
-        }
+    //     }
-        if (dump_cpt) {
+    //     if (dump_cpt) {
-            cout << "--- CPT Tables ---" << endl;
+    //         cout << "--- CPT Tables ---" << endl;
-            clf->dump_cpt();
+    //         clf->dump_cpt();
-        }
+    //     }
-        total_score_train += score_train;
+    //     total_score_train += score_train;
-        total_score += score_test;
+    //     total_score += score_test;
-        cout << "Score Train: " << score_train << endl;
+    //     cout << "Score Train: " << score_train << endl;
-        cout << "Score Test : " << score_test << endl;
+    //     cout << "Score Test : " << score_test << endl;
-        cout << "-------------------------------------------------------------------------------" << endl;
+    //     cout << "-------------------------------------------------------------------------------" << endl;
-    }
+    // }
-    cout << "**********************************************************************************" << endl;
+    // cout << "**********************************************************************************" << endl;
-    cout << "Average Score Train: " << total_score_train / nFolds << endl;
+    // cout << "Average Score Train: " << total_score_train / nFolds << endl;
-    cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
+    // cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
 }
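
The rewritten main() is a scratch walk-through of a single AdaBoost re-weighting round on ten hand-coded samples, printing every intermediate value. For reference, the textbook update it exercises is the following (notation mine, not from the commit):

\[
\varepsilon_t = \sum_{i=1}^{N} w_i\,\mathbf{1}[\hat{y}_i \ne y_i], \qquad
\alpha_t = \tfrac{1}{2}\ln\frac{1-\varepsilon_t}{\varepsilon_t}, \qquad
w_i \leftarrow \frac{w_i\, e^{\alpha_t\,\mathbf{1}[\hat{y}_i \ne y_i]\,-\,\alpha_t\,\mathbf{1}[\hat{y}_i = y_i]}}{Z_t}, \qquad
Z_t = \sum_{j=1}^{N} w_j.
\]

Note that the weights_ += form above scales wrong samples by (1 + e^{alpha_t}) and right samples by (1 + e^{-alpha_t}) rather than by e^{+alpha_t} and e^{-alpha_t}; since (1 + e^{alpha_t}) / (1 + e^{-alpha_t}) = e^{alpha_t}, the two formulations yield the same distribution once the weights are renormalised by Z_t.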
@@ -5,6 +5,7 @@
 #include <vector>
 namespace bayesnet {
     using namespace std;
+    enum status_t { NORMAL, WARNING, ERROR };
     class BaseClassifier {
     protected:
         virtual void trainModel(const torch::Tensor& weights) = 0;
@@ -18,6 +19,7 @@ namespace bayesnet {
         virtual ~BaseClassifier() = default;
         torch::Tensor virtual predict(torch::Tensor& X) = 0;
         vector<int> virtual predict(vector<vector<int>>& X) = 0;
+        status_t virtual getStatus() const = 0;
         float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
         float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
         int virtual getNumberOfNodes()const = 0;
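
The two hunks above extend the abstract classifier interface (the BaseClassifier header) with a status_t flag, so training anomalies can be reported through the API instead of being printed to stdout. A minimal sketch of a consumer (hypothetical usage, not part of the commit; create() and fit() as they appear in sample.cc above):

    // Hypothetical caller: train, then inspect the classifier's status flag.
    auto clf = platform::Models::instance()->create(model_name);
    clf->fit(Xd, y, features, className, states);
    if (clf->getStatus() == bayesnet::WARNING) {
        std::cerr << "classifier trained with warnings" << std::endl;
    }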
@@ -1,6 +1,7 @@
 #include "BoostAODE.h"
 #include <set>
 #include "BayesMetrics.h"
+#include "Colors.h"

 namespace bayesnet {
     BoostAODE::BoostAODE() : Ensemble() {}
@@ -64,22 +65,26 @@ namespace bayesnet {
             auto ypred = model->predict(X_);
             // Step 3.1: Compute the classifier amout of say
             auto mask_wrong = ypred != y_;
+            auto mask_right = ypred == y_;
             auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
-            double wrongWeights = masked_weights.sum().item<double>();
+            double epsilon_t = masked_weights.sum().item<double>();
-            double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
+            double wt = (1 - epsilon_t) / epsilon_t;
+            double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
             // Step 3.2: Update weights for next classifier
             // Step 3.2.1: Update weights of wrong samples
-            weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
+            weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
+            // Step 3.2.2: Update weights of right samples
+            weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
             // Step 3.3: Normalise the weights
             double totalWeights = torch::sum(weights_).item<double>();
             weights_ = weights_ / totalWeights;
             // Step 3.4: Store classifier and its accuracy to weigh its future vote
             models.push_back(std::move(model));
-            significanceModels.push_back(significance);
+            significanceModels.push_back(alpha_t);
-            exitCondition = n_models == maxModels && repeatSparent;
+            exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5;
         }
         if (featuresUsed.size() != features.size()) {
-            cout << "Warning: BoostAODE did not use all the features" << endl;
+            status = WARNING;
         }
         weights.copy_(weights_);
     }
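
Two details of the BoostAODE hunk are easy to misread. First, since && binds tighter than || in C++, the new exit condition groups as the explicitly parenthesised form below, meaning boosting now also stops as soon as a round's weighted error exceeds 0.5 (a worse-than-random weak learner):

    // Equivalent, explicitly parenthesised form of the new exit condition:
    exitCondition = (n_models == maxModels && repeatSparent) || (epsilon_t > 0.5);

Second, the "did not use all the features" case no longer prints a warning; it raises the status flag that callers can now query through getStatus().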
@@ -21,6 +21,7 @@ namespace bayesnet {
         string className;
         map<string, vector<int>> states;
         Tensor dataset; // (n+1)xm tensor
+        status_t status = NORMAL;
         void checkFitParameters();
         virtual void buildModel(const torch::Tensor& weights) = 0;
         void trainModel(const torch::Tensor& weights) override;
@@ -37,6 +38,7 @@ namespace bayesnet {
         int getNumberOfEdges() const override;
         int getNumberOfStates() const override;
         Tensor predict(Tensor& X) override;
+        status_t getStatus() const override { return status; }
         vector<int> predict(vector<vector<int>>& X) override;
         float score(Tensor& X, Tensor& y) override;
         float score(vector<vector<int>>& X, vector<int>& y) override;
@@ -111,6 +111,26 @@ namespace platform {
         }
     }
+
+    string getColor(bayesnet::status_t status)
+    {
+        switch (status) {
+            case bayesnet::NORMAL:
+                return Colors::GREEN();
+            case bayesnet::WARNING:
+                return Colors::YELLOW();
+            case bayesnet::ERROR:
+                return Colors::RED();
+            default:
+                return Colors::RESET();
+        }
+    }
+
+    void showProgress(int fold, const string& color, const string& phase)
+    {
+        string prefix = phase == "a" ? "" : "\b\b\b\b";
+        cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
+    }
     void Experiment::cross_validation(const string& path, const string& fileName)
     {
         auto datasets = platform::Datasets(path, discretized, platform::ARFF);
@@ -159,23 +179,24 @@ namespace platform {
             auto y_train = y.index({ train_t });
             auto X_test = X.index({ "...", test_t });
             auto y_test = y.index({ test_t });
-            cout << nfold + 1 << "(a)" << flush;
+            showProgress(nfold + 1, getColor(clf->getStatus()), "a");
             // Train model
             clf->fit(X_train, y_train, features, className, states);
-            cout << "\b\bb)" << flush;
+            showProgress(nfold + 1, getColor(clf->getStatus()), "b");
             nodes[item] = clf->getNumberOfNodes();
             edges[item] = clf->getNumberOfEdges();
             num_states[item] = clf->getNumberOfStates();
             train_time[item] = train_timer.getDuration();
+            // Score train
             auto accuracy_train_value = clf->score(X_train, y_train);
-            cout << "\b\bc)" << flush;
             // Test model
+            showProgress(nfold + 1, getColor(clf->getStatus()), "c");
             test_timer.start();
             auto accuracy_test_value = clf->score(X_test, y_test);
-            cout << "\b\b\b, " << flush;
             test_time[item] = test_timer.getDuration();
             accuracy_train[item] = accuracy_train_value;
             accuracy_test[item] = accuracy_test_value;
+            cout << "\b\b\b, " << flush;
             // Store results and times in vector
             result.addScoreTrain(accuracy_train_value);
             result.addScoreTest(accuracy_test_value);
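
Together, getColor() and showProgress() replace the hard-coded cout calls with a per-fold progress cell whose colour tracks the classifier's status. An illustrative driver (hypothetical, assuming a terminal that honours the backspace character):

    // Each fold paints e.g. "1(a)", repaints to "1(b)" and "1(c)" via the
    // "\b\b\b\b" prefix inside showProgress, then collapses to "1, ".
    for (int nfold = 0; nfold < nFolds; ++nfold) {
        showProgress(nfold + 1, getColor(clf->getStatus()), "a"); // started
        // ... fit ...
        showProgress(nfold + 1, getColor(clf->getStatus()), "b"); // trained
        // ... score ...
        showProgress(nfold + 1, getColor(clf->getStatus()), "c"); // tested
        cout << "\b\b\b, " << flush;
    }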