Complete first BoostAODE
This commit is contained in:
parent 704dc937be
commit a6bb22dfb5

sample/sample.cc (199 lines changed)
@@ -141,96 +141,111 @@ int main(int argc, char** argv)
     /*
     * Begin Processing
     */
-    auto handler = ArffFiles();
-    handler.load(complete_file_name, class_last);
-    // Get Dataset X, y
-    vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features),
-        [](const pair<string, string>& item) { return item.first; });
-    // Discretize Dataset
-    auto [Xd, maxes] = discretize(X, y, features);
-    maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    map<string, vector<int>> states;
-    for (auto feature : features) {
-        states[feature] = vector<int>(maxes[feature]);
-    }
-    states[className] = vector<int>(maxes[className]);
-    auto clf = platform::Models::instance()->create(model_name);
-    clf->fit(Xd, y, features, className, states);
-    if (dump_cpt) {
-        cout << "--- CPT Tables ---" << endl;
-        clf->dump_cpt();
-    }
-    auto lines = clf->show();
-    for (auto line : lines) {
-        cout << line << endl;
-    }
-    cout << "--- Topological Order ---" << endl;
-    auto order = clf->topological_order();
-    for (auto name : order) {
-        cout << name << ", ";
-    }
-    cout << "end." << endl;
-    auto score = clf->score(Xd, y);
-    cout << "Score: " << score << endl;
-    auto graph = clf->graph();
-    auto dot_file = model_name + "_" + file_name;
-    ofstream file(dot_file + ".dot");
-    file << graph;
-    file.close();
-    cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
-    cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
-    string stratified_string = stratified ? " Stratified" : "";
-    cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
-    cout << "==========================================" << endl;
-    torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
-    torch::Tensor yt = torch::tensor(y, torch::kInt32);
-    for (int i = 0; i < features.size(); ++i) {
-        Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
-    }
-    float total_score = 0, total_score_train = 0, score_train, score_test;
-    Fold* fold;
-    if (stratified)
-        fold = new StratifiedKFold(nFolds, y, seed);
-    else
-        fold = new KFold(nFolds, y.size(), seed);
-    for (auto i = 0; i < nFolds; ++i) {
-        auto [train, test] = fold->getFold(i);
-        cout << "Fold: " << i + 1 << endl;
-        if (tensors) {
-            auto ttrain = torch::tensor(train, torch::kInt64);
-            auto ttest = torch::tensor(test, torch::kInt64);
-            torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
-            torch::Tensor ytraint = yt.index({ ttrain });
-            torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
-            torch::Tensor ytestt = yt.index({ ttest });
-            clf->fit(Xtraint, ytraint, features, className, states);
-            auto temp = clf->predict(Xtraint);
-            score_train = clf->score(Xtraint, ytraint);
-            score_test = clf->score(Xtestt, ytestt);
-        } else {
-            auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
-            auto [Xtest, ytest] = extract_indices(test, Xd, y);
-            clf->fit(Xtrain, ytrain, features, className, states);
-            score_train = clf->score(Xtrain, ytrain);
-            score_test = clf->score(Xtest, ytest);
-        }
-        if (dump_cpt) {
-            cout << "--- CPT Tables ---" << endl;
-            clf->dump_cpt();
-        }
-        total_score_train += score_train;
-        total_score += score_test;
-        cout << "Score Train: " << score_train << endl;
-        cout << "Score Test : " << score_test << endl;
-        cout << "-------------------------------------------------------------------------------" << endl;
-    }
-    cout << "**********************************************************************************" << endl;
-    cout << "Average Score Train: " << total_score_train / nFolds << endl;
-    cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
+    auto ypred = torch::tensor({ 1,2,3,2,2,3,4,5,2,1 });
+    auto y = torch::tensor({ 0,0,0,0,2,3,4,0,0,0 });
+    auto weights = torch::ones({ 10 }, kDouble);
+    auto mask = ypred == y;
+    cout << "ypred:" << ypred << endl;
+    cout << "y:" << y << endl;
+    cout << "weights:" << weights << endl;
+    cout << "mask:" << mask << endl;
+    double value_to_add = 0.5;
+    weights += mask.to(torch::kDouble) * value_to_add;
+    cout << "New weights:" << weights << endl;
+    auto masked_weights = weights * mask.to(weights.dtype());
+    double sum_of_weights = masked_weights.sum().item<double>();
+    cout << "Sum of weights: " << sum_of_weights << endl;
+    //weights.index_put_({ mask }, weights + 10);
+    // auto handler = ArffFiles();
+    // handler.load(complete_file_name, class_last);
+    // // Get Dataset X, y
+    // vector<mdlp::samples_t>& X = handler.getX();
+    // mdlp::labels_t& y = handler.getY();
+    // // Get className & Features
+    // auto className = handler.getClassName();
+    // vector<string> features;
+    // auto attributes = handler.getAttributes();
+    // transform(attributes.begin(), attributes.end(), back_inserter(features),
+    //     [](const pair<string, string>& item) { return item.first; });
+    // // Discretize Dataset
+    // auto [Xd, maxes] = discretize(X, y, features);
+    // maxes[className] = *max_element(y.begin(), y.end()) + 1;
+    // map<string, vector<int>> states;
+    // for (auto feature : features) {
+    //     states[feature] = vector<int>(maxes[feature]);
+    // }
+    // states[className] = vector<int>(maxes[className]);
+    // auto clf = platform::Models::instance()->create(model_name);
+    // clf->fit(Xd, y, features, className, states);
+    // if (dump_cpt) {
+    //     cout << "--- CPT Tables ---" << endl;
+    //     clf->dump_cpt();
+    // }
+    // auto lines = clf->show();
+    // for (auto line : lines) {
+    //     cout << line << endl;
+    // }
+    // cout << "--- Topological Order ---" << endl;
+    // auto order = clf->topological_order();
+    // for (auto name : order) {
+    //     cout << name << ", ";
+    // }
+    // cout << "end." << endl;
+    // auto score = clf->score(Xd, y);
+    // cout << "Score: " << score << endl;
+    // auto graph = clf->graph();
+    // auto dot_file = model_name + "_" + file_name;
+    // ofstream file(dot_file + ".dot");
+    // file << graph;
+    // file.close();
+    // cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
+    // cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
+    // string stratified_string = stratified ? " Stratified" : "";
+    // cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
+    // cout << "==========================================" << endl;
+    // torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
+    // torch::Tensor yt = torch::tensor(y, torch::kInt32);
+    // for (int i = 0; i < features.size(); ++i) {
+    //     Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
+    // }
+    // float total_score = 0, total_score_train = 0, score_train, score_test;
+    // Fold* fold;
+    // if (stratified)
+    //     fold = new StratifiedKFold(nFolds, y, seed);
+    // else
+    //     fold = new KFold(nFolds, y.size(), seed);
+    // for (auto i = 0; i < nFolds; ++i) {
+    //     auto [train, test] = fold->getFold(i);
+    //     cout << "Fold: " << i + 1 << endl;
+    //     if (tensors) {
+    //         auto ttrain = torch::tensor(train, torch::kInt64);
+    //         auto ttest = torch::tensor(test, torch::kInt64);
+    //         torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
+    //         torch::Tensor ytraint = yt.index({ ttrain });
+    //         torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
+    //         torch::Tensor ytestt = yt.index({ ttest });
+    //         clf->fit(Xtraint, ytraint, features, className, states);
+    //         auto temp = clf->predict(Xtraint);
+    //         score_train = clf->score(Xtraint, ytraint);
+    //         score_test = clf->score(Xtestt, ytestt);
+    //     } else {
+    //         auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
+    //         auto [Xtest, ytest] = extract_indices(test, Xd, y);
+    //         clf->fit(Xtrain, ytrain, features, className, states);
+    //         score_train = clf->score(Xtrain, ytrain);
+    //         score_test = clf->score(Xtest, ytest);
+    //     }
+    //     if (dump_cpt) {
+    //         cout << "--- CPT Tables ---" << endl;
+    //         clf->dump_cpt();
+    //     }
+    //     total_score_train += score_train;
+    //     total_score += score_test;
+    //     cout << "Score Train: " << score_train << endl;
+    //     cout << "Score Test : " << score_test << endl;
+    //     cout << "-------------------------------------------------------------------------------" << endl;
+    // }
+    // cout << "**********************************************************************************" << endl;
+    // cout << "Average Score Train: " << total_score_train / nFolds << endl;
+    // cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
 }
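The new scratch code in sample.cc rehearses the masking idiom that BoostAODE::trainModel relies on further down: compare predictions against labels to get a boolean mask, then use that mask to update and sum a weight tensor selectively. A minimal standalone sketch of the same idiom (the includes and main wrapper are added here for illustration; it assumes a libtorch installation):

    #include <torch/torch.h>
    #include <iostream>

    int main()
    {
        auto ypred = torch::tensor({ 1, 2, 3, 2, 2, 3, 4, 5, 2, 1 });
        auto y = torch::tensor({ 0, 0, 0, 0, 2, 3, 4, 0, 0, 0 });
        auto weights = torch::ones({ 10 }, torch::kDouble);
        auto mask = ypred == y;                     // true where the prediction is correct
        weights += mask.to(torch::kDouble) * 0.5;   // raise weights of matching samples only
        auto masked_weights = weights * mask.to(weights.dtype());
        std::cout << "Sum of masked weights: " << masked_weights.sum().item<double>() << std::endl;
        return 0;
    }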
@@ -8,6 +8,8 @@ namespace bayesnet {
         for (int i = 0; i < features.size(); ++i) {
             models.push_back(std::make_unique<SPODE>(i));
         }
+        n_models = models.size();
+        significanceModels = vector<double>(n_models, 1.0);
     }
     vector<string> AODE::graph(const string& title) const
     {
@@ -23,7 +23,7 @@ namespace bayesnet {
     }
     vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k)
    {
-        auto n = samples.size(1);
+        auto n = samples.size(0) - 1;
         if (k == 0) {
             k = n;
         }
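The one-line change above matters because the samples tensor stores one feature per row with the class labels in the last row, so the number of candidate features is samples.size(0) - 1, not the number of samples. For intuition only, a rough standalone sketch of what a SelectKBest-style ranking returns; selectKBest and its arguments are hypothetical names, not the library's implementation:

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // Return the indices of the k highest scores, in descending order of
    // score, which is the shape of result a SelectKBest-style ranking yields.
    std::vector<int> selectKBest(const std::vector<double>& scores, unsigned k)
    {
        std::vector<int> indices(scores.size());
        std::iota(indices.begin(), indices.end(), 0);
        std::sort(indices.begin(), indices.end(),
            [&scores](int a, int b) { return scores[a] > scores[b]; });
        if (k < indices.size()) {
            indices.resize(k);
        }
        return indices;
    }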
@@ -5,30 +5,79 @@ namespace bayesnet {
     BoostAODE::BoostAODE() : Ensemble() {}
     void BoostAODE::buildModel(const torch::Tensor& weights)
     {
-        models.clear();
-        for (int i = 0; i < features.size(); ++i) {
-            models.push_back(std::make_unique<SPODE>(i));
-        }
+        // models.clear();
+        // for (int i = 0; i < features.size(); ++i) {
+        //     models.push_back(std::make_unique<SPODE>(i));
+        // }
+        // n_models = models.size();
     }
     void BoostAODE::trainModel(const torch::Tensor& weights)
     {
-        // End building vectors
-        Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kDouble);
+        models.clear();
+        n_models = 0;
+        int max_models = .1 * n > 10 ? .1 * n : n;
+        Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
         auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
-        auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
-        for (int i = 0; i < features.size(); ++i) {
-            models[i].fit(dataset, features, className, states, weights_);
-            auto ypred = models[i].predict(X_);
-            // em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
-            // am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
-            // # Step 3.2: Update weights for next classifier
-            // weights = [
-            //     wm * np.exp(am * (ym != yp))
-            //     for wm, ym, yp in zip(weights, self.y_, y_pred)
-            // ]
-            // # Step 4: Add the new model
-            // self.estimators_.append(estimator)
+        auto y_ = dataset.index({ -1, "..." });
+        bool exitCondition = false;
+        bool repeatSparent = true;
+        vector<int> featuresUsed;
+        // Step 0: Set the finish conditions:
+        // if not repeatSparent, one finish condition is running out of features
+        // n_models == max_models
+        int numClasses = states[className].size();
+        while (!exitCondition) {
+            // Step 1: Build ranking with mutual information
+            auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
+            auto feature = featureSelection[0];
+            unique_ptr<Classifier> model;
+            if (!repeatSparent) {
+                if (n_models == 0) {
+                    models.resize(n); // Resize for n==nfeatures SPODEs
+                    significanceModels.resize(n);
+                }
+                bool found = false;
+                for (int i = 0; i < featureSelection.size(); ++i) {
+                    if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) {
+                        continue;
+                    }
+                    found = true;
+                    feature = i;
+                    featuresUsed.push_back(feature);
+                    n_models++;
+                    break;
+                }
+                if (!found) {
+                    exitCondition = true;
+                    continue;
+                }
+            }
+            model = std::make_unique<SPODE>(feature);
+            model->fit(dataset, features, className, states, weights_);
+            auto ypred = model->predict(X_);
+            // Step 3.1: Compute the classifier amount of say
+            auto mask_wrong = ypred != y_;
+            auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
+            double wrongWeights = masked_weights.sum().item<double>();
+            double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
+            // Step 3.2: Update weights for next classifier
+            // Step 3.2.1: Update weights of wrong samples
+            weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
+            // Step 3.3: Normalise the weights
+            double totalWeights = torch::sum(weights_).item<double>();
+            weights_ = weights_ / totalWeights;
+            // Step 3.4: Store classifier and its accuracy to weigh its future vote
+            if (!repeatSparent) {
+                models[feature] = std::move(model);
+                significanceModels[feature] = significance;
+            } else {
+                models.push_back(std::move(model));
+                significanceModels.push_back(significance);
+                n_models++;
+            }
+            exitCondition = n_models == max_models;
         }
+        weights.copy_(weights_);
     }
     vector<string> BoostAODE::graph(const string& title) const
     {
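trainModel now follows an AdaBoost-style loop: each round fits a SPODE on the weighted dataset, measures the weighted error eps as the sum of the weights of the misclassified samples, computes the model's amount of say alpha = 0.5 * ln((1 - eps) / eps), scales up the weights of the wrong samples by exp(alpha), and renormalises the weights to sum to one. The update in isolation, as a runnable sketch with toy tensors (the classifier machinery stripped out; variable names mirror the diff):

    #include <torch/torch.h>
    #include <cmath>
    #include <iostream>

    int main()
    {
        auto y = torch::tensor({ 0, 1, 1, 0, 1 });
        auto ypred = torch::tensor({ 0, 1, 0, 0, 0 });          // two mistakes
        auto weights_ = torch::full({ 5 }, 1.0 / 5, torch::kFloat64);
        auto mask_wrong = (ypred != y).to(weights_.dtype());
        // eps: weighted error of this round's model
        double wrongWeights = (weights_ * mask_wrong).sum().item<double>();
        // alpha: amount of say; capped at 1 when the model makes no weighted error
        double significance = wrongWeights == 0 ? 1 : 0.5 * std::log((1 - wrongWeights) / wrongWeights);
        weights_ += mask_wrong * std::exp(significance) * weights_;  // boost wrong samples
        weights_ = weights_ / weights_.sum();                        // renormalise to a distribution
        std::cout << "eps: " << wrongWeights << " alpha: " << significance << std::endl;
        std::cout << weights_ << std::endl;
        return 0;
    }

With two of five samples wrong, eps = 0.4 and alpha = 0.5 * ln(0.6 / 0.4) ≈ 0.203, so each wrong sample's weight grows by a factor of 1 + exp(alpha) ≈ 2.22 before normalisation, while correct samples keep their weight and shrink relatively after the division.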
@@ -18,9 +18,9 @@ namespace bayesnet {
         auto y_pred_ = y_pred.accessor<int, 2>();
         vector<int> y_pred_final;
         for (int i = 0; i < y_pred.size(0); ++i) {
-            vector<float> votes(y_pred.size(1), 0);
+            vector<double> votes(y_pred.size(1), 0);
             for (int j = 0; j < y_pred.size(1); ++j) {
-                votes[y_pred_[i][j]] += 1;
+                votes[y_pred_[i][j]] += significanceModels[j];
             }
             // argsort in descending order
             auto indices = argsort(votes);
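With significanceModels in place, Ensemble voting changes from plain majority (every model adds 1 to its predicted class) to significance-weighted voting: model j contributes significanceModels[j] to the class it predicts. Distilled to a hypothetical helper over one sample (weightedVote, its arguments, and numClasses are illustrative names; votes are accumulated per class id, as in the hunk above):

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Weighted majority vote for a single sample: model j votes for the class
    // it predicts, with weight significance[j]; the class with the largest
    // accumulated weight wins.
    int weightedVote(const std::vector<int>& predictions,
                     const std::vector<double>& significance, int numClasses)
    {
        std::vector<double> votes(numClasses, 0.0);
        for (size_t j = 0; j < predictions.size(); ++j) {
            votes[predictions[j]] += significance[j];
        }
        return static_cast<int>(std::distance(votes.begin(),
            std::max_element(votes.begin(), votes.end())));
    }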
@@ -14,6 +14,7 @@ namespace bayesnet {
     protected:
         unsigned n_models;
         vector<unique_ptr<Classifier>> models;
+        vector<double> significanceModels;
         void trainModel(const torch::Tensor& weights) override;
         vector<int> voting(Tensor& y_pred);
     public:
@@ -29,7 +29,7 @@ namespace bayesnet {
         // where C is the class.
         addNodes();
         const Tensor& y = dataset.index({ -1, "..." });
-        vector <float> mi;
+        vector<double> mi;
         for (auto i = 0; i < features.size(); i++) {
             Tensor firstFeature = dataset.index({ i, "..." });
             mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
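This hunk, in a buildModel that ranks features by their mutual information with the class, only widens mi from float to double, in line with the double significances used elsewhere. As a reminder of what the ranking measures, here is a plain, unweighted mutual information between two discrete variables; the library's metrics.mutualInformation also takes sample weights, which this sketch deliberately omits:

    #include <cmath>
    #include <map>
    #include <utility>
    #include <vector>

    // I(X;Y) = sum over (x, y) of p(x, y) * log(p(x, y) / (p(x) * p(y))),
    // estimated from empirical frequencies of two discrete variables.
    double mutualInformation(const std::vector<int>& x, const std::vector<int>& y)
    {
        int n = x.size();
        std::map<int, double> px, py;
        std::map<std::pair<int, int>, double> pxy;
        for (int i = 0; i < n; ++i) {
            px[x[i]] += 1.0 / n;
            py[y[i]] += 1.0 / n;
            pxy[{ x[i], y[i] }] += 1.0 / n;
        }
        double mi = 0;
        for (const auto& [key, pj] : pxy) {
            mi += pj * std::log(pj / (px[key.first] * py[key.second]));
        }
        return mi;
    }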
@@ -4,7 +4,7 @@ namespace bayesnet {
     using namespace std;
     using namespace torch;
     // Return the indices in descending order
-    vector<int> argsort(vector<float>& nums)
+    vector<int> argsort(vector<double>& nums)
     {
         int n = nums.size();
         vector<int> indices(n);
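argsort's body is only partially visible in the diff, but its declared contract ("return the indices in descending order") and the visible context lines suggest the usual iota-plus-sort pattern. A sketch consistent with those lines, shown as an illustration rather than the repository's exact body:

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // Indices of nums sorted so that nums[indices[0]] is the largest value.
    std::vector<int> argsort(std::vector<double>& nums)
    {
        int n = nums.size();
        std::vector<int> indices(n);
        std::iota(indices.begin(), indices.end(), 0);
        std::sort(indices.begin(), indices.end(),
            [&nums](int a, int b) { return nums[a] > nums[b]; });
        return indices;
    }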
@@ -5,7 +5,7 @@
 namespace bayesnet {
     using namespace std;
     using namespace torch;
-    vector<int> argsort(vector<float>& nums);
+    vector<int> argsort(vector<double>& nums);
     vector<vector<int>> tensorToVector(Tensor& tensor);
 }
 #endif //BAYESNET_UTILS_H