Add hyperparameters and processing order to Boost
This commit is contained in:
parent
4964aab722
commit
6b317accf1
4
.vscode/launch.json
vendored
4
.vscode/launch.json
vendored
@ -31,7 +31,9 @@
|
|||||||
"--discretize",
|
"--discretize",
|
||||||
"--stratified",
|
"--stratified",
|
||||||
"-d",
|
"-d",
|
||||||
"iris"
|
"glass",
|
||||||
|
"--hyperparameters",
|
||||||
|
"{\"repeatSparent\": true, \"maxModels\": 12}"
|
||||||
],
|
],
|
||||||
"cwd": "/Users/rmontanana/Code/discretizbench",
|
"cwd": "/Users/rmontanana/Code/discretizbench",
|
||||||
},
|
},
|
||||||
|
@ -21,25 +21,39 @@ namespace bayesnet {
|
|||||||
}
|
}
|
||||||
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
||||||
}
|
}
|
||||||
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k)
|
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
|
||||||
{
|
{
|
||||||
|
// Return the K Best features
|
||||||
auto n = samples.size(0) - 1;
|
auto n = samples.size(0) - 1;
|
||||||
if (k == 0) {
|
if (k == 0) {
|
||||||
k = n;
|
k = n;
|
||||||
}
|
}
|
||||||
// compute scores
|
// compute scores
|
||||||
scoresKBest.reserve(n);
|
scoresKBest.clear();
|
||||||
|
featuresKBest.clear();
|
||||||
auto label = samples.index({ -1, "..." });
|
auto label = samples.index({ -1, "..." });
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
|
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
|
||||||
featuresKBest.push_back(i);
|
featuresKBest.push_back(i);
|
||||||
}
|
}
|
||||||
// sort & reduce scores and features
|
// sort & reduce scores and features
|
||||||
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
|
if (ascending) {
|
||||||
{ return scoresKBest[i] > scoresKBest[j]; });
|
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
|
||||||
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
|
{ return scoresKBest[i] < scoresKBest[j]; });
|
||||||
featuresKBest.resize(k);
|
sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
|
||||||
scoresKBest.resize(k);
|
if (k < n) {
|
||||||
|
for (int i = 0; i < n - k; ++i) {
|
||||||
|
featuresKBest.erase(featuresKBest.begin());
|
||||||
|
scoresKBest.erase(scoresKBest.begin());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
|
||||||
|
{ return scoresKBest[i] > scoresKBest[j]; });
|
||||||
|
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
|
||||||
|
featuresKBest.resize(k);
|
||||||
|
scoresKBest.resize(k);
|
||||||
|
}
|
||||||
return featuresKBest;
|
return featuresKBest;
|
||||||
}
|
}
|
||||||
vector<double> Metrics::getScoresKBest() const
|
vector<double> Metrics::getScoresKBest() const
|
||||||
|
@ -21,7 +21,7 @@ namespace bayesnet {
|
|||||||
Metrics() = default;
|
Metrics() = default;
|
||||||
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
||||||
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
|
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
|
||||||
vector<int> SelectKBestWeighted(const torch::Tensor& weights, unsigned k = 0);
|
vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0);
|
||||||
vector<double> getScoresKBest() const;
|
vector<double> getScoresKBest() const;
|
||||||
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
|
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
|
||||||
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
|
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
#include "BoostAODE.h"
|
#include "BoostAODE.h"
|
||||||
|
#include <set>
|
||||||
#include "BayesMetrics.h"
|
#include "BayesMetrics.h"
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
BoostAODE::BoostAODE() : Ensemble(), repeatSparent(false) {}
|
BoostAODE::BoostAODE() : Ensemble() {}
|
||||||
void BoostAODE::buildModel(const torch::Tensor& weights)
|
void BoostAODE::buildModel(const torch::Tensor& weights)
|
||||||
{
|
{
|
||||||
// Models shall be built in trainModel
|
// Models shall be built in trainModel
|
||||||
@ -12,40 +13,41 @@ namespace bayesnet {
|
|||||||
if (hyperparameters.contains("repeatSparent")) {
|
if (hyperparameters.contains("repeatSparent")) {
|
||||||
repeatSparent = hyperparameters["repeatSparent"];
|
repeatSparent = hyperparameters["repeatSparent"];
|
||||||
}
|
}
|
||||||
|
if (hyperparameters.contains("maxModels")) {
|
||||||
|
maxModels = hyperparameters["maxModels"];
|
||||||
|
}
|
||||||
|
if (hyperparameters.contains("ascending")) {
|
||||||
|
ascending = hyperparameters["ascending"];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||||
{
|
{
|
||||||
models.clear();
|
models.clear();
|
||||||
n_models = 0;
|
n_models = 0;
|
||||||
int max_models = .1 * n > 10 ? .1 * n : n;
|
if (maxModels == 0)
|
||||||
|
maxModels = .1 * n > 10 ? .1 * n : n;
|
||||||
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||||
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
||||||
auto y_ = dataset.index({ -1, "..." });
|
auto y_ = dataset.index({ -1, "..." });
|
||||||
bool exitCondition = false;
|
bool exitCondition = false;
|
||||||
vector<int> featuresUsed;
|
unordered_set<int> featuresUsed;
|
||||||
// Step 0: Set the finish condition
|
// Step 0: Set the finish condition
|
||||||
// if not repeatSparent, one finish condition is running out of features
|
// if not repeatSparent, one finish condition is running out of features
|
||||||
// n_models == max_models
|
// n_models == maxModels
|
||||||
int numClasses = states[className].size();
|
int numClasses = states[className].size();
|
||||||
while (!exitCondition) {
|
while (!exitCondition) {
|
||||||
// Step 1: Build ranking with mutual information
|
// Step 1: Build ranking with mutual information
|
||||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
|
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||||
auto feature = featureSelection[0];
|
|
||||||
unique_ptr<Classifier> model;
|
unique_ptr<Classifier> model;
|
||||||
if (!repeatSparent) {
|
auto feature = featureSelection[0];
|
||||||
if (n_models == 0) {
|
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
|
||||||
models.resize(n); // Resize for n==nfeatures SPODEs
|
|
||||||
significanceModels.resize(n);
|
|
||||||
}
|
|
||||||
bool found = false;
|
bool found = false;
|
||||||
for (int i = 0; i < featureSelection.size(); ++i) {
|
for (auto feat : featureSelection) {
|
||||||
if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) {
|
if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
found = true;
|
found = true;
|
||||||
feature = i;
|
feature = feat;
|
||||||
featuresUsed.push_back(feature);
|
|
||||||
n_models++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
@ -53,7 +55,9 @@ namespace bayesnet {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
featuresUsed.insert(feature);
|
||||||
model = std::make_unique<SPODE>(feature);
|
model = std::make_unique<SPODE>(feature);
|
||||||
|
n_models++;
|
||||||
model->fit(dataset, features, className, states, weights_);
|
model->fit(dataset, features, className, states, weights_);
|
||||||
auto ypred = model->predict(X_);
|
auto ypred = model->predict(X_);
|
||||||
// Step 3.1: Compute the classifier amount of say
|
// Step 3.1: Compute the classifier amount of say
|
||||||
@ -68,15 +72,12 @@ namespace bayesnet {
|
|||||||
double totalWeights = torch::sum(weights_).item<double>();
|
double totalWeights = torch::sum(weights_).item<double>();
|
||||||
weights_ = weights_ / totalWeights;
|
weights_ = weights_ / totalWeights;
|
||||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||||
if (!repeatSparent) {
|
models.push_back(std::move(model));
|
||||||
models[feature] = std::move(model);
|
significanceModels.push_back(significance);
|
||||||
significanceModels[feature] = significance;
|
exitCondition = n_models == maxModels;
|
||||||
} else {
|
}
|
||||||
models.push_back(std::move(model));
|
if (featuresUsed.size() != features.size()) {
|
||||||
significanceModels.push_back(significance);
|
cout << "Warning: BoostAODE did not use all the features" << endl;
|
||||||
n_models++;
|
|
||||||
}
|
|
||||||
exitCondition = n_models == max_models;
|
|
||||||
}
|
}
|
||||||
weights.copy_(weights_);
|
weights.copy_(weights_);
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,9 @@ namespace bayesnet {
|
|||||||
void buildModel(const torch::Tensor& weights) override;
|
void buildModel(const torch::Tensor& weights) override;
|
||||||
void trainModel(const torch::Tensor& weights) override;
|
void trainModel(const torch::Tensor& weights) override;
|
||||||
private:
|
private:
|
||||||
bool repeatSparent;
|
bool repeatSparent=false;
|
||||||
|
int maxModels=0;
|
||||||
|
bool ascending=false; //Process KBest features ascending or descending order
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
Loading…
Reference in New Issue
Block a user