Complete first BoostAODE

This commit is contained in:
2023-08-18 11:50:34 +02:00
parent 704dc937be
commit a6bb22dfb5
9 changed files with 184 additions and 117 deletions

View File

@@ -8,6 +8,8 @@ namespace bayesnet {
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
}
vector<string> AODE::graph(const string& title) const
{

View File

@@ -23,7 +23,7 @@ namespace bayesnet {
}
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k)
{
auto n = samples.size(1);
auto n = samples.size(0) - 1;
if (k == 0) {
k = n;
}

View File

@@ -5,30 +5,79 @@ namespace bayesnet {
// Default constructor: performs no work of its own beyond invoking the
// Ensemble base-class default constructor.
BoostAODE::BoostAODE() : Ensemble() {}
// Builds the initial set of base models for the ensemble.
//
// As listed here, the body empties `models` and appends one SPODE per entry
// of `features`, constructed with the feature index. The `weights` argument
// is not used by this function.
//
// NOTE(review): the active loop below is followed by a commented-out copy of
// the same loop; this listing looks like a diff rendering where the active
// lines are the removed version and the commented lines are the replacement
// (i.e. the function body is meant to be empty, with model construction moved
// to trainModel) - confirm against the actual file.
void BoostAODE::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
// models.clear();
// for (int i = 0; i < features.size(); ++i) {
// models.push_back(std::make_unique<SPODE>(i));
// }
// n_models = models.size();
}
// Trains the boosted ensemble of SPODE classifiers.
//
// Starting from uniform sample weights (1/m each), the loop repeatedly:
//   1. ranks the features with metrics.SelectKBestWeighted using the current weights,
//   2. fits a SPODE on the chosen feature with those weights,
//   3. computes the model's significance from the weighted error,
//   4. increases the weights of the misclassified samples and renormalises,
//   5. stores the model together with its significance,
// until n_models reaches max_models (or, when repeatSparent is false, until
// no unused feature is left).
//
// @param weights  tensor that receives the final sample weights via copy_()
//                 at the end of the function.
//
// NOTE(review): this listing appears to interleave removed and added lines of
// a diff (e.g. `weights_` is declared twice and an older `for` loop over
// `models[i]` precedes the new `while` loop) - confirm which lines belong to
// the current version before relying on it.
void BoostAODE::trainModel(const torch::Tensor& weights)
{
// End building vectors
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kDouble);
// Start from an empty ensemble.
models.clear();
n_models = 0;
// Upper bound on the number of models: 0.1 * n when that exceeds 10, otherwise n.
// The double result is truncated when stored in the int.
int max_models = .1 * n > 10 ? .1 * n : n;
// Uniform initial sample weights, 1/m each (m is presumably the number of samples - TODO confirm).
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
// X_: every row of dataset except the last one; the last row is read below as y_.
auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
for (int i = 0; i < features.size(); ++i) {
models[i].fit(dataset, features, className, states, weights_);
auto ypred = models[i].predict(X_);
// em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
// am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
// # Step 3.2: Update weights for next classifier
// weights = [
// wm * np.exp(am * (ym != yp))
// for wm, ym, yp in zip(weights, self.y_, y_pred)
// ]
// # Step 4: Add the new model
// self.estimators_.append(estimator)
// y_: last row of dataset (presumably the class labels - TODO confirm).
auto y_ = dataset.index({ -1, "..." });
bool exitCondition = false;
// When true, the top-ranked feature is used on every iteration and may repeat;
// when false, each feature index is used at most once (tracked in featuresUsed).
bool repeatSparent = true;
vector<int> featuresUsed;
// Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features
// n_models == max_models
// NOTE(review): numClasses is computed but not used anywhere in this function.
int numClasses = states[className].size();
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
// Default choice: the first entry of the ranking.
auto feature = featureSelection[0];
unique_ptr<Classifier> model;
if (!repeatSparent) {
if (n_models == 0) {
models.resize(n); // Resize for n==nfeatures SPODEs
significanceModels.resize(n);
}
// Pick the first candidate that has not been used yet.
// NOTE(review): the loop tests and assigns the loop index `i`, not
// `featureSelection[i]`, so the ranking order is not actually consulted
// here - looks unintended; confirm.
bool found = false;
for (int i = 0; i < featureSelection.size(); ++i) {
if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) {
continue;
}
found = true;
feature = i;
featuresUsed.push_back(feature);
n_models++;
break;
}
// Every candidate has already been used: leave the training loop.
if (!found) {
exitCondition = true;
continue;
}
}
// Step 2: Fit a SPODE on the selected feature with the current sample weights
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_);
// Step 3.1: Compute the classifier amount of say
// wrongWeights is the sum of the weights of the misclassified samples.
auto mask_wrong = ypred != y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double wrongWeights = masked_weights.sum().item<double>();
// significance = 0.5 * log((1 - e) / e); a zero weighted error is special-cased
// to 1 so the expression never divides by zero.
double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
// Misclassified samples end up multiplied by (1 + exp(significance));
// correctly classified samples keep their weight.
weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote
if (!repeatSparent) {
// Slot indexed by feature; n_models was already incremented during selection.
models[feature] = std::move(model);
significanceModels[feature] = significance;
} else {
models.push_back(std::move(model));
significanceModels.push_back(significance);
n_models++;
}
// Stop once the ensemble holds max_models models.
exitCondition = n_models == max_models;
}
// Write the final sample weights into the tensor supplied by the caller.
// NOTE(review): `weights` is a const reference yet is modified in place here - confirm this is intended.
weights.copy_(weights_);
}
vector<string> BoostAODE::graph(const string& title) const
{

View File

@@ -18,9 +18,9 @@ namespace bayesnet {
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(y_pred.size(1), 0);
vector<double> votes(y_pred.size(1), 0);
for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1;
votes[y_pred_[i][j]] += significanceModels[j];
}
// argsort in descending order
auto indices = argsort(votes);

View File

@@ -14,6 +14,7 @@ namespace bayesnet {
protected:
unsigned n_models;
vector<unique_ptr<Classifier>> models;
vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
vector<int> voting(Tensor& y_pred);
public:

View File

@@ -29,7 +29,7 @@ namespace bayesnet {
// where C is the class.
addNodes();
const Tensor& y = dataset.index({ -1, "..." });
vector <float> mi;
vector<double> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights));

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
using namespace std;
using namespace torch;
// Return the indices in descending order
vector<int> argsort(vector<float>& nums)
vector<int> argsort(vector<double>& nums)
{
int n = nums.size();
vector<int> indices(n);

View File

@@ -5,7 +5,7 @@
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums);
vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor);
}
#endif //BAYESNET_UTILS_H