diff --git a/.vscode/launch.json b/.vscode/launch.json index cade330..4ac137c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -31,7 +31,9 @@ "--discretize", "--stratified", "-d", - "iris" + "glass", + "--hyperparameters", + "{\"repeatSparent\": true, \"maxModels\": 12}" ], "cwd": "/Users/rmontanana/Code/discretizbench", }, diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 2c08836..1ce46e1 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -21,25 +21,39 @@ namespace bayesnet { } samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); } - vector Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k) + vector Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) { + // Return the K Best features auto n = samples.size(0) - 1; if (k == 0) { k = n; } // compute scores - scoresKBest.reserve(n); + scoresKBest.clear(); + featuresKBest.clear(); auto label = samples.index({ -1, "..." }); for (int i = 0; i < n; ++i) { scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights)); featuresKBest.push_back(i); } // sort & reduce scores and features - sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) - { return scoresKBest[i] > scoresKBest[j]; }); - sort(scoresKBest.begin(), scoresKBest.end(), std::greater()); - featuresKBest.resize(k); - scoresKBest.resize(k); + if (ascending) { + sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) + { return scoresKBest[i] < scoresKBest[j]; }); + sort(scoresKBest.begin(), scoresKBest.end(), std::less()); + if (k < n) { + for (int i = 0; i < n - k; ++i) { + featuresKBest.erase(featuresKBest.begin()); + scoresKBest.erase(scoresKBest.begin()); + } + } + } else { + sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) + { return scoresKBest[i] > scoresKBest[j]; }); + sort(scoresKBest.begin(), scoresKBest.end(), std::greater()); + featuresKBest.resize(k); + scoresKBest.resize(k); + } return featuresKBest; } vector Metrics::getScoresKBest() const diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 70d33e9..01841a7 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -21,7 +21,7 @@ namespace bayesnet { Metrics() = default; Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates); Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates); - vector SelectKBestWeighted(const torch::Tensor& weights, unsigned k = 0); + vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0); vector getScoresKBest() const; double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); vector conditionalEdgeWeights(vector& weights); // To use in Python diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 1b8681f..80fd20c 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -1,8 +1,9 @@ #include "BoostAODE.h" +#include #include "BayesMetrics.h" namespace bayesnet { - BoostAODE::BoostAODE() : Ensemble(), repeatSparent(false) {} + BoostAODE::BoostAODE() : Ensemble() {} void BoostAODE::buildModel(const torch::Tensor& weights) { // Models shall be built in trainModel @@ -12,40 +13,41 @@ namespace bayesnet { if (hyperparameters.contains("repeatSparent")) { repeatSparent = hyperparameters["repeatSparent"]; } + if (hyperparameters.contains("maxModels")) { + maxModels = hyperparameters["maxModels"]; + } + if (hyperparameters.contains("ascending")) { + ascending = hyperparameters["ascending"]; + } } void BoostAODE::trainModel(const torch::Tensor& weights) { models.clear(); n_models = 0; - int max_models = .1 * n > 10 ? .1 * n : n; + if (maxModels == 0) + maxModels = .1 * n > 10 ? .1 * n : n; Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }); auto y_ = dataset.index({ -1, "..." }); bool exitCondition = false; - vector featuresUsed; + unordered_set featuresUsed; // Step 0: Set the finish condition // if not repeatSparent a finish condition is run out of features - // n_models == max_models + // n_models == maxModels int numClasses = states[className].size(); while (!exitCondition) { // Step 1: Build ranking with mutual information - auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted - auto feature = featureSelection[0]; + auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted unique_ptr model; - if (!repeatSparent) { - if (n_models == 0) { - models.resize(n); // Resize for n==nfeatures SPODEs - significanceModels.resize(n); - } + auto feature = featureSelection[0]; + if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { bool found = false; - for (int i = 0; i < featureSelection.size(); ++i) { - if (find(featuresUsed.begin(), featuresUsed.end(), i) != featuresUsed.end()) { + for (auto feat : featureSelection) { + if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { continue; } found = true; - feature = i; - featuresUsed.push_back(feature); - n_models++; + feature = feat; break; } if (!found) { @@ -53,7 +55,9 @@ namespace bayesnet { continue; } } + featuresUsed.insert(feature); model = std::make_unique(feature); + n_models++; model->fit(dataset, features, className, states, weights_); auto ypred = model->predict(X_); // Step 3.1: Compute the classifier amout of say @@ -68,15 +72,12 @@ namespace bayesnet { double totalWeights = torch::sum(weights_).item(); weights_ = weights_ / totalWeights; // Step 3.4: Store classifier and its accuracy to weigh its future vote - if (!repeatSparent) { - models[feature] = std::move(model); - significanceModels[feature] = significance; - } else { - models.push_back(std::move(model)); - significanceModels.push_back(significance); - n_models++; - } - exitCondition = n_models == max_models; + models.push_back(std::move(model)); + significanceModels.push_back(significance); + exitCondition = n_models == maxModels; + } + if (featuresUsed.size() != features.size()) { + cout << "Warning: BoostAODE did not use all the features" << endl; } weights.copy_(weights_); } diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 290b1a2..508086b 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -13,7 +13,9 @@ namespace bayesnet { void buildModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override; private: - bool repeatSparent; + bool repeatSparent=false; + int maxModels=0; + bool ascending=false; //Process KBest features ascending or descending order }; } #endif \ No newline at end of file