Remove FeatureSel, add SelectKBest to BayesMetrics
This commit is contained in:
@@ -21,6 +21,31 @@ namespace bayesnet {
|
||||
}
|
||||
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
||||
}
|
||||
// Ranks the features by their weighted mutual information with the class
// labels and returns the indices of the k best ones, best first.
//
// Features are read as rows 0..n-1 of `samples` and the labels as its last
// row, so the number of features is samples.size(0) - 1 (not samples.size(1),
// which is the other dimension of the tensor).
//
// weights: forwarded unchanged to mutualInformation().
// k:       how many features to keep; 0 (the default) or any value larger
//          than the number of features keeps all of them.
// Returns the selected feature indices ordered by decreasing score. The
// matching scores are stored in scoresKBest (see getScoresKBest()).
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, unsigned k)
{
    const auto rows = samples.size(0);
    const int n = rows > 0 ? static_cast<int>(rows - 1) : 0;
    if (k == 0 || k > static_cast<unsigned>(n)) {
        // Never ask for more features than exist: resize() below would
        // otherwise pad the result with bogus index 0 / score 0.0 entries.
        k = static_cast<unsigned>(n);
    }
    // Start from empty members so repeated calls do not accumulate results.
    scoresKBest.clear();
    featuresKBest.clear();
    scoresKBest.reserve(n);
    featuresKBest.reserve(n);
    // compute scores
    const auto label = samples.index({ -1, "..." });
    for (int i = 0; i < n; ++i) {
        scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
        featuresKBest.push_back(i);
    }
    // sort & reduce scores and features
    // The indices must be sorted first: the comparator looks scores up by
    // feature index, which only works while scoresKBest is in feature order.
    std::sort(featuresKBest.begin(), featuresKBest.end(),
        [&](int i, int j) { return scoresKBest[i] > scoresKBest[j]; });
    std::sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
    featuresKBest.resize(k);
    scoresKBest.resize(k);
    return featuresKBest;
}
|
||||
// Accessor for the scores kept by SelectKBestWeighted(); the caller receives
// a copy of the stored vector.
vector<double> Metrics::getScoresKBest() const
{
    return this->scoresKBest;
}
|
||||
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
|
||||
{
|
||||
vector<pair<string, string>> result;
|
||||
|
@@ -12,6 +12,8 @@ namespace bayesnet {
|
||||
vector<string> features;
|
||||
string className;
|
||||
int classNumStates = 0;
|
||||
// Scores filled in by SelectKBestWeighted() and exposed through getScoresKBest()
vector<double> scoresKBest;
|
||||
vector<int> featuresKBest; // sorted indices of the features
|
||||
double entropy(const Tensor& feature, const Tensor& weights);
|
||||
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
|
||||
vector<pair<string, string>> doCombinations(const vector<string>&);
|
||||
@@ -19,6 +21,8 @@ namespace bayesnet {
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
||||
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
|
||||
// Scores every feature with mutualInformation() against the label row using `weights`
// and returns the indices of the k best-scoring features, best first (k == 0 selects all).
vector<int> SelectKBestWeighted(const torch::Tensor& weights, unsigned k = 0);
|
||||
// Returns a copy of the scores stored by SelectKBestWeighted()
vector<double> getScoresKBest() const;
|
||||
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
|
||||
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
|
||||
Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
|
@@ -1,36 +1,35 @@
|
||||
#include "BoostAODE.h"
|
||||
#include "FeatureSelect.h"
|
||||
#include "BayesMetrics.h"
|
||||
|
||||
namespace bayesnet {
|
||||
// Default constructor: only delegates to the Ensemble base-class default constructor.
BoostAODE::BoostAODE() : Ensemble() {}
|
||||
// Rebuilds the ensemble: discards any existing models and creates one SPODE
// per entry of the member `features`, constructed with that entry's index.
//
// The number of models has to match the member `features`, because
// trainModel() walks `features` and indexes `models` with the same counter.
// Feature ranking is not done here; trainModel() obtains it from
// metrics.SelectKBestWeighted().
//
// weights: not used by this override.
void BoostAODE::buildModel(const torch::Tensor& weights)
{
    models.clear();
    const int n_models = static_cast<int>(features.size());
    for (int i = 0; i < n_models; ++i) {
        models.push_back(std::make_unique<SPODE>(i));
    }
}
|
||||
// Fits every model created by buildModel() on the full dataset and predicts
// on the feature rows. The boosting weight update is not implemented yet;
// the Python reference for it is kept in the comments inside the loop.
//
// weights: not used yet; training runs with the uniform weights built below.
void BoostAODE::trainModel(const torch::Tensor& weights)
{
    // Uniform starting weights, 1/m each
    Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kDouble);
    // Every row except the last one (the last row is excluded from the prediction input)
    auto X_ = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
    auto featureSelection = metrics.SelectKBestWeighted(weights_, n); // Get all the features sorted
    const int n_models = static_cast<int>(features.size());
    for (int i = 0; i < n_models; ++i) {
        // models stores smart pointers (see std::make_unique in buildModel),
        // so members are reached with -> rather than with .
        models[i]->fit(dataset, features, className, states, weights_);
        auto ypred = models[i]->predict(X_);
        // em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
        // am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
        // # Step 3.2: Update weights for next classifier
        // weights = [
        //  wm * np.exp(am * (ym != yp))
        //  for wm, ym, yp in zip(weights, self.y_, y_pred)
        // ]
        // # Step 4: Add the new model
        // self.estimators_.append(estimator)
    }
}
|
||||
vector<string> BoostAODE::graph(const string& title) const
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
|
@@ -6,6 +6,7 @@ namespace bayesnet {
|
||||
class BoostAODE : public Ensemble {
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
public:
|
||||
BoostAODE();
|
||||
virtual ~BoostAODE() {};
|
||||
|
@@ -1,9 +1,8 @@
|
||||
# Header search paths: bundled libraries under lib/ and the project's own sources under src/.
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/featureselect)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
||||
# The BayesNet library target and the sources compiled into it.
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
|
||||
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
|
||||
Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||
# NOTE(review): two link lines for BayesNet appear below, one with FeatureSelect and one
# without. This looks like the before/after pair of a diff shown without +/- markers;
# confirm which one the build should keep.
target_link_libraries(BayesNet mdlp FeatureSelect "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
@@ -21,7 +21,6 @@ namespace bayesnet {
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, vector<string>& features_, string className_, map<string, vector<int>>& states_)
|
||||
{
|
||||
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||
cout << "Xf " << Xf.sizes() << " dtype: " << Xf.dtype() << endl;
|
||||
y = dataset.index({ -1, "..." }).clone();
|
||||
// This first part should go in a Classifier method called fit_local_discretization o fit_float...
|
||||
features = features_;
|
||||
|
Reference in New Issue
Block a user