Implement IWSS and FCBF feature selection for BoostAODE, in addition to CFS
This commit is contained in:
parent
6ef49385ea
commit
b35532dd9e
@ -29,6 +29,13 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
/**
 * Remove the first element of a vector and hand it back to the caller.
 *
 * @tparam T element type; must be copy-constructible.
 * @param v  vector to take the head from; it shrinks by one element on success.
 * @return   a copy of the element that was stored at index 0.
 * @throws std::out_of_range if v is empty (v is left unchanged in that case).
 *
 * Erasing at the front shifts every remaining element, so each call is
 * linear in v.size().
 */
template <class T>
T pop_first(std::vector<T>& v)
{
    // at() is bounds-checked: an empty vector raises instead of reading v[0],
    // which would be undefined behaviour.
    T head = v.at(0);
    v.erase(v.begin());
    return head;
}
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
||||
|
@ -6,6 +6,8 @@
|
||||
#include "Folding.h"
|
||||
#include "Paths.h"
|
||||
#include "CFS.h"
|
||||
#include "FCBF.h"
|
||||
#include "IWSS.h"
|
||||
|
||||
namespace bayesnet {
|
||||
BoostAODE::BoostAODE() : Ensemble() {}
|
||||
@ -44,7 +46,7 @@ namespace bayesnet {
|
||||
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
|
||||
{
|
||||
// Check if hyperparameters are valid
|
||||
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "cfs" };
|
||||
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };
|
||||
checkHyperparameters(validKeys, hyperparameters);
|
||||
if (hyperparameters.contains("repeatSparent")) {
|
||||
repeatSparent = hyperparameters["repeatSparent"];
|
||||
@ -58,29 +60,39 @@ namespace bayesnet {
|
||||
if (hyperparameters.contains("convergence")) {
|
||||
convergence = hyperparameters["convergence"];
|
||||
}
|
||||
if (hyperparameters.contains("cfs")) {
|
||||
cfs = hyperparameters["cfs"];
|
||||
if (hyperparameters.contains("threshold")) {
|
||||
threshold = hyperparameters["threshold"];
|
||||
}
|
||||
if (hyperparameters.contains("select_features")) {
|
||||
auto selectedAlgorithm = hyperparameters["select_features"];
|
||||
vector<string> algos = { "IWSS", "FCBF", "CFS" };
|
||||
selectFeatures = true;
|
||||
algorithm = selectedAlgorithm;
|
||||
if (find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
|
||||
throw invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
|
||||
}
|
||||
}
|
||||
}
|
||||
unordered_set<int> BoostAODE::initializeModels()
|
||||
{
|
||||
unordered_set<int> featuresUsed;
|
||||
// Read the CFS features
|
||||
string output = "[", prefix = "";
|
||||
bool first = true;
|
||||
for (const auto& feature : features) {
|
||||
output += prefix + "'" + feature + "'";
|
||||
if (first) {
|
||||
prefix = ", ";
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
output += "]";
|
||||
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
int maxFeatures = 0;
|
||||
auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
cfs.fit();
|
||||
auto cfsFeatures = cfs.getFeatures();
|
||||
if (algorithm == "CFS") {
|
||||
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
} else if (algorithm == "IWSS") {
|
||||
if (threshold < 0 || threshold >0.5) {
|
||||
throw invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
|
||||
}
|
||||
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
} else if (algorithm == "FCBF") {
|
||||
if (threshold < 1e-7 || threshold > 1) {
|
||||
throw invalid_argument("Invalid threshold value [1e-7, 1]");
|
||||
}
|
||||
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto cfsFeatures = featureSelector->getFeatures();
|
||||
for (const int& feature : cfsFeatures) {
|
||||
// cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl;
|
||||
featuresUsed.insert(feature);
|
||||
@ -90,12 +102,13 @@ namespace bayesnet {
|
||||
significanceModels.push_back(1.0);
|
||||
n_models++;
|
||||
}
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||
{
|
||||
unordered_set<int> featuresUsed;
|
||||
if (cfs) {
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
}
|
||||
if (maxModels == 0)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "Ensemble.h"
|
||||
#include <map>
|
||||
#include "SPODE.h"
|
||||
#include "FeatureSelect.h"
|
||||
namespace bayesnet {
|
||||
class BoostAODE : public Ensemble {
|
||||
public:
|
||||
@ -22,7 +23,10 @@ namespace bayesnet {
|
||||
int maxModels = 0;
|
||||
bool ascending = false; //Process KBest features ascending or descending order
|
||||
bool convergence = false; //if true, stop when the model does not improve
|
||||
bool cfs = false; // if true use CFS to select features stored in cfs folder with sha256(features) file_name
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
string algorithm = ""; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
};
|
||||
}
|
||||
#endif
|
@ -2,13 +2,9 @@
|
||||
#include <limits>
|
||||
#include "bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
|
||||
|
||||
|
||||
|
||||
void CFS::fit()
|
||||
{
|
||||
selectedFeatures.clear();
|
||||
initialize();
|
||||
computeSuLabels();
|
||||
auto featureOrder = argsort(suLabels); // sort descending order
|
||||
auto continueCondition = true;
|
||||
@ -21,7 +17,8 @@ namespace bayesnet {
|
||||
int bestFeature = -1;
|
||||
for (auto feature : featureOrder) {
|
||||
selectedFeatures.push_back(feature);
|
||||
auto meritNew = computeMeritCFS(); // Compute merit with cfsFeatures
|
||||
// Compute merit with selectedFeatures
|
||||
auto meritNew = computeMeritCFS();
|
||||
if (meritNew > merit) {
|
||||
merit = meritNew;
|
||||
bestFeature = feature;
|
||||
|
@ -5,5 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
||||
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
|
||||
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
|
||||
Mst.cc Proposal.cc CFS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
44
src/BayesNet/FCBF.cc
Normal file
44
src/BayesNet/FCBF.cc
Normal file
@ -0,0 +1,44 @@
|
||||
#include "bayesnetUtils.h"
|
||||
#include "FCBF.h"
|
||||
namespace bayesnet {
|
||||
|
||||
/**
 * Build an FCBF selector.
 *
 * samples, features, className, maxFeatures, classNumStates and weights are
 * forwarded untouched to the FeatureSelect base; threshold is stored in the
 * member of the same name and later compared against the label scores in fit().
 *
 * @throws std::invalid_argument when threshold is below 1e-7.
 */
FCBF::FCBF(const torch::Tensor& samples,
           const vector<string>& features,
           const string& className,
           const int maxFeatures,
           const int classNumStates,
           const torch::Tensor& weights,
           const double threshold)
    : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights),
      threshold(threshold)
{
    // Only the lower bound is validated here.
    const bool thresholdTooSmall = threshold < 1e-7;
    if (thresholdTooSmall) {
        throw std::invalid_argument("Threshold cannot be less than 1e-7");
    }
}
|
||||
void FCBF::fit()
|
||||
{
|
||||
initialize();
|
||||
computeSuLabels();
|
||||
auto featureOrder = argsort(suLabels); // sort descending order
|
||||
auto featureOrderCopy = featureOrder;
|
||||
for (const auto& feature : featureOrder) {
|
||||
// Don't self compare
|
||||
featureOrderCopy.erase(featureOrderCopy.begin());
|
||||
if (suLabels.at(feature) == 0.0) {
|
||||
// The feature has been removed from the list
|
||||
continue;
|
||||
}
|
||||
if (suLabels.at(feature) < threshold) {
|
||||
break;
|
||||
}
|
||||
// Remove redundant features
|
||||
for (const auto& featureCopy : featureOrderCopy) {
|
||||
double value = computeSuFeatures(feature, featureCopy);
|
||||
if (value >= suLabels.at(featureCopy)) {
|
||||
// Remove feature from list
|
||||
suLabels[featureCopy] = 0.0;
|
||||
}
|
||||
}
|
||||
selectedFeatures.push_back(feature);
|
||||
selectedScores.push_back(suLabels[feature]);
|
||||
if (selectedFeatures.size() == maxFeatures) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fitted = true;
|
||||
}
|
||||
}
|
18
src/BayesNet/FCBF.h
Normal file
18
src/BayesNet/FCBF.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef FCBF_H
|
||||
#define FCBF_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include "FeatureSelect.h"
|
||||
using namespace std;
|
||||
namespace bayesnet {
|
||||
class FCBF : public FeatureSelect {
|
||||
public:
|
||||
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
|
||||
FCBF(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
|
||||
virtual ~FCBF() {};
|
||||
void fit() override;
|
||||
private:
|
||||
double threshold = -1;
|
||||
};
|
||||
}
|
||||
#endif
|
@ -7,6 +7,11 @@ namespace bayesnet {
|
||||
|
||||
{
|
||||
}
|
||||
void FeatureSelect::initialize()
|
||||
{
|
||||
selectedFeatures.clear();
|
||||
selectedScores.clear();
|
||||
}
|
||||
double FeatureSelect::symmetricalUncertainty(int a, int b)
|
||||
{
|
||||
/*
|
||||
|
@ -14,6 +14,7 @@ namespace bayesnet {
|
||||
vector<int> getFeatures() const;
|
||||
vector<double> getScores() const;
|
||||
protected:
|
||||
void initialize();
|
||||
void computeSuLabels();
|
||||
double computeSuFeatures(const int a, const int b);
|
||||
double symmetricalUncertainty(int a, int b);
|
||||
|
47
src/BayesNet/IWSS.cc
Normal file
47
src/BayesNet/IWSS.cc
Normal file
@ -0,0 +1,47 @@
|
||||
#include "IWSS.h"
#include <cmath>
#include <limits>
#include "bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
/**
 * Build an IWSS selector.
 *
 * samples, features, className, maxFeatures, classNumStates and weights are
 * forwarded untouched to the FeatureSelect base; threshold is stored in the
 * member of the same name and used by fit() as the tolerated relative change
 * in merit.
 *
 * @throws std::invalid_argument when threshold lies outside [0, 0.5].
 */
IWSS::IWSS(const torch::Tensor& samples,
           const vector<string>& features,
           const string& className,
           const int maxFeatures,
           const int classNumStates,
           const torch::Tensor& weights,
           const double threshold)
    : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights),
      threshold(threshold)
{
    const bool belowRange = threshold < 0;
    const bool aboveRange = threshold > .5;
    if (belowRange || aboveRange) {
        throw std::invalid_argument("Threshold has to be in [0, 0.5]");
    }
}
|
||||
void IWSS::fit()
|
||||
{
|
||||
initialize();
|
||||
computeSuLabels();
|
||||
auto featureOrder = argsort(suLabels); // sort descending order
|
||||
auto featureOrderCopy = featureOrder;
|
||||
// Add first and second features to result
|
||||
// First with its own score
|
||||
auto first_feature = pop_first(featureOrderCopy);
|
||||
selectedFeatures.push_back(first_feature);
|
||||
selectedScores.push_back(suLabels.at(first_feature));
|
||||
// Second with the score of the candidates
|
||||
selectedFeatures.push_back(pop_first(featureOrderCopy));
|
||||
auto merit = computeMeritCFS();
|
||||
selectedScores.push_back(merit);
|
||||
for (const auto feature : featureOrderCopy) {
|
||||
selectedFeatures.push_back(feature);
|
||||
// Compute merit with selectedFeatures
|
||||
auto meritNew = computeMeritCFS();
|
||||
double delta = merit != 0.0 ? abs(merit - meritNew) / merit : 0.0;
|
||||
if (meritNew > merit || delta < threshold) {
|
||||
if (meritNew > merit) {
|
||||
merit = meritNew;
|
||||
}
|
||||
selectedScores.push_back(meritNew);
|
||||
} else {
|
||||
selectedFeatures.pop_back();
|
||||
break;
|
||||
}
|
||||
if (selectedFeatures.size() == maxFeatures) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fitted = true;
|
||||
}
|
||||
}
|
18
src/BayesNet/IWSS.h
Normal file
18
src/BayesNet/IWSS.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef IWSS_H
|
||||
#define IWSS_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include "FeatureSelect.h"
|
||||
using namespace std;
|
||||
namespace bayesnet {
|
||||
class IWSS : public FeatureSelect {
|
||||
public:
|
||||
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
|
||||
IWSS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
|
||||
virtual ~IWSS() {};
|
||||
void fit() override;
|
||||
private:
|
||||
double threshold = -1;
|
||||
};
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user