Implement IWSS and FCBF too for BoostAODE
This commit is contained in:
parent
6ef49385ea
commit
b35532dd9e
@ -29,6 +29,13 @@ namespace bayesnet {
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
// Remove the first element of `v` and return a copy of it.
//
// v: vector to take the element from; on success it is one element shorter.
// Returns the value that was stored at index 0.
// Throws std::out_of_range (raised by vector::at) when `v` is empty, instead of
// reading and erasing past the end of an empty vector.
template <class T>
T pop_first(std::vector<T>& v)
{
    // at() is bounds-checked, so an empty vector is reported before erase() runs.
    T temp = v.at(0);
    v.erase(v.begin());
    return temp;
}
|
||||||
public:
|
public:
|
||||||
Metrics() = default;
|
Metrics() = default;
|
||||||
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include "Folding.h"
|
#include "Folding.h"
|
||||||
#include "Paths.h"
|
#include "Paths.h"
|
||||||
#include "CFS.h"
|
#include "CFS.h"
|
||||||
|
#include "FCBF.h"
|
||||||
|
#include "IWSS.h"
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
BoostAODE::BoostAODE() : Ensemble() {}
|
BoostAODE::BoostAODE() : Ensemble() {}
|
||||||
@ -44,7 +46,7 @@ namespace bayesnet {
|
|||||||
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
|
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
|
||||||
{
|
{
|
||||||
// Check if hyperparameters are valid
|
// Check if hyperparameters are valid
|
||||||
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "cfs" };
|
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };
|
||||||
checkHyperparameters(validKeys, hyperparameters);
|
checkHyperparameters(validKeys, hyperparameters);
|
||||||
if (hyperparameters.contains("repeatSparent")) {
|
if (hyperparameters.contains("repeatSparent")) {
|
||||||
repeatSparent = hyperparameters["repeatSparent"];
|
repeatSparent = hyperparameters["repeatSparent"];
|
||||||
@ -58,29 +60,39 @@ namespace bayesnet {
|
|||||||
if (hyperparameters.contains("convergence")) {
|
if (hyperparameters.contains("convergence")) {
|
||||||
convergence = hyperparameters["convergence"];
|
convergence = hyperparameters["convergence"];
|
||||||
}
|
}
|
||||||
if (hyperparameters.contains("cfs")) {
|
if (hyperparameters.contains("threshold")) {
|
||||||
cfs = hyperparameters["cfs"];
|
threshold = hyperparameters["threshold"];
|
||||||
|
}
|
||||||
|
if (hyperparameters.contains("select_features")) {
|
||||||
|
auto selectedAlgorithm = hyperparameters["select_features"];
|
||||||
|
vector<string> algos = { "IWSS", "FCBF", "CFS" };
|
||||||
|
selectFeatures = true;
|
||||||
|
algorithm = selectedAlgorithm;
|
||||||
|
if (find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
|
||||||
|
throw invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unordered_set<int> BoostAODE::initializeModels()
|
unordered_set<int> BoostAODE::initializeModels()
|
||||||
{
|
{
|
||||||
unordered_set<int> featuresUsed;
|
unordered_set<int> featuresUsed;
|
||||||
// Read the CFS features
|
|
||||||
string output = "[", prefix = "";
|
|
||||||
bool first = true;
|
|
||||||
for (const auto& feature : features) {
|
|
||||||
output += prefix + "'" + feature + "'";
|
|
||||||
if (first) {
|
|
||||||
prefix = ", ";
|
|
||||||
first = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output += "]";
|
|
||||||
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||||
int maxFeatures = 0;
|
int maxFeatures = 0;
|
||||||
auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
if (algorithm == "CFS") {
|
||||||
cfs.fit();
|
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||||
auto cfsFeatures = cfs.getFeatures();
|
} else if (algorithm == "IWSS") {
|
||||||
|
if (threshold < 0 || threshold >0.5) {
|
||||||
|
throw invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
|
||||||
|
}
|
||||||
|
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||||
|
} else if (algorithm == "FCBF") {
|
||||||
|
if (threshold < 1e-7 || threshold > 1) {
|
||||||
|
throw invalid_argument("Invalid threshold value [1e-7, 1]");
|
||||||
|
}
|
||||||
|
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||||
|
}
|
||||||
|
featureSelector->fit();
|
||||||
|
auto cfsFeatures = featureSelector->getFeatures();
|
||||||
for (const int& feature : cfsFeatures) {
|
for (const int& feature : cfsFeatures) {
|
||||||
// cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl;
|
// cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl;
|
||||||
featuresUsed.insert(feature);
|
featuresUsed.insert(feature);
|
||||||
@ -90,12 +102,13 @@ namespace bayesnet {
|
|||||||
significanceModels.push_back(1.0);
|
significanceModels.push_back(1.0);
|
||||||
n_models++;
|
n_models++;
|
||||||
}
|
}
|
||||||
|
delete featureSelector;
|
||||||
return featuresUsed;
|
return featuresUsed;
|
||||||
}
|
}
|
||||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||||
{
|
{
|
||||||
unordered_set<int> featuresUsed;
|
unordered_set<int> featuresUsed;
|
||||||
if (cfs) {
|
if (selectFeatures) {
|
||||||
featuresUsed = initializeModels();
|
featuresUsed = initializeModels();
|
||||||
}
|
}
|
||||||
if (maxModels == 0)
|
if (maxModels == 0)
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include "Ensemble.h"
|
#include "Ensemble.h"
|
||||||
#include <map>
|
#include <map>
|
||||||
#include "SPODE.h"
|
#include "SPODE.h"
|
||||||
|
#include "FeatureSelect.h"
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
class BoostAODE : public Ensemble {
|
class BoostAODE : public Ensemble {
|
||||||
public:
|
public:
|
||||||
@ -22,7 +23,10 @@ namespace bayesnet {
|
|||||||
int maxModels = 0;
|
int maxModels = 0;
|
||||||
bool ascending = false; //Process KBest features ascending or descending order
|
bool ascending = false; //Process KBest features ascending or descending order
|
||||||
bool convergence = false; //if true, stop when the model does not improve
|
bool convergence = false; //if true, stop when the model does not improve
|
||||||
bool cfs = false; // if true use CFS to select features stored in cfs folder with sha256(features) file_name
|
bool selectFeatures = false; // if true, use feature selection
|
||||||
|
string algorithm = ""; // Selected feature selection algorithm
|
||||||
|
FeatureSelect* featureSelector = nullptr;
|
||||||
|
double threshold = -1;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
@ -2,13 +2,9 @@
|
|||||||
#include <limits>
|
#include <limits>
|
||||||
#include "bayesnetUtils.h"
|
#include "bayesnetUtils.h"
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void CFS::fit()
|
void CFS::fit()
|
||||||
{
|
{
|
||||||
selectedFeatures.clear();
|
initialize();
|
||||||
computeSuLabels();
|
computeSuLabels();
|
||||||
auto featureOrder = argsort(suLabels); // sort descending order
|
auto featureOrder = argsort(suLabels); // sort descending order
|
||||||
auto continueCondition = true;
|
auto continueCondition = true;
|
||||||
@ -21,7 +17,8 @@ namespace bayesnet {
|
|||||||
int bestFeature = -1;
|
int bestFeature = -1;
|
||||||
for (auto feature : featureOrder) {
|
for (auto feature : featureOrder) {
|
||||||
selectedFeatures.push_back(feature);
|
selectedFeatures.push_back(feature);
|
||||||
auto meritNew = computeMeritCFS(); // Compute merit with cfsFeatures
|
// Compute merit with selectedFeatures
|
||||||
|
auto meritNew = computeMeritCFS();
|
||||||
if (meritNew > merit) {
|
if (meritNew > merit) {
|
||||||
merit = meritNew;
|
merit = meritNew;
|
||||||
bestFeature = feature;
|
bestFeature = feature;
|
||||||
|
@ -5,5 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
|||||||
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
||||||
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
|
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
|
||||||
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
|
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
|
||||||
Mst.cc Proposal.cc CFS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
44
src/BayesNet/FCBF.cc
Normal file
44
src/BayesNet/FCBF.cc
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#include "bayesnetUtils.h"
|
||||||
|
#include "FCBF.h"
|
||||||
|
namespace bayesnet {
|
||||||
|
|
||||||
|
// Construct the selector: every argument except `threshold` is forwarded to
// FeatureSelect, and `threshold` is stored in the member of the same name.
// Throws std::invalid_argument when threshold is below 1e-7.
FCBF::FCBF(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
    FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
    // Inside the body `threshold` names the constructor argument.
    const bool belowMinimum = threshold < 1e-7;
    if (belowMinimum) {
        throw std::invalid_argument("Threshold cannot be less than 1e-7");
    }
}
|
||||||
|
void FCBF::fit()
|
||||||
|
{
|
||||||
|
initialize();
|
||||||
|
computeSuLabels();
|
||||||
|
auto featureOrder = argsort(suLabels); // sort descending order
|
||||||
|
auto featureOrderCopy = featureOrder;
|
||||||
|
for (const auto& feature : featureOrder) {
|
||||||
|
// Don't self compare
|
||||||
|
featureOrderCopy.erase(featureOrderCopy.begin());
|
||||||
|
if (suLabels.at(feature) == 0.0) {
|
||||||
|
// The feature has been removed from the list
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (suLabels.at(feature) < threshold) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Remove redundant features
|
||||||
|
for (const auto& featureCopy : featureOrderCopy) {
|
||||||
|
double value = computeSuFeatures(feature, featureCopy);
|
||||||
|
if (value >= suLabels.at(featureCopy)) {
|
||||||
|
// Remove feature from list
|
||||||
|
suLabels[featureCopy] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
selectedFeatures.push_back(feature);
|
||||||
|
selectedScores.push_back(suLabels[feature]);
|
||||||
|
if (selectedFeatures.size() == maxFeatures) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fitted = true;
|
||||||
|
}
|
||||||
|
}
|
18
src/BayesNet/FCBF.h
Normal file
18
src/BayesNet/FCBF.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef FCBF_H
|
||||||
|
#define FCBF_H
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "FeatureSelect.h"
|
||||||
|
using namespace std;
|
||||||
|
namespace bayesnet {
|
||||||
|
class FCBF : public FeatureSelect {
|
||||||
|
public:
|
||||||
|
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
|
||||||
|
FCBF(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
|
||||||
|
virtual ~FCBF() {};
|
||||||
|
void fit() override;
|
||||||
|
private:
|
||||||
|
double threshold = -1;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@ -7,6 +7,11 @@ namespace bayesnet {
|
|||||||
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
void FeatureSelect::initialize()
|
||||||
|
{
|
||||||
|
selectedFeatures.clear();
|
||||||
|
selectedScores.clear();
|
||||||
|
}
|
||||||
double FeatureSelect::symmetricalUncertainty(int a, int b)
|
double FeatureSelect::symmetricalUncertainty(int a, int b)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -14,6 +14,7 @@ namespace bayesnet {
|
|||||||
vector<int> getFeatures() const;
|
vector<int> getFeatures() const;
|
||||||
vector<double> getScores() const;
|
vector<double> getScores() const;
|
||||||
protected:
|
protected:
|
||||||
|
void initialize();
|
||||||
void computeSuLabels();
|
void computeSuLabels();
|
||||||
double computeSuFeatures(const int a, const int b);
|
double computeSuFeatures(const int a, const int b);
|
||||||
double symmetricalUncertainty(int a, int b);
|
double symmetricalUncertainty(int a, int b);
|
||||||
|
47
src/BayesNet/IWSS.cc
Normal file
47
src/BayesNet/IWSS.cc
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#include "IWSS.h"
|
||||||
|
#include <limits>
|
||||||
|
#include "bayesnetUtils.h"
|
||||||
|
namespace bayesnet {
|
||||||
|
// Construct the selector: every argument except `threshold` is forwarded to
// FeatureSelect, and `threshold` is stored in the member of the same name.
// Throws std::invalid_argument when threshold is outside [0, 0.5].
IWSS::IWSS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
    FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
    // Inside the body `threshold` names the constructor argument.
    const bool aboveMaximum = threshold > 0.5;
    const bool belowMinimum = threshold < 0;
    if (belowMinimum || aboveMaximum) {
        throw std::invalid_argument("Threshold has to be in [0, 0.5]");
    }
}
|
||||||
|
void IWSS::fit()
|
||||||
|
{
|
||||||
|
initialize();
|
||||||
|
computeSuLabels();
|
||||||
|
auto featureOrder = argsort(suLabels); // sort descending order
|
||||||
|
auto featureOrderCopy = featureOrder;
|
||||||
|
// Add first and second features to result
|
||||||
|
// First with its own score
|
||||||
|
auto first_feature = pop_first(featureOrderCopy);
|
||||||
|
selectedFeatures.push_back(first_feature);
|
||||||
|
selectedScores.push_back(suLabels.at(first_feature));
|
||||||
|
// Second with the score of the candidates
|
||||||
|
selectedFeatures.push_back(pop_first(featureOrderCopy));
|
||||||
|
auto merit = computeMeritCFS();
|
||||||
|
selectedScores.push_back(merit);
|
||||||
|
for (const auto feature : featureOrderCopy) {
|
||||||
|
selectedFeatures.push_back(feature);
|
||||||
|
// Compute merit with selectedFeatures
|
||||||
|
auto meritNew = computeMeritCFS();
|
||||||
|
double delta = merit != 0.0 ? abs(merit - meritNew) / merit : 0.0;
|
||||||
|
if (meritNew > merit || delta < threshold) {
|
||||||
|
if (meritNew > merit) {
|
||||||
|
merit = meritNew;
|
||||||
|
}
|
||||||
|
selectedScores.push_back(meritNew);
|
||||||
|
} else {
|
||||||
|
selectedFeatures.pop_back();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (selectedFeatures.size() == maxFeatures) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fitted = true;
|
||||||
|
}
|
||||||
|
}
|
18
src/BayesNet/IWSS.h
Normal file
18
src/BayesNet/IWSS.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef IWSS_H
|
||||||
|
#define IWSS_H
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "FeatureSelect.h"
|
||||||
|
using namespace std;
|
||||||
|
namespace bayesnet {
|
||||||
|
class IWSS : public FeatureSelect {
|
||||||
|
public:
|
||||||
|
// dataset is a n+1xm tensor of integers where dataset[-1] is the y vector
|
||||||
|
IWSS(const torch::Tensor& samples, const vector<string>& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
|
||||||
|
virtual ~IWSS() {};
|
||||||
|
void fit() override;
|
||||||
|
private:
|
||||||
|
double threshold = -1;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
Loading…
Reference in New Issue
Block a user