Prepare BoostAODE first try

This commit is contained in:
parent 5022a4dc90
commit 54b8939f35
@@ -45,7 +45,6 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
find_package(OpenSSL REQUIRED)
include(AddGitSubmodule)
if (CODE_COVERAGE)
    enable_testing()
@@ -112,11 +112,6 @@ namespace bayesnet {
        torch::Tensor counts = feature.bincount(weights);
        double totalWeight = counts.sum().item<double>();
        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
        // cout << "Probs: ";
        // for (int i = 0; i < probs.size(0); ++i) {
        //     cout << probs[i].item<double>() << ", ";
        // }
        // cout << endl;
        torch::Tensor logProbs = torch::log(probs);
        torch::Tensor entropy = -probs * logProbs;
        return entropy.nansum().item<double>();
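For reference, this hunk computes the weighted Shannon entropy H(X) = -sum_x p(x) log p(x), with p(x) estimated from the sample weights via bincount. A minimal standalone sketch of the same computation (assumes only libtorch; the toy feature and weights are invented for illustration):

#include <torch/torch.h>
#include <iostream>

int main()
{
    // Hypothetical 3-state feature over 6 samples, uniform weights.
    auto feature = torch::tensor({ 0, 1, 1, 2, 2, 2 }, torch::kInt64);
    auto weights = torch::full({ 6 }, 1.0 / 6, torch::kFloat64);
    auto counts = feature.bincount(weights);                // weighted count per state
    double totalWeight = counts.sum().item<double>();
    auto probs = counts.to(torch::kFloat64) / totalWeight;  // p = {1/6, 2/6, 3/6}
    auto entropy = -(probs * torch::log(probs)).nansum();   // nansum drops 0*log(0) terms
    std::cout << "H(X) = " << entropy.item<double>() << std::endl; // ~1.011 nats
}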
@@ -5,7 +5,6 @@
#include "Colors.h"
#include "Folding.h"
#include "Paths.h"
#include <openssl/evp.h>
#include "CFS.h"

namespace bayesnet {
@@ -63,27 +62,6 @@ namespace bayesnet {
            cfs = hyperparameters["cfs"];
        }
    }
    string sha256(const string& input)
    {
        EVP_MD_CTX* mdctx;
        const EVP_MD* md;
        unsigned char hash[EVP_MAX_MD_SIZE];
        unsigned int hash_len;

        OpenSSL_add_all_digests();
        md = EVP_get_digestbyname("sha256");
        mdctx = EVP_MD_CTX_new();
        EVP_DigestInit_ex(mdctx, md, nullptr);
        EVP_DigestUpdate(mdctx, input.c_str(), input.size());
        EVP_DigestFinal_ex(mdctx, hash, &hash_len);
        EVP_MD_CTX_free(mdctx);
        stringstream oss;
        for (unsigned int i = 0; i < hash_len; i++) {
            oss << hex << setfill('0') << setw(2) << (int)hash[i];
        }
        return oss.str();
    }

    unordered_set<int> BoostAODE::initializeModels()
    {
        unordered_set<int> featuresUsed;
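The sha256() helper above goes through the legacy OpenSSL_add_all_digests()/EVP_get_digestbyname() lookup, which has been unnecessary since OpenSSL 1.1.0. A minimal sketch of the same hex digest obtained from EVP_sha256() directly (sha256_direct is a hypothetical name, not part of the commit):

#include <openssl/evp.h>
#include <iomanip>
#include <sstream>
#include <string>

std::string sha256_direct(const std::string& input)
{
    unsigned char hash[EVP_MAX_MD_SIZE];
    unsigned int hash_len;
    EVP_MD_CTX* mdctx = EVP_MD_CTX_new();
    EVP_DigestInit_ex(mdctx, EVP_sha256(), nullptr);  // no digest-by-name lookup needed
    EVP_DigestUpdate(mdctx, input.c_str(), input.size());
    EVP_DigestFinal_ex(mdctx, hash, &hash_len);
    EVP_MD_CTX_free(mdctx);
    std::stringstream oss;
    for (unsigned int i = 0; i < hash_len; i++) {
        oss << std::hex << std::setfill('0') << std::setw(2) << static_cast<int>(hash[i]);
    }
    return oss.str();
}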
@@ -101,26 +79,16 @@ namespace bayesnet {
        Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
        int maxFeatures = 0;
        auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
        // std::size_t str_hash = std::hash<std::string>{}(output);
        string str_hash = sha256(output);
        stringstream oss;
        oss << platform::Paths::cfs() << str_hash << ".json";
        string name = oss.str();
        ifstream file(name);
        if (file.is_open()) {
            nlohmann::json cfsFeatures = nlohmann::json::parse(file);
            file.close();
            for (const int& feature : cfsFeatures) {
                // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl;
                featuresUsed.insert(feature);
                unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
                model->fit(dataset, features, className, states, weights_);
                models.push_back(std::move(model));
                significanceModels.push_back(1.0);
                n_models++;
            }
        } else {
            throw runtime_error("File " + name + " not found");
            cfs.fit();
            auto cfsFeatures = cfs.getFeatures();
            for (const int& feature : cfsFeatures) {
                // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl;
                featuresUsed.insert(feature);
                unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
                model->fit(dataset, features, className, states, weights_);
                models.push_back(std::move(model));
                significanceModels.push_back(1.0);
                n_models++;
            }
        }
        return featuresUsed;
    }
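This block implements a simple on-disk cache: the CFS feature subset for a dataset is stored as JSON under a filename derived from sha256(output), so selection runs once per dataset. Note that the cache-miss branch throws before reaching cfs.fit(), so the lines after the throw are currently unreachable. A hedged sketch of the full read/write round trip (the write path and the names loadOrStoreFeatures, cacheDir, datasetDump, computeWithCfs are assumptions, not part of the commit):

#include <fstream>
#include <functional>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

std::string sha256(const std::string& input); // the helper defined in BoostAODE.cc above

std::vector<int> loadOrStoreFeatures(const std::string& cacheDir, const std::string& datasetDump,
    const std::function<std::vector<int>()>& computeWithCfs)
{
    std::string name = cacheDir + sha256(datasetDump) + ".json";
    std::ifstream file(name);
    if (file.is_open()) {
        // Cache hit: reuse the previously selected feature subset.
        return nlohmann::json::parse(file).get<std::vector<int>>();
    }
    // Cache miss: run CFS, then persist the result for the next run (assumed behavior).
    std::vector<int> selected = computeWithCfs();
    std::ofstream out(name);
    out << nlohmann::json(selected).dump();
    return selected;
}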
@@ -18,21 +18,16 @@ namespace bayesnet {
        auto x = samples.index({ a, "..." });
        auto y = samples.index({ b, "..." });
        auto mu = mutualInformation(x, y, weights);
        // cout << "Mutual Information: (" << a << ", " << b << ") = " << mu << endl;
        auto hx = entropy(x, weights);
        // cout << "Entropy X: " << hx << endl;
        auto hy = entropy(y, weights);
        // cout << "Entropy Y: " << hy << endl;
        return 2.0 * mu / (hx + hy);
    }
    void CFS::computeSuLabels()
    {
        // Compute Symmetrical Uncertainty between features and labels
        // https://en.wikipedia.org/wiki/Symmetric_uncertainty
        // cout << "SuLabels" << endl;
        for (int i = 0; i < features.size(); ++i) {
            suLabels.push_back(symmetricalUncertainty(i, -1));
            // cout << i << " -> " << suLabels[i] << endl;
        }

    }
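For reference, symmetricalUncertainty() is the mutual information normalized by the two marginal entropies, matching return 2.0 * mu / (hx + hy) above; in computeSuLabels() the index -1 selects the last row of samples, which holds the class labels. As a formula:

    SU(X, Y) = \frac{2\, I(X; Y)}{H(X) + H(Y)} \in [0, 1]

SU is 0 for independent variables and 1 when each variable fully determines the other.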
@@ -40,8 +35,14 @@ namespace bayesnet {
    {
        // Compute Symmetrical Uncertainty between features
        // https://en.wikipedia.org/wiki/Symmetric_uncertainty
        // TODO: Implement Cache in this function
        return symmetricalUncertainty(firstFeature, secondFeature);
        try {
            return suFeatures.at({ firstFeature, secondFeature });
        }
        catch (const out_of_range& e) {
            auto result = symmetricalUncertainty(firstFeature, secondFeature);
            suFeatures[{firstFeature, secondFeature}] = result;
            return result;
        }
    }
    double CFS::computeMerit()
    {
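The cache added here is queried through map::at() and a catch of out_of_range, so every miss pays the cost of a thrown exception. A sketch of the same memoization using map::find instead (the enclosing method name computeSuFeatures and its signature are assumptions; the hunk does not show the declaration):

double CFS::computeSuFeatures(const int firstFeature, const int secondFeature)
{
    auto key = std::make_pair(firstFeature, secondFeature);
    auto it = suFeatures.find(key); // no exception on a miss
    if (it != suFeatures.end()) {
        return it->second;
    }
    auto result = symmetricalUncertainty(firstFeature, secondFeature);
    suFeatures[key] = result; // memoize for subsequent calls
    return result;
}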
@@ -73,7 +74,6 @@ namespace bayesnet {
        for (auto feature : featureOrder) {
            cfsFeatures.push_back(feature);
            auto meritNew = computeMerit(); // Compute merit with cfsFeatures
            // cout << "MeritNew: " << meritNew << " Merit: " << merit << endl;
            if (meritNew > merit) {
                merit = meritNew;
                bestFeature = feature;
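For reference, the merit that computeMerit() evaluates for the candidate subset S is, in the standard CFS formulation (Hall, 1999; the hunk does not show the function body, so this is an assumption):

    \mathrm{Merit}_S = \frac{k\, \overline{r_{cf}}}{\sqrt{k + k(k-1)\, \overline{r_{ff}}}}

where k = |S|, \overline{r_{cf}} is the mean feature-class symmetrical uncertainty, and \overline{r_{ff}} the mean feature-feature symmetrical uncertainty; the greedy loop above keeps the feature whose addition maximizes it.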
@@ -81,7 +81,8 @@ namespace bayesnet {
            cfsFeatures.pop_back();
        }
        if (bestFeature == -1) {
            throw runtime_error("Feature not found");
            // meritNew has to be nan due to constant features
            break;
        }
        cfsFeatures.push_back(bestFeature);
        cfsScores.push_back(merit);
@@ -90,34 +91,6 @@ namespace bayesnet {
        }
        fitted = true;
    }
    void CFS::test()
    {
        cout << "H(y): " << entropy(samples.index({ -1, "..." }), weights) << endl;
        cout << "y: ";
        auto y = samples.index({ -1, "..." });
        for (int i = 0; i < y.size(0); ++i) {
            cout << y[i].item<double>() << ", ";
        }
        cout << endl;
        computeSuLabels();
        // cout << "Probabilities of features: " << endl;
        // for (const auto& featureName : features) {
        //     int featureIdx = find(features.begin(), features.end(), featureName) - features.begin();
        //     cout << featureName << "(" << featureIdx << "): ";
        //     auto feature = samples.index({ featureIdx, "..." });
        //     torch::Tensor counts = feature.bincount(weights);
        //     double totalWeight = counts.sum().item<double>();
        //     torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
        //     for (int i = 0; i < probs.size(0); ++i) {
        //         cout << probs[i].item<double>() << ", ";
        //     }
        //     cout << endl;
        //     // for (int i = 0; i < x.size(0); ++i) {
        //     //     cout << x[i].item<double>() << ", ";
        //     // }
        //     // cout << endl;
        // }
    }
    bool CFS::computeContinueCondition(const vector<int>& featureOrder)
    {
        if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) {
@@ -26,6 +26,7 @@ namespace bayesnet {
        vector<int> cfsFeatures;
        vector<double> cfsScores;
        vector<double> suLabels;
        map<pair<int, int>, double> suFeatures;
        bool fitted = false;
    };
}
@@ -6,4 +6,4 @@ include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
    KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
    Mst.cc Proposal.cc CFS.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}" OpenSSL::Crypto)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
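Note that the OpenSSL::Crypto imported target linked here is provided by the find_package(OpenSSL REQUIRED) call added to the top-level CMakeLists.txt in the first hunk; without it the target is undefined and configuration fails.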
@@ -210,7 +210,7 @@ int main()
    // net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest);
    auto dt = Datasets(true, "Arff");
    for (const auto& name : dt.getNames()) {
        //for (const auto& name : { "iris" }) {
        // for (const auto& name : { "iris" }) {
        auto [X, y] = dt.getTensors(name);
        auto features = dt.getFeatures(name);
        auto states = dt.getStates(name);
@@ -222,8 +222,8 @@ int main()
        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
        dataset = torch::cat({ dataset, yresized }, 0);
        auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights);
        cout << "Dataset: " << name << " CFS features: " << flush;
        cfs.fit();
        cout << "Dataset: " << name << " CFS features: ";
        for (const auto& feature : cfs.getFeatures()) {
            cout << feature << ", ";
        }