diff --git a/CMakeLists.txt b/CMakeLists.txt index 37c674d..1f837ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,8 @@ endif (ENABLE_CLANG_TIDY) add_git_submodule("lib/mdlp") add_git_submodule("lib/argparse") add_git_submodule("lib/json") -find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib) + +find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib ${HOME}/lib/usr/local/lib) # Subdirectories # -------------- diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 0952a7a..a9120a0 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -41,9 +41,6 @@ namespace bayesnet { X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }); y_train = y_; } - if (cfs) { - initializeModels(); - } } void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) { @@ -87,8 +84,9 @@ namespace bayesnet { return oss.str(); } - void BoostAODE::initializeModels() + unordered_set BoostAODE::initializeModels() { + unordered_set featuresUsed; // Read the CFS features string output = "[", prefix = ""; bool first = true; @@ -110,28 +108,30 @@ namespace bayesnet { if (file.is_open()) { nlohmann::json cfsFeatures = nlohmann::json::parse(file); file.close(); - for (const string& feature : cfsFeatures) { - // cout << "Feature: [" << feature << "]" << endl; - auto pos = find(features.begin(), features.end(), feature); - if (pos == features.end()) - throw runtime_error("Feature " + feature + " not found in dataset"); - int numFeature = pos - features.begin(); - cout << "Feature: [" << feature << "] " << numFeature << endl; - models.push_back(std::make_unique(numFeature)); - models.back()->fit(dataset, features, className, states, weights_); + for (const int& feature : cfsFeatures) { + // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; + featuresUsed.insert(feature); + unique_ptr model = std::make_unique(feature); + model->fit(dataset, features, className, states, weights_); + models.push_back(std::move(model)); + significanceModels.push_back(1.0); n_models++; } } else { throw runtime_error("File " + name + " not found"); } + return featuresUsed; } void BoostAODE::trainModel(const torch::Tensor& weights) { + unordered_set featuresUsed; + if (cfs) { + featuresUsed = initializeModels(); + } if (maxModels == 0) maxModels = .1 * n > 10 ? .1 * n : n; Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); bool exitCondition = false; - unordered_set featuresUsed; // Variables to control the accuracy finish condition double priorAccuracy = 0.0; double delta = 1.0; @@ -150,16 +150,16 @@ namespace bayesnet { unique_ptr model; auto feature = featureSelection[0]; if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { - bool found = false; - for (auto feat : featureSelection) { + bool used = true; + for (const auto& feat : featureSelection) { if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { continue; } - found = true; + used = false; feature = feat; break; } - if (!found) { + if (used) { exitCondition = true; continue; } @@ -199,7 +199,7 @@ namespace bayesnet { count++; } } - exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance; + exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance; } if (featuresUsed.size() != features.size()) { status = WARNING; diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 683cb99..fb87fce 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -1,6 +1,7 @@ #ifndef BOOSTAODE_H #define BOOSTAODE_H #include "Ensemble.h" +#include #include "SPODE.h" namespace bayesnet { class BoostAODE : public Ensemble { @@ -15,7 +16,7 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - void initializeModels(); + unordered_set initializeModels(); // Hyperparameters bool repeatSparent = false; // if true, a feature can be selected more than once int maxModels = 0;