From f288bbd6fa6348f31f35a6b51e52ffb58c68925e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 10 Oct 2023 11:52:39 +0200 Subject: [PATCH 01/15] Begin adding cfs to BoostAODE --- src/BayesNet/BoostAODE.cc | 13 +++++++++++++ src/BayesNet/BoostAODE.h | 5 ++++- src/Platform/Dataset.cc | 10 ---------- src/Platform/Dataset.h | 2 +- src/Platform/Datasets.cc | 2 +- src/Platform/DotEnv.h | 7 +++++-- src/Platform/Experiment.cc | 5 ++--- src/Platform/Paths.h | 6 ++++++ src/Platform/ReportBase.cc | 3 +-- src/Platform/ReportConsole.cc | 6 ++++-- src/Platform/Utils.h | 19 +++++++++++++++++++ src/Platform/list.cc | 4 +--- src/Platform/main.cc | 5 ++--- 13 files changed, 59 insertions(+), 28 deletions(-) create mode 100644 src/Platform/Utils.h diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index c976408..4ddf30d 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -4,6 +4,7 @@ #include "Colors.h" #include "Folding.h" #include +#include "Paths.h" namespace bayesnet { BoostAODE::BoostAODE() : Ensemble() {} @@ -28,6 +29,9 @@ namespace bayesnet { if (hyperparameters.contains("convergence")) { convergence = hyperparameters["convergence"]; } + if (hyperparameters.contains("cfs")) { + cfs = hyperparameters["cfs"]; + } } void BoostAODE::validationInit() { @@ -58,6 +62,12 @@ namespace bayesnet { } } + void BoostAODE::initializeModels() + { + ifstream file(cfs + ".json"); + if (file.is_open()) { + } + } void BoostAODE::trainModel(const torch::Tensor& weights) { models.clear(); @@ -66,6 +76,9 @@ namespace bayesnet { maxModels = .1 * n > 10 ? .1 * n : n; validationInit(); Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); + if (cfs != "") { + initializeModels(); + } bool exitCondition = false; unordered_set featuresUsed; // Variables to control the accuracy finish condition diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 61e2e95..5c99145 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -16,10 +16,13 @@ namespace bayesnet { torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; void validationInit(); - bool repeatSparent = false; + void initializeModels(); + // Hyperparameters + bool repeatSparent = false; // if true, a feature can be selected more than once int maxModels = 0; bool ascending = false; //Process KBest features ascending or descending order bool convergence = false; //if true, stop when the model does not improve + string cfs = ""; // if not empty, use CFS to select features }; } #endif \ No newline at end of file diff --git a/src/Platform/Dataset.cc b/src/Platform/Dataset.cc index 02a36f9..f75fdbc 100644 --- a/src/Platform/Dataset.cc +++ b/src/Platform/Dataset.cc @@ -212,14 +212,4 @@ namespace platform { } return Xd; } - vector Dataset::split(const string& text, char delimiter) - { - vector result; - stringstream ss(text); - string token; - while (getline(ss, token, delimiter)) { - result.push_back(token); - } - return result; - } } \ No newline at end of file diff --git a/src/Platform/Dataset.h b/src/Platform/Dataset.h index fbc577e..21b619e 100644 --- a/src/Platform/Dataset.h +++ b/src/Platform/Dataset.h @@ -5,6 +5,7 @@ #include #include #include "CPPFImdlp.h" +#include "Utils.h" namespace platform { using namespace std; @@ -62,7 +63,6 @@ namespace platform { public: Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), 
loaded(false), fileType(fileType) {}; explicit Dataset(const Dataset&); - static vector split(const string& text, char delimiter); string getName() const; string getClassName() const; vector getFeatures() const; diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc index 717ccbc..4f53a2b 100644 --- a/src/Platform/Datasets.cc +++ b/src/Platform/Datasets.cc @@ -13,7 +13,7 @@ namespace platform { if (line.empty() || line[0] == '#') { continue; } - vector tokens = Dataset::split(line, ','); + vector tokens = split(line, ','); string name = tokens[0]; string className; if (tokens.size() == 1) { diff --git a/src/Platform/DotEnv.h b/src/Platform/DotEnv.h index c481310..87ec50e 100644 --- a/src/Platform/DotEnv.h +++ b/src/Platform/DotEnv.h @@ -4,7 +4,10 @@ #include #include #include -#include "Dataset.h" +#include +#include "Utils.h" + +//#include "Dataset.h" namespace platform { class DotEnv { private: @@ -51,7 +54,7 @@ namespace platform { auto seeds_str = env["seeds"]; seeds_str = trim(seeds_str); seeds_str = seeds_str.substr(1, seeds_str.size() - 2); - auto seeds_str_split = Dataset::split(seeds_str, ','); + auto seeds_str_split = split(seeds_str, ','); transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) { return stoi(str); }); diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index dced445..311dbc7 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -3,7 +3,7 @@ #include "Datasets.h" #include "Models.h" #include "ReportConsole.h" -#include "DotEnv.h" +#include "Paths.h" namespace platform { using json = nlohmann::json; string get_date() @@ -134,8 +134,7 @@ namespace platform { } void Experiment::cross_validation(const string& fileName) { - auto env = platform::DotEnv(); - auto datasets = platform::Datasets(discretized, env.get("source_data")); + auto datasets = platform::Datasets(discretized, Paths::datasets()); // Get dataset auto [X, y] = datasets.getTensors(fileName); auto states = datasets.getStates(fileName); diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h index 926568e..a1eb00c 100644 --- a/src/Platform/Paths.h +++ b/src/Platform/Paths.h @@ -1,11 +1,17 @@ #ifndef PATHS_H #define PATHS_H #include +#include "DotEnv.h" namespace platform { class Paths { public: static std::string results() { return "results/"; } static std::string excel() { return "excel/"; } + static std::string datasets() + { + auto env = platform::DotEnv(); + return env.get("source_data"); + } }; } #endif \ No newline at end of file diff --git a/src/Platform/ReportBase.cc b/src/Platform/ReportBase.cc index 5f113a5..acb5581 100644 --- a/src/Platform/ReportBase.cc +++ b/src/Platform/ReportBase.cc @@ -58,8 +58,7 @@ namespace platform { } } else { if (data["score_name"].get() == "accuracy") { - auto env = platform::DotEnv(); - auto dt = Datasets(false, env.get("source_data")); + auto dt = Datasets(false, Paths::datasets()); dt.loadDataset(dataset); auto numClasses = dt.getNClasses(dataset); if (numClasses == 2) { diff --git a/src/Platform/ReportConsole.cc b/src/Platform/ReportConsole.cc index bb08ef3..aaba840 100644 --- a/src/Platform/ReportConsole.cc +++ b/src/Platform/ReportConsole.cc @@ -56,10 +56,12 @@ namespace platform { try { cout << r["hyperparameters"].get(); } - catch (const exception& err) { - cout << r["hyperparameters"]; + catch (...) { + //cout << r["hyperparameters"]; + cout << "Arrggggghhhh!" 
<< endl; } cout << endl; + cout << flush; lastResult = r; totalScore += r["score"].get(); odd = !odd; diff --git a/src/Platform/Utils.h b/src/Platform/Utils.h new file mode 100644 index 0000000..3e24f05 --- /dev/null +++ b/src/Platform/Utils.h @@ -0,0 +1,19 @@ +#ifndef UTILS_H +#define UTILS_H +#include +#include +#include +namespace platform { + //static vector split(const string& text, char delimiter); + static std::vector split(const std::string& text, char delimiter) + { + std::vector result; + std::stringstream ss(text); + std::string token; + while (std::getline(ss, token, delimiter)) { + result.push_back(token); + } + return result; + } +} +#endif \ No newline at end of file diff --git a/src/Platform/list.cc b/src/Platform/list.cc index 8c386a5..581ee5f 100644 --- a/src/Platform/list.cc +++ b/src/Platform/list.cc @@ -3,7 +3,6 @@ #include "Paths.h" #include "Colors.h" #include "Datasets.h" -#include "DotEnv.h" using namespace std; const int BALANCE_LENGTH = 75; @@ -28,8 +27,7 @@ void outputBalance(const string& balance) int main(int argc, char** argv) { - auto env = platform::DotEnv(); - auto data = platform::Datasets(false, env.get("source_data")); + auto data = platform::Datasets(false, platform::Paths::datasets()); locale mylocale(cout.getloc(), new separated); locale::global(mylocale); cout.imbue(mylocale); diff --git a/src/Platform/main.cc b/src/Platform/main.cc index 62470c5..1101e2b 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -82,8 +82,7 @@ int main(int argc, char** argv) auto seeds = program.get>("seeds"); auto hyperparameters = program.get("hyperparameters"); vector filesToTest; - auto env = platform::DotEnv(); - auto datasets = platform::Datasets(discretize_dataset, env.get("source_data")); + auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); auto title = program.get("title"); auto saveResults = program.get("save"); if (file_name != "") { @@ -102,7 +101,7 @@ int main(int argc, char** argv) /* * Begin Processing */ - + auto env = platform::DotEnv(); auto experiment = platform::Experiment(); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); -- 2.45.2 From df9b4c48d26c2fdf0ed199e699ecc8c64e8e1712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 10 Oct 2023 13:39:11 +0200 Subject: [PATCH 02/15] Begin CFS initialization --- src/BayesNet/BoostAODE.cc | 62 +++++++++++++++++++---------------- src/BayesNet/BoostAODE.h | 1 - src/Platform/ReportConsole.cc | 8 +---- src/Platform/main.cc | 49 +++++++++++++-------------- 4 files changed, 57 insertions(+), 63 deletions(-) diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 4ddf30d..aeae235 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -11,30 +11,7 @@ namespace bayesnet { void BoostAODE::buildModel(const torch::Tensor& weights) { // Models shall be built in trainModel - } - void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) - { - // Check if hyperparameters are valid - const vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence" }; - checkHyperparameters(validKeys, hyperparameters); - if (hyperparameters.contains("repeatSparent")) { - repeatSparent = hyperparameters["repeatSparent"]; - } - if (hyperparameters.contains("maxModels")) { - maxModels = hyperparameters["maxModels"]; - } - if (hyperparameters.contains("ascending")) { - ascending = 
hyperparameters["ascending"]; - } - if (hyperparameters.contains("convergence")) { - convergence = hyperparameters["convergence"]; - } - if (hyperparameters.contains("cfs")) { - cfs = hyperparameters["cfs"]; - } - } - void BoostAODE::validationInit() - { + // Prepare the validation dataset auto y_ = dataset.index({ -1, "..." }); if (convergence) { // Prepare train & validation sets from train data @@ -60,12 +37,43 @@ namespace bayesnet { X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }); y_train = y_; } - + if (cfs != "") { + initializeModels(); + } + } + void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) + { + // Check if hyperparameters are valid + const vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "cfs" }; + checkHyperparameters(validKeys, hyperparameters); + if (hyperparameters.contains("repeatSparent")) { + repeatSparent = hyperparameters["repeatSparent"]; + } + if (hyperparameters.contains("maxModels")) { + maxModels = hyperparameters["maxModels"]; + } + if (hyperparameters.contains("ascending")) { + ascending = hyperparameters["ascending"]; + } + if (hyperparameters.contains("convergence")) { + convergence = hyperparameters["convergence"]; + } + if (hyperparameters.contains("cfs")) { + cfs = hyperparameters["cfs"]; + } } void BoostAODE::initializeModels() { ifstream file(cfs + ".json"); if (file.is_open()) { + nlohmann::json data; + file >> data; + file.close(); + auto model = "iris"; // has to come in when building object + auto features = data[model]; + cout << "features: " << features.dump() << endl; + } else { + throw runtime_error("File " + cfs + ".json not found"); } } void BoostAODE::trainModel(const torch::Tensor& weights) @@ -74,11 +82,7 @@ namespace bayesnet { n_models = 0; if (maxModels == 0) maxModels = .1 * n > 10 ? .1 * n : n; - validationInit(); Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); - if (cfs != "") { - initializeModels(); - } bool exitCondition = false; unordered_set featuresUsed; // Variables to control the accuracy finish condition diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 5c99145..f3fa5bd 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -15,7 +15,6 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - void validationInit(); void initializeModels(); // Hyperparameters bool repeatSparent = false; // if true, a feature can be selected more than once diff --git a/src/Platform/ReportConsole.cc b/src/Platform/ReportConsole.cc index aaba840..c8e6890 100644 --- a/src/Platform/ReportConsole.cc +++ b/src/Platform/ReportConsole.cc @@ -53,13 +53,7 @@ namespace platform { const string status = compareResult(r["dataset"].get(), r["score"].get()); cout << status; cout << setw(12) << right << setprecision(6) << fixed << r["time"].get() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get() << " "; - try { - cout << r["hyperparameters"].get(); - } - catch (...) { - //cout << r["hyperparameters"]; - cout << "Arrggggghhhh!" 
<< endl; - } + cout << r["hyperparameters"].dump(); cout << endl; cout << flush; lastResult = r; diff --git a/src/Platform/main.cc b/src/Platform/main.cc index 1101e2b..ecdf258 100644 --- a/src/Platform/main.cc +++ b/src/Platform/main.cc @@ -12,7 +12,7 @@ using namespace std; using json = nlohmann::json; -argparse::ArgumentParser manageArguments(int argc, char** argv) +argparse::ArgumentParser manageArguments() { auto env = platform::DotEnv(); argparse::ArgumentParser program("main"); @@ -48,43 +48,40 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) }}); auto seed_values = env.getSeeds(); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); + return program; +} + +int main(int argc, char** argv) +{ + string file_name, model_name, title; + json hyperparameters_json; + bool discretize_dataset, stratified, saveResults; + vector seeds; + vector filesToTest; + int n_folds; + auto program = manageArguments(); try { program.parse_args(argc, argv); - auto file_name = program.get("dataset"); - auto model_name = program.get("model"); - auto discretize_dataset = program.get("discretize"); - auto stratified = program.get("stratified"); - auto n_folds = program.get("folds"); - auto seeds = program.get>("seeds"); - auto title = program.get("title"); + file_name = program.get("dataset"); + model_name = program.get("model"); + discretize_dataset = program.get("discretize"); + stratified = program.get("stratified"); + n_folds = program.get("folds"); + seeds = program.get>("seeds"); auto hyperparameters = program.get("hyperparameters"); - auto saveResults = program.get("save"); + hyperparameters_json = json::parse(hyperparameters); + title = program.get("title"); if (title == "" && file_name == "") { throw runtime_error("title is mandatory if dataset is not provided"); } + saveResults = program.get("save"); } catch (const exception& err) { cerr << err.what() << endl; cerr << program; exit(1); } - return program; -} - -int main(int argc, char** argv) -{ - auto program = manageArguments(argc, argv); - auto file_name = program.get("dataset"); - auto model_name = program.get("model"); - auto discretize_dataset = program.get("discretize"); - auto stratified = program.get("stratified"); - auto n_folds = program.get("folds"); - auto seeds = program.get>("seeds"); - auto hyperparameters = program.get("hyperparameters"); - vector filesToTest; auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); - auto title = program.get("title"); - auto saveResults = program.get("save"); if (file_name != "") { if (!datasets.isDataset(file_name)) { cerr << "Dataset " << file_name << " not found" << endl; @@ -106,7 +103,7 @@ int main(int argc, char** argv) experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); - experiment.setHyperparameters(json::parse(hyperparameters)); + experiment.setHyperparameters(hyperparameters_json); for (auto seed : seeds) { experiment.addRandomSeed(seed); } -- 2.45.2 From ca833a34f5835cb085c1ead9c9dd4f48edc435c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 10 Oct 2023 18:16:43 +0200 Subject: [PATCH 03/15] try openssl sha256 --- CMakeLists.txt | 2 +- src/BayesNet/BoostAODE.cc | 58 ++++++++++++++++++++++++++++++------- 
src/BayesNet/BoostAODE.h | 2 +- src/BayesNet/CMakeLists.txt | 2 +- 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 294f0bf..37c674d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # CMakes modules # -------------- set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) - +find_package(OpenSSL REQUIRED) include(AddGitSubmodule) if (CODE_COVERAGE) enable_testing() diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index aeae235..d3e8901 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -1,10 +1,12 @@ -#include "BoostAODE.h" #include +#include +#include +#include "BoostAODE.h" #include "BayesMetrics.h" #include "Colors.h" #include "Folding.h" -#include #include "Paths.h" +#include namespace bayesnet { BoostAODE::BoostAODE() : Ensemble() {} @@ -13,6 +15,8 @@ namespace bayesnet { // Models shall be built in trainModel // Prepare the validation dataset auto y_ = dataset.index({ -1, "..." }); + int nSamples = dataset.size(1); + int nFeatures = dataset.size(0) - 1; if (convergence) { // Prepare train & validation sets from train data auto fold = platform::StratifiedKFold(5, y_, 271); @@ -38,7 +42,7 @@ namespace bayesnet { y_train = y_; } if (cfs != "") { - initializeModels(); + initializeModels(nSamples, nFeatures); } } void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) @@ -62,18 +66,52 @@ namespace bayesnet { cfs = hyperparameters["cfs"]; } } - void BoostAODE::initializeModels() + string sha256(const string& input) { - ifstream file(cfs + ".json"); + EVP_MD_CTX* mdctx; + const EVP_MD* md; + unsigned char hash[EVP_MAX_MD_SIZE]; + unsigned int hash_len; + + OpenSSL_add_all_digests(); + md = EVP_get_digestbyname("sha256"); + mdctx = EVP_MD_CTX_new(); + EVP_DigestInit_ex(mdctx, md, nullptr); + EVP_DigestUpdate(mdctx, input.c_str(), input.size()); + EVP_DigestFinal_ex(mdctx, hash, &hash_len); + EVP_MD_CTX_free(mdctx); + stringstream oss; + for (unsigned int i = 0; i < hash_len; i++) { + oss << hex << (int)hash[i]; + } + return oss.str(); + } + + void BoostAODE::initializeModels(int nSamples, int nFeatures) + { + // Read the CFS features + string output = "[", prefix = ""; + bool first = true; + for (const auto& feature : features) { + output += prefix + feature; + if (first) { + prefix = ", "; + first = false; + } + } + output += "]"; + // std::size_t str_hash = std::hash{}(output); + string str_hash = sha256(output); + stringstream oss; + oss << "cfs/" << str_hash << ".json"; + string name = oss.str(); + ifstream file(name); if (file.is_open()) { - nlohmann::json data; - file >> data; + nlohmann::json features = nlohmann::json::parse(file); file.close(); - auto model = "iris"; // has to come in when building object - auto features = data[model]; cout << "features: " << features.dump() << endl; } else { - throw runtime_error("File " + cfs + ".json not found"); + throw runtime_error("File " + name + " not found"); } } void BoostAODE::trainModel(const torch::Tensor& weights) diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index f3fa5bd..3464a7d 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -15,7 +15,7 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - void initializeModels(); + void initializeModels(int nSamples, int nFeatures); // Hyperparameters bool repeatSparent = false; // if true, a feature 
can be selected more than once int maxModels = 0; diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index 2a120f3..6ca1238 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -6,4 +6,4 @@ include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) -target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}" OpenSSL::Crypto) \ No newline at end of file -- 2.45.2 From e7ded6826792d14abc7aeeaa1ca877c8e0382b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Tue, 10 Oct 2023 23:00:38 +0200 Subject: [PATCH 04/15] First cfs working version --- src/BayesNet/BoostAODE.cc | 33 +++++++++++++++++++++------------ src/BayesNet/BoostAODE.h | 4 ++-- src/Platform/Paths.h | 1 + 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index d3e8901..0952a7a 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -13,10 +13,10 @@ namespace bayesnet { void BoostAODE::buildModel(const torch::Tensor& weights) { // Models shall be built in trainModel + models.clear(); + n_models = 0; // Prepare the validation dataset auto y_ = dataset.index({ -1, "..." }); - int nSamples = dataset.size(1); - int nFeatures = dataset.size(0) - 1; if (convergence) { // Prepare train & validation sets from train data auto fold = platform::StratifiedKFold(5, y_, 271); @@ -41,8 +41,8 @@ namespace bayesnet { X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." 
}); y_train = y_; } - if (cfs != "") { - initializeModels(nSamples, nFeatures); + if (cfs) { + initializeModels(); } } void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) @@ -82,18 +82,18 @@ namespace bayesnet { EVP_MD_CTX_free(mdctx); stringstream oss; for (unsigned int i = 0; i < hash_len; i++) { - oss << hex << (int)hash[i]; + oss << hex << setfill('0') << setw(2) << (int)hash[i]; } return oss.str(); } - void BoostAODE::initializeModels(int nSamples, int nFeatures) + void BoostAODE::initializeModels() { // Read the CFS features string output = "[", prefix = ""; bool first = true; for (const auto& feature : features) { - output += prefix + feature; + output += prefix + "'" + feature + "'"; if (first) { prefix = ", "; first = false; @@ -103,21 +103,30 @@ namespace bayesnet { // std::size_t str_hash = std::hash{}(output); string str_hash = sha256(output); stringstream oss; - oss << "cfs/" << str_hash << ".json"; + oss << platform::Paths::cfs() << str_hash << ".json"; string name = oss.str(); ifstream file(name); + Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); if (file.is_open()) { - nlohmann::json features = nlohmann::json::parse(file); + nlohmann::json cfsFeatures = nlohmann::json::parse(file); file.close(); - cout << "features: " << features.dump() << endl; + for (const string& feature : cfsFeatures) { + // cout << "Feature: [" << feature << "]" << endl; + auto pos = find(features.begin(), features.end(), feature); + if (pos == features.end()) + throw runtime_error("Feature " + feature + " not found in dataset"); + int numFeature = pos - features.begin(); + cout << "Feature: [" << feature << "] " << numFeature << endl; + models.push_back(std::make_unique(numFeature)); + models.back()->fit(dataset, features, className, states, weights_); + n_models++; + } } else { throw runtime_error("File " + name + " not found"); } } void BoostAODE::trainModel(const torch::Tensor& weights) { - models.clear(); - n_models = 0; if (maxModels == 0) maxModels = .1 * n > 10 ? 
.1 * n : n; Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 3464a7d..683cb99 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -15,13 +15,13 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - void initializeModels(int nSamples, int nFeatures); + void initializeModels(); // Hyperparameters bool repeatSparent = false; // if true, a feature can be selected more than once int maxModels = 0; bool ascending = false; //Process KBest features ascending or descending order bool convergence = false; //if true, stop when the model does not improve - string cfs = ""; // if not empty, use CFS to select features + bool cfs = false; // if true use CFS to select features stored in cfs folder with sha256(features) file_name }; } #endif \ No newline at end of file diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h index a1eb00c..16d459c 100644 --- a/src/Platform/Paths.h +++ b/src/Platform/Paths.h @@ -7,6 +7,7 @@ namespace platform { public: static std::string results() { return "results/"; } static std::string excel() { return "excel/"; } + static std::string cfs() { return "cfs/"; } static std::string datasets() { auto env = platform::DotEnv(); -- 2.45.2 From 47e2b138c5a92d2fc317054987c1eacb34b005ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Wed, 11 Oct 2023 11:33:29 +0200 Subject: [PATCH 05/15] Complete first working cfs --- CMakeLists.txt | 3 ++- src/BayesNet/BoostAODE.cc | 38 +++++++++++++++++++------------------- src/BayesNet/BoostAODE.h | 3 ++- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37c674d..1f837ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,8 @@ endif (ENABLE_CLANG_TIDY) add_git_submodule("lib/mdlp") add_git_submodule("lib/argparse") add_git_submodule("lib/json") -find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib) + +find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib ${HOME}/lib/usr/local/lib) # Subdirectories # -------------- diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index 0952a7a..a9120a0 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -41,9 +41,6 @@ namespace bayesnet { X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." 
}); y_train = y_; } - if (cfs) { - initializeModels(); - } } void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) { @@ -87,8 +84,9 @@ namespace bayesnet { return oss.str(); } - void BoostAODE::initializeModels() + unordered_set BoostAODE::initializeModels() { + unordered_set featuresUsed; // Read the CFS features string output = "[", prefix = ""; bool first = true; @@ -110,28 +108,30 @@ namespace bayesnet { if (file.is_open()) { nlohmann::json cfsFeatures = nlohmann::json::parse(file); file.close(); - for (const string& feature : cfsFeatures) { - // cout << "Feature: [" << feature << "]" << endl; - auto pos = find(features.begin(), features.end(), feature); - if (pos == features.end()) - throw runtime_error("Feature " + feature + " not found in dataset"); - int numFeature = pos - features.begin(); - cout << "Feature: [" << feature << "] " << numFeature << endl; - models.push_back(std::make_unique(numFeature)); - models.back()->fit(dataset, features, className, states, weights_); + for (const int& feature : cfsFeatures) { + // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; + featuresUsed.insert(feature); + unique_ptr model = std::make_unique(feature); + model->fit(dataset, features, className, states, weights_); + models.push_back(std::move(model)); + significanceModels.push_back(1.0); n_models++; } } else { throw runtime_error("File " + name + " not found"); } + return featuresUsed; } void BoostAODE::trainModel(const torch::Tensor& weights) { + unordered_set featuresUsed; + if (cfs) { + featuresUsed = initializeModels(); + } if (maxModels == 0) maxModels = .1 * n > 10 ? .1 * n : n; Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); bool exitCondition = false; - unordered_set featuresUsed; // Variables to control the accuracy finish condition double priorAccuracy = 0.0; double delta = 1.0; @@ -150,16 +150,16 @@ namespace bayesnet { unique_ptr model; auto feature = featureSelection[0]; if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { - bool found = false; - for (auto feat : featureSelection) { + bool used = true; + for (const auto& feat : featureSelection) { if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { continue; } - found = true; + used = false; feature = feat; break; } - if (!found) { + if (used) { exitCondition = true; continue; } @@ -199,7 +199,7 @@ namespace bayesnet { count++; } } - exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance; + exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance; } if (featuresUsed.size() != features.size()) { status = WARNING; diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index 683cb99..fb87fce 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -1,6 +1,7 @@ #ifndef BOOSTAODE_H #define BOOSTAODE_H #include "Ensemble.h" +#include #include "SPODE.h" namespace bayesnet { class BoostAODE : public Ensemble { @@ -15,7 +16,7 @@ namespace bayesnet { private: torch::Tensor dataset_; torch::Tensor X_train, y_train, X_test, y_test; - void initializeModels(); + unordered_set initializeModels(); // Hyperparameters bool repeatSparent = false; // if true, a feature can be selected more than once int maxModels = 0; -- 2.45.2 From 40d1dad5d827f3729e38c7b3fe448e1b511bb880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Wed, 11 Oct 2023 21:17:26 +0200 Subject: [PATCH 06/15] Begin CFS implementation --- 
src/BayesNet/BayesMetrics.cc | 9 +-- src/BayesNet/BayesMetrics.h | 14 ++-- src/BayesNet/CFS.cc | 127 +++++++++++++++++++++++++++++++++++ src/BayesNet/CFS.h | 31 +++++++++ src/BayesNet/CMakeLists.txt | 2 +- src/BayesNet/Node.h | 2 +- 6 files changed, 173 insertions(+), 12 deletions(-) create mode 100644 src/BayesNet/CFS.cc create mode 100644 src/BayesNet/CFS.h diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 623656e..86de9ea 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -60,11 +60,12 @@ namespace bayesnet { { return scoresKBest; } - vector> Metrics::doCombinations(const vector& source) + template + vector> Metrics::doCombinations(const vector& source) { - vector> result; + vector> result; for (int i = 0; i < source.size(); ++i) { - string temp = source[i]; + T temp = source[i]; for (int j = i + 1; j < source.size(); ++j) { result.push_back({ temp, source[j] }); } @@ -76,7 +77,7 @@ namespace bayesnet { auto result = vector(); auto source = vector(features); source.push_back(className); - auto combinations = doCombinations(source); + auto combinations = doCombinations(source); // Compute class prior auto margin = torch::zeros({ classNumStates }, torch::kFloat); for (int value = 0; value < classNumStates; ++value) { diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 01841a7..30606c0 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -8,20 +8,22 @@ namespace bayesnet { using namespace torch; class Metrics { private: - Tensor samples; // nxm tensor used to fit the model - vector features; - string className; int classNumStates = 0; vector scoresKBest; vector featuresKBest; // sorted indices of the features - double entropy(const Tensor& feature, const Tensor& weights); double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); - vector> doCombinations(const vector&); + protected: + Tensor samples; // n+1xm tensor used to fit the model where samples[-1] is the y vector + string className; + double entropy(const Tensor& feature, const Tensor& weights); + vector features; + template + vector> doCombinations(const vector& source); public: Metrics() = default; Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates); Metrics(const vector>& vsamples, const vector& labels, const vector& features, const string& className, const int classNumStates); - vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0); + vector SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); vector getScoresKBest() const; double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights); vector conditionalEdgeWeights(vector& weights); // To use in Python diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc new file mode 100644 index 0000000..b3473cd --- /dev/null +++ b/src/BayesNet/CFS.cc @@ -0,0 +1,127 @@ +#include "CFS.h" +#include +#include "bayesnetUtils.h" +namespace bayesnet { + CFS::CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : + Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights) + + { + } + double CFS::symmetricalUncertainty(int a, int b) + { + /* + Compute symmetrical uncertainty. 
Normalize* information gain (mutual + information) with the entropies of the features in order to compensate + the bias due to high cardinality features. *Range [0, 1] + (https://www.sciencedirect.com/science/article/pii/S0020025519303603) + */ + auto x = samples.index({ a, "..." }); + auto y = samples.index({ b, "..." }); + return 2.0 * mutualInformation(y, x, weights) / (entropy(x, weights) + entropy(y, weights)); + } + void CFS::computeSuLabels() + { + // Compute Simmetrical Uncertainty between features and labels + // https://en.wikipedia.org/wiki/Symmetric_uncertainty + for (int i = 0; i < features.size(); ++i) { + suLabels[i] = symmetricalUncertainty(i, -1); + } + + } + double CFS::computeSuFeatures(const int firstFeature, const int secondFeature) + { + // Compute Simmetrical Uncertainty between features + // https://en.wikipedia.org/wiki/Symmetric_uncertainty + // TODO: Implement Cache in this function + return symmetricalUncertainty(firstFeature, secondFeature); + } + double CFS::computeMerit() + { + double result; + double rcf = 0; + for (auto feature : cfsFeatures) { + rcf += suLabels[feature]; + } + double rff = 0; + int n = cfsFeatures.size(); + for (const auto& item : doCombinations(cfsFeatures)) { + rff += computeSuFeatures(item.first, item.second); + } + return rcf / sqrt(n + (n * n - n) * rff); + } + void CFS::fit() + { + cfsFeatures.clear(); + computeSuLabels(); + auto featureOrder = argsort(suLabels); // sort descending order + auto continueCondition = true; + auto feature = featureOrder[0]; + cfsFeatures.push_back(feature); + cfsScores.push_back(suLabels[feature]); + while (continueCondition) { + double merit = numeric_limits::lowest(); + int bestFeature = -1; + for (auto feature : featureOrder) { + cfsFeatures.push_back(feature); + auto meritNew = computeMerit(); // Compute merit with cfsFeatures + if (meritNew > merit) { + merit = meritNew; + bestFeature = feature; + } + cfsFeatures.pop_back(); + } + cfsFeatures.push_back(bestFeature); + cfsScores.push_back(merit); + featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), feature), featureOrder.end()); + continueCondition = computeContinueCondition(featureOrder); + } + fitted = true; + } + bool CFS::computeContinueCondition(const vector& featureOrder) + { + if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) { + return false; + } + if (cfsScores.size() >= 5) { + /* + "To prevent the best first search from exploring the entire + feature subset search space, a stopping criterion is imposed. + The search will terminate if five consecutive fully expanded + subsets show no improvement over the current best subset." 
+ as stated in Mark A.Hall Thesis + */ + double item_ant = numeric_limits::lowest(); + int num = 0; + vector lastFive(cfsScores.end() - 5, cfsScores.end()); + for (auto item : lastFive) { + if (item_ant == numeric_limits::lowest()) { + item_ant = item; + } + if (item > item_ant) { + break; + } else { + num++; + item_ant = item; + } + } + if (num == 5) { + return false; + } + } + return true; + } + vector CFS::getFeatures() const + { + if (!fitted) { + throw runtime_error("CFS not fitted"); + } + return cfsFeatures; + } + vector CFS::getScores() const + { + if (!fitted) { + throw runtime_error("CFS not fitted"); + } + return cfsScores; + } +} \ No newline at end of file diff --git a/src/BayesNet/CFS.h b/src/BayesNet/CFS.h new file mode 100644 index 0000000..1cf621d --- /dev/null +++ b/src/BayesNet/CFS.h @@ -0,0 +1,31 @@ +#ifndef CFS_H +#define CFS_H +#include +#include +#include "BayesMetrics.h" +using namespace std; +namespace bayesnet { + class CFS : public Metrics { + public: + // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector + CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); + virtual ~CFS() {}; + void fit(); + vector getFeatures() const; + vector getScores() const; + private: + void computeSuLabels(); + double computeSuFeatures(const int a, const int b); + double symmetricalUncertainty(int a, int b); + double computeMerit(); + bool computeContinueCondition(const vector& featureOrder); + vector> combinations(const vector& features); + const torch::Tensor& weights; + int maxFeatures; + vector cfsFeatures; + vector cfsScores; + vector suLabels; + bool fitted = false; + }; +} +#endif \ No newline at end of file diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index 6ca1238..e22827e 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -5,5 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc - Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) + Mst.cc Proposal.cc CFS.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}" OpenSSL::Crypto) \ No newline at end of file diff --git a/src/BayesNet/Node.h b/src/BayesNet/Node.h index 6758c5c..4979007 100644 --- a/src/BayesNet/Node.h +++ b/src/BayesNet/Node.h @@ -14,8 +14,8 @@ namespace bayesnet { int numStates; // number of states of the variable torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ... 
vector dimensions; // dimensions of the cpTable - public: vector> combinations(const vector&); + public: explicit Node(const string&); void clear(); void addParent(Node*); -- 2.45.2 From 5022a4dc90345c27bf373d351b039f9a0d83285c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 13 Oct 2023 12:29:25 +0200 Subject: [PATCH 07/15] Complete CFS tested with Python mufs --- .vscode/c_cpp_properties.json | 18 +++++++++++++ src/BayesNet/BayesMetrics.cc | 20 +++++--------- src/BayesNet/BayesMetrics.h | 12 ++++++++- src/BayesNet/BoostAODE.cc | 6 +++-- src/BayesNet/CFS.cc | 49 +++++++++++++++++++++++++++++++--- src/BayesNet/CFS.h | 1 + src/Platform/CMakeLists.txt | 2 +- src/Platform/testx.cpp | 50 +++++++++++++++++++++++++---------- 8 files changed, 123 insertions(+), 35 deletions(-) create mode 100644 .vscode/c_cpp_properties.json diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..6faaf51 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Mac", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "macFrameworkPath": [ + "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks" + ], + "cStandard": "c17", + "cppStandard": "c++17", + "compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index 86de9ea..e98f41a 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -60,24 +60,13 @@ namespace bayesnet { { return scoresKBest; } - template - vector> Metrics::doCombinations(const vector& source) - { - vector> result; - for (int i = 0; i < source.size(); ++i) { - T temp = source[i]; - for (int j = i + 1; j < source.size(); ++j) { - result.push_back({ temp, source[j] }); - } - } - return result; - } + torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) { auto result = vector(); auto source = vector(features); source.push_back(className); - auto combinations = doCombinations(source); + auto combinations = doCombinations(source); // Compute class prior auto margin = torch::zeros({ classNumStates }, torch::kFloat); for (int value = 0; value < classNumStates; ++value) { @@ -123,6 +112,11 @@ namespace bayesnet { torch::Tensor counts = feature.bincount(weights); double totalWeight = counts.sum().item(); torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; + // cout << "Probs: "; + // for (int i = 0; i < probs.size(0); ++i) { + // cout << probs[i].item() << ", "; + // } + // cout << endl; torch::Tensor logProbs = torch::log(probs); torch::Tensor entropy = -probs * logProbs; return entropy.nansum().item(); diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 30606c0..341951e 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -18,7 +18,17 @@ namespace bayesnet { double entropy(const Tensor& feature, const Tensor& weights); vector features; template - vector> doCombinations(const vector& source); + vector> doCombinations(const vector& source) + { + vector> result; + for (int i = 0; i < source.size(); ++i) { + T temp = source[i]; + for (int j = i + 1; j < source.size(); ++j) { + result.push_back({ temp, source[j] }); + } + } + return result; + } public: Metrics() = default; Metrics(const torch::Tensor& samples, const vector& features, const string& 
className, const int classNumStates); diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index a9120a0..a95d6e2 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -2,11 +2,11 @@ #include #include #include "BoostAODE.h" -#include "BayesMetrics.h" #include "Colors.h" #include "Folding.h" #include "Paths.h" #include +#include "CFS.h" namespace bayesnet { BoostAODE::BoostAODE() : Ensemble() {} @@ -98,13 +98,15 @@ namespace bayesnet { } } output += "]"; + Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); + int maxFeatures = 0; + auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); // std::size_t str_hash = std::hash{}(output); string str_hash = sha256(output); stringstream oss; oss << platform::Paths::cfs() << str_hash << ".json"; string name = oss.str(); ifstream file(name); - Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); if (file.is_open()) { nlohmann::json cfsFeatures = nlohmann::json::parse(file); file.close(); diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc index b3473cd..51e30dc 100644 --- a/src/BayesNet/CFS.cc +++ b/src/BayesNet/CFS.cc @@ -17,14 +17,22 @@ namespace bayesnet { */ auto x = samples.index({ a, "..." }); auto y = samples.index({ b, "..." }); - return 2.0 * mutualInformation(y, x, weights) / (entropy(x, weights) + entropy(y, weights)); + auto mu = mutualInformation(x, y, weights); + // cout << "Mutual Information: (" << a << ", " << b << ") = " << mu << endl; + auto hx = entropy(x, weights); + // cout << "Entropy X: " << hx << endl; + auto hy = entropy(y, weights); + // cout << "Entropy Y: " << hy << endl; + return 2.0 * mu / (hx + hy); } void CFS::computeSuLabels() { // Compute Simmetrical Uncertainty between features and labels // https://en.wikipedia.org/wiki/Symmetric_uncertainty + // cout << "SuLabels" << endl; for (int i = 0; i < features.size(); ++i) { - suLabels[i] = symmetricalUncertainty(i, -1); + suLabels.push_back(symmetricalUncertainty(i, -1)); + // cout << i << " -> " << suLabels[i] << endl; } } @@ -44,7 +52,7 @@ namespace bayesnet { } double rff = 0; int n = cfsFeatures.size(); - for (const auto& item : doCombinations(cfsFeatures)) { + for (const auto& item : doCombinations(cfsFeatures)) { rff += computeSuFeatures(item.first, item.second); } return rcf / sqrt(n + (n * n - n) * rff); @@ -58,25 +66,58 @@ namespace bayesnet { auto feature = featureOrder[0]; cfsFeatures.push_back(feature); cfsScores.push_back(suLabels[feature]); + cfsFeatures.erase(cfsFeatures.begin()); while (continueCondition) { double merit = numeric_limits::lowest(); int bestFeature = -1; for (auto feature : featureOrder) { cfsFeatures.push_back(feature); auto meritNew = computeMerit(); // Compute merit with cfsFeatures + //cout << "MeritNew: " << meritNew << " Merit: " << merit << endl; if (meritNew > merit) { merit = meritNew; bestFeature = feature; } cfsFeatures.pop_back(); } + if (bestFeature == -1) { + throw runtime_error("Feature not found"); + } cfsFeatures.push_back(bestFeature); cfsScores.push_back(merit); - featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), feature), featureOrder.end()); + featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end()); continueCondition = computeContinueCondition(featureOrder); } fitted = true; } + void CFS::test() + { + cout << "H(y): " << entropy(samples.index({ -1, "..." }), weights) << endl; + cout << "y: "; + auto y = samples.index({ -1, "..." 
}); + for (int i = 0; i < y.size(0); ++i) { + cout << y[i].item() << ", "; + } + cout << endl; + computeSuLabels(); + // cout << "Probabilites of features: " << endl; + // for (const auto& featureName : features) { + // int featureIdx = find(features.begin(), features.end(), featureName) - features.begin(); + // cout << featureName << "(" << featureIdx << "): "; + // auto feature = samples.index({ featureIdx, "..." }); + // torch::Tensor counts = feature.bincount(weights); + // double totalWeight = counts.sum().item(); + // torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; + // for (int i = 0; i < probs.size(0); ++i) { + // cout << probs[i].item() << ", "; + // } + // cout << endl; + // // for (int i = 0; i < x.size(0); ++i) { + // // cout << x[i].item() << ", "; + // // } + // // cout << endl; + // } + } bool CFS::computeContinueCondition(const vector& featureOrder) { if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) { diff --git a/src/BayesNet/CFS.h b/src/BayesNet/CFS.h index 1cf621d..556659a 100644 --- a/src/BayesNet/CFS.h +++ b/src/BayesNet/CFS.h @@ -11,6 +11,7 @@ namespace bayesnet { CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); virtual ~CFS() {}; void fit(); + void test(); vector getFeatures() const; vector getScores() const; private: diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 4111c34..75e846f 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -9,7 +9,7 @@ add_executable(b_main main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Mo add_executable(b_manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) add_executable(b_list list.cc Datasets.cc Dataset.cc) add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc) -add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc) +add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc ) target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(b_manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp index 43ab29c..c6b733e 100644 --- a/src/Platform/testx.cpp +++ b/src/Platform/testx.cpp @@ -7,6 +7,7 @@ #include "Network.h" #include "ArffFiles.h" #include "CPPFImdlp.h" +#include "CFS.h" using namespace std; using namespace platform; @@ -191,22 +192,43 @@ int main() // } // cout << "***********************************************************************************************" << endl; // } - const string file_name = "iris"; - auto net = bayesnet::Network(); + // const string file_name = "iris"; + // auto net = bayesnet::Network(); + // auto dt = Datasets(true, "Arff"); + // auto raw = RawDatasets("iris", true); + // auto [X, y] = dt.getVectors(file_name); + // cout << "Dataset dims " << raw.dataset.sizes() << endl; + // cout << "weights dims " << raw.weights.sizes() << endl; + // cout << "States dims " << raw.statest.size() << endl; + // cout << "features: "; + // for (const auto& feature : raw.featurest) { + // cout << feature << ", "; + // net.addNode(feature); + // } + // net.addNode(raw.classNamet); + // cout << endl; + // net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest); auto dt = Datasets(true, "Arff"); - auto raw = 
RawDatasets("iris", true); - auto [X, y] = dt.getVectors(file_name); - cout << "Dataset dims " << raw.dataset.sizes() << endl; - cout << "weights dims " << raw.weights.sizes() << endl; - cout << "States dims " << raw.statest.size() << endl; - cout << "features: "; - for (const auto& feature : raw.featurest) { - cout << feature << ", "; - net.addNode(feature); + for (const auto& name : dt.getNames()) { + //for (const auto& name : { "iris" }) { + auto [X, y] = dt.getTensors(name); + auto features = dt.getFeatures(name); + auto states = dt.getStates(name); + auto className = dt.getClassName(name); + int maxFeatures = 0; + auto classNumStates = states.at(className).size(); + torch::Tensor weights = torch::full({ X.size(1) }, 1.0 / X.size(1), torch::kDouble); + auto dataset = X; + auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); + dataset = torch::cat({ dataset, yresized }, 0); + auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights); + cfs.fit(); + cout << "Dataset: " << name << " CFS features: "; + for (const auto& feature : cfs.getFeatures()) { + cout << feature << ", "; + } + cout << "end." << endl; } - net.addNode(raw.classNamet); - cout << endl; - net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest); } -- 2.45.2 From 54b8939f35afac7b9f3779daaf429b3adce84336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 13 Oct 2023 13:46:22 +0200 Subject: [PATCH 08/15] Prepare BoostAODE first try --- CMakeLists.txt | 1 - src/BayesNet/BayesMetrics.cc | 5 ---- src/BayesNet/BoostAODE.cc | 52 +++++++----------------------------- src/BayesNet/CFS.cc | 47 +++++++------------------------- src/BayesNet/CFS.h | 1 + src/BayesNet/CMakeLists.txt | 2 +- src/Platform/testx.cpp | 4 +-- 7 files changed, 24 insertions(+), 88 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f837ac..88d769f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,6 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # CMakes modules # -------------- set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) -find_package(OpenSSL REQUIRED) include(AddGitSubmodule) if (CODE_COVERAGE) enable_testing() diff --git a/src/BayesNet/BayesMetrics.cc b/src/BayesNet/BayesMetrics.cc index e98f41a..6bd3bbb 100644 --- a/src/BayesNet/BayesMetrics.cc +++ b/src/BayesNet/BayesMetrics.cc @@ -112,11 +112,6 @@ namespace bayesnet { torch::Tensor counts = feature.bincount(weights); double totalWeight = counts.sum().item(); torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; - // cout << "Probs: "; - // for (int i = 0; i < probs.size(0); ++i) { - // cout << probs[i].item() << ", "; - // } - // cout << endl; torch::Tensor logProbs = torch::log(probs); torch::Tensor entropy = -probs * logProbs; return entropy.nansum().item(); diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index a95d6e2..cee8a51 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -5,7 +5,6 @@ #include "Colors.h" #include "Folding.h" #include "Paths.h" -#include #include "CFS.h" namespace bayesnet { @@ -63,27 +62,6 @@ namespace bayesnet { cfs = hyperparameters["cfs"]; } } - string sha256(const string& input) - { - EVP_MD_CTX* mdctx; - const EVP_MD* md; - unsigned char hash[EVP_MAX_MD_SIZE]; - unsigned int hash_len; - - OpenSSL_add_all_digests(); - md = EVP_get_digestbyname("sha256"); - mdctx = EVP_MD_CTX_new(); - EVP_DigestInit_ex(mdctx, md, nullptr); - EVP_DigestUpdate(mdctx, input.c_str(), 
input.size()); - EVP_DigestFinal_ex(mdctx, hash, &hash_len); - EVP_MD_CTX_free(mdctx); - stringstream oss; - for (unsigned int i = 0; i < hash_len; i++) { - oss << hex << setfill('0') << setw(2) << (int)hash[i]; - } - return oss.str(); - } - unordered_set BoostAODE::initializeModels() { unordered_set featuresUsed; @@ -101,26 +79,16 @@ namespace bayesnet { Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); int maxFeatures = 0; auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); - // std::size_t str_hash = std::hash{}(output); - string str_hash = sha256(output); - stringstream oss; - oss << platform::Paths::cfs() << str_hash << ".json"; - string name = oss.str(); - ifstream file(name); - if (file.is_open()) { - nlohmann::json cfsFeatures = nlohmann::json::parse(file); - file.close(); - for (const int& feature : cfsFeatures) { - // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; - featuresUsed.insert(feature); - unique_ptr model = std::make_unique(feature); - model->fit(dataset, features, className, states, weights_); - models.push_back(std::move(model)); - significanceModels.push_back(1.0); - n_models++; - } - } else { - throw runtime_error("File " + name + " not found"); + cfs.fit(); + auto cfsFeatures = cfs.getFeatures(); + for (const int& feature : cfsFeatures) { + // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; + featuresUsed.insert(feature); + unique_ptr model = std::make_unique(feature); + model->fit(dataset, features, className, states, weights_); + models.push_back(std::move(model)); + significanceModels.push_back(1.0); + n_models++; } return featuresUsed; } diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc index 51e30dc..6b64220 100644 --- a/src/BayesNet/CFS.cc +++ b/src/BayesNet/CFS.cc @@ -18,21 +18,16 @@ namespace bayesnet { auto x = samples.index({ a, "..." }); auto y = samples.index({ b, "..." 
}); auto mu = mutualInformation(x, y, weights); - // cout << "Mutual Information: (" << a << ", " << b << ") = " << mu << endl; auto hx = entropy(x, weights); - // cout << "Entropy X: " << hx << endl; auto hy = entropy(y, weights); - // cout << "Entropy Y: " << hy << endl; return 2.0 * mu / (hx + hy); } void CFS::computeSuLabels() { // Compute Simmetrical Uncertainty between features and labels // https://en.wikipedia.org/wiki/Symmetric_uncertainty - // cout << "SuLabels" << endl; for (int i = 0; i < features.size(); ++i) { suLabels.push_back(symmetricalUncertainty(i, -1)); - // cout << i << " -> " << suLabels[i] << endl; } } @@ -40,8 +35,14 @@ namespace bayesnet { { // Compute Simmetrical Uncertainty between features // https://en.wikipedia.org/wiki/Symmetric_uncertainty - // TODO: Implement Cache in this function - return symmetricalUncertainty(firstFeature, secondFeature); + try { + return suFeatures.at({ firstFeature, secondFeature }); + } + catch (const out_of_range& e) { + auto result = symmetricalUncertainty(firstFeature, secondFeature); + suFeatures[{firstFeature, secondFeature}] = result; + return result; + } } double CFS::computeMerit() { @@ -73,7 +74,6 @@ namespace bayesnet { for (auto feature : featureOrder) { cfsFeatures.push_back(feature); auto meritNew = computeMerit(); // Compute merit with cfsFeatures - //cout << "MeritNew: " << meritNew << " Merit: " << merit << endl; if (meritNew > merit) { merit = meritNew; bestFeature = feature; @@ -81,7 +81,8 @@ namespace bayesnet { cfsFeatures.pop_back(); } if (bestFeature == -1) { - throw runtime_error("Feature not found"); + // meritNew has to be nan due to constant features + break; } cfsFeatures.push_back(bestFeature); cfsScores.push_back(merit); @@ -90,34 +91,6 @@ namespace bayesnet { } fitted = true; } - void CFS::test() - { - cout << "H(y): " << entropy(samples.index({ -1, "..." }), weights) << endl; - cout << "y: "; - auto y = samples.index({ -1, "..." }); - for (int i = 0; i < y.size(0); ++i) { - cout << y[i].item() << ", "; - } - cout << endl; - computeSuLabels(); - // cout << "Probabilites of features: " << endl; - // for (const auto& featureName : features) { - // int featureIdx = find(features.begin(), features.end(), featureName) - features.begin(); - // cout << featureName << "(" << featureIdx << "): "; - // auto feature = samples.index({ featureIdx, "..." 
}); - // torch::Tensor counts = feature.bincount(weights); - // double totalWeight = counts.sum().item(); - // torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; - // for (int i = 0; i < probs.size(0); ++i) { - // cout << probs[i].item() << ", "; - // } - // cout << endl; - // // for (int i = 0; i < x.size(0); ++i) { - // // cout << x[i].item() << ", "; - // // } - // // cout << endl; - // } - } bool CFS::computeContinueCondition(const vector& featureOrder) { if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) { diff --git a/src/BayesNet/CFS.h b/src/BayesNet/CFS.h index 556659a..eff5da6 100644 --- a/src/BayesNet/CFS.h +++ b/src/BayesNet/CFS.h @@ -26,6 +26,7 @@ namespace bayesnet { vector cfsFeatures; vector cfsScores; vector suLabels; + map, double> suFeatures; bool fitted = false; }; } diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index e22827e..27a2d3a 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -6,4 +6,4 @@ include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc Mst.cc Proposal.cc CFS.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) -target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}" OpenSSL::Crypto) \ No newline at end of file +target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp index c6b733e..1ab1d83 100644 --- a/src/Platform/testx.cpp +++ b/src/Platform/testx.cpp @@ -210,7 +210,7 @@ int main() // net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest); auto dt = Datasets(true, "Arff"); for (const auto& name : dt.getNames()) { - //for (const auto& name : { "iris" }) { + // for (const auto& name : { "iris" }) { auto [X, y] = dt.getTensors(name); auto features = dt.getFeatures(name); auto states = dt.getStates(name); @@ -222,8 +222,8 @@ int main() auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); dataset = torch::cat({ dataset, yresized }, 0); auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights); + cout << "Dataset: " << name << " CFS features: " << flush; cfs.fit(); - cout << "Dataset: " << name << " CFS features: "; for (const auto& feature : cfs.getFeatures()) { cout << feature << ", "; } -- 2.45.2 From 977ff6fddbd02dc669bc697bd5829ab31d7cde90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 13 Oct 2023 14:01:52 +0200 Subject: [PATCH 09/15] Update CMakeLists for Linux --- src/Platform/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 75e846f..3e52c59 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -11,7 +11,7 @@ add_executable(b_list list.cc Datasets.cc Dataset.cc) add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc) add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc ) target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") -if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") +if ( CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") target_link_libraries(b_manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) target_link_libraries(b_best Boost::boost libxlsxwriter.so stdc++fs) else() -- 2.45.2 From 
d00b08cbe8b91ac1880608a1cf8c4747473785fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 13 Oct 2023 14:26:47 +0200 Subject: [PATCH 10/15] Fix Header for Linux --- src/Platform/DotEnv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Platform/DotEnv.h b/src/Platform/DotEnv.h index 87ec50e..7d5ee2b 100644 --- a/src/Platform/DotEnv.h +++ b/src/Platform/DotEnv.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "Utils.h" -- 2.45.2 From 6d5a25cdc8c9d44b1b13c7c155f15cf029f9a55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 14 Oct 2023 11:27:46 +0200 Subject: [PATCH 11/15] Refactor CFS class creating abstract base class --- src/BayesNet/CFS.cc | 94 ++++++----------------------------- src/BayesNet/CFS.h | 26 +++------- src/BayesNet/CMakeLists.txt | 2 +- src/BayesNet/FeatureSelect.cc | 74 +++++++++++++++++++++++++++ src/BayesNet/FeatureSelect.h | 31 ++++++++++++ 5 files changed, 127 insertions(+), 100 deletions(-) create mode 100644 src/BayesNet/FeatureSelect.cc create mode 100644 src/BayesNet/FeatureSelect.h diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc index 6b64220..50c0ea8 100644 --- a/src/BayesNet/CFS.cc +++ b/src/BayesNet/CFS.cc @@ -2,90 +2,38 @@ #include #include "bayesnetUtils.h" namespace bayesnet { - CFS::CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : - Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights) - { - } - double CFS::symmetricalUncertainty(int a, int b) - { - /* - Compute symmetrical uncertainty. Normalize* information gain (mutual - information) with the entropies of the features in order to compensate - the bias due to high cardinality features. *Range [0, 1] - (https://www.sciencedirect.com/science/article/pii/S0020025519303603) - */ - auto x = samples.index({ a, "..." }); - auto y = samples.index({ b, "..." 
}); - auto mu = mutualInformation(x, y, weights); - auto hx = entropy(x, weights); - auto hy = entropy(y, weights); - return 2.0 * mu / (hx + hy); - } - void CFS::computeSuLabels() - { - // Compute Simmetrical Uncertainty between features and labels - // https://en.wikipedia.org/wiki/Symmetric_uncertainty - for (int i = 0; i < features.size(); ++i) { - suLabels.push_back(symmetricalUncertainty(i, -1)); - } - } - double CFS::computeSuFeatures(const int firstFeature, const int secondFeature) - { - // Compute Simmetrical Uncertainty between features - // https://en.wikipedia.org/wiki/Symmetric_uncertainty - try { - return suFeatures.at({ firstFeature, secondFeature }); - } - catch (const out_of_range& e) { - auto result = symmetricalUncertainty(firstFeature, secondFeature); - suFeatures[{firstFeature, secondFeature}] = result; - return result; - } - } - double CFS::computeMerit() - { - double result; - double rcf = 0; - for (auto feature : cfsFeatures) { - rcf += suLabels[feature]; - } - double rff = 0; - int n = cfsFeatures.size(); - for (const auto& item : doCombinations(cfsFeatures)) { - rff += computeSuFeatures(item.first, item.second); - } - return rcf / sqrt(n + (n * n - n) * rff); - } + + void CFS::fit() { - cfsFeatures.clear(); + selectedFeatures.clear(); computeSuLabels(); auto featureOrder = argsort(suLabels); // sort descending order auto continueCondition = true; auto feature = featureOrder[0]; - cfsFeatures.push_back(feature); - cfsScores.push_back(suLabels[feature]); - cfsFeatures.erase(cfsFeatures.begin()); + selectedFeatures.push_back(feature); + selectedScores.push_back(suLabels[feature]); + selectedFeatures.erase(selectedFeatures.begin()); while (continueCondition) { double merit = numeric_limits::lowest(); int bestFeature = -1; for (auto feature : featureOrder) { - cfsFeatures.push_back(feature); - auto meritNew = computeMerit(); // Compute merit with cfsFeatures + selectedFeatures.push_back(feature); + auto meritNew = computeMeritCFS(); // Compute merit with cfsFeatures if (meritNew > merit) { merit = meritNew; bestFeature = feature; } - cfsFeatures.pop_back(); + selectedFeatures.pop_back(); } if (bestFeature == -1) { // meritNew has to be nan due to constant features break; } - cfsFeatures.push_back(bestFeature); - cfsScores.push_back(merit); + selectedFeatures.push_back(bestFeature); + selectedScores.push_back(merit); featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end()); continueCondition = computeContinueCondition(featureOrder); } @@ -93,10 +41,10 @@ namespace bayesnet { } bool CFS::computeContinueCondition(const vector& featureOrder) { - if (cfsFeatures.size() == maxFeatures || featureOrder.size() == 0) { + if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) { return false; } - if (cfsScores.size() >= 5) { + if (selectedScores.size() >= 5) { /* "To prevent the best first search from exploring the entire feature subset search space, a stopping criterion is imposed. 
@@ -106,7 +54,7 @@ namespace bayesnet { */ double item_ant = numeric_limits::lowest(); int num = 0; - vector lastFive(cfsScores.end() - 5, cfsScores.end()); + vector lastFive(selectedScores.end() - 5, selectedScores.end()); for (auto item : lastFive) { if (item_ant == numeric_limits::lowest()) { item_ant = item; @@ -124,18 +72,4 @@ namespace bayesnet { } return true; } - vector CFS::getFeatures() const - { - if (!fitted) { - throw runtime_error("CFS not fitted"); - } - return cfsFeatures; - } - vector CFS::getScores() const - { - if (!fitted) { - throw runtime_error("CFS not fitted"); - } - return cfsScores; - } } \ No newline at end of file diff --git a/src/BayesNet/CFS.h b/src/BayesNet/CFS.h index eff5da6..36b7c52 100644 --- a/src/BayesNet/CFS.h +++ b/src/BayesNet/CFS.h @@ -2,32 +2,20 @@ #define CFS_H #include #include -#include "BayesMetrics.h" +#include "FeatureSelect.h" using namespace std; namespace bayesnet { - class CFS : public Metrics { + class CFS : public FeatureSelect { public: // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector - CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); + CFS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : + FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights) + { + } virtual ~CFS() {}; - void fit(); - void test(); - vector getFeatures() const; - vector getScores() const; + void fit() override; private: - void computeSuLabels(); - double computeSuFeatures(const int a, const int b); - double symmetricalUncertainty(int a, int b); - double computeMerit(); bool computeContinueCondition(const vector& featureOrder); - vector> combinations(const vector& features); - const torch::Tensor& weights; - int maxFeatures; - vector cfsFeatures; - vector cfsScores; - vector suLabels; - map, double> suFeatures; - bool fitted = false; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index 27a2d3a..c9543ea 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -5,5 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc - Mst.cc Proposal.cc CFS.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) + Mst.cc Proposal.cc CFS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/FeatureSelect.cc b/src/BayesNet/FeatureSelect.cc new file mode 100644 index 0000000..4eb45fe --- /dev/null +++ b/src/BayesNet/FeatureSelect.cc @@ -0,0 +1,74 @@ +#include "FeatureSelect.h" +#include +#include "bayesnetUtils.h" +namespace bayesnet { + FeatureSelect::FeatureSelect(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) : + Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? 
samples.size(0) - 1 : maxFeatures), weights(weights) + + { + } + double FeatureSelect::symmetricalUncertainty(int a, int b) + { + /* + Compute symmetrical uncertainty. Normalize* information gain (mutual + information) with the entropies of the features in order to compensate + the bias due to high cardinality features. *Range [0, 1] + (https://www.sciencedirect.com/science/article/pii/S0020025519303603) + */ + auto x = samples.index({ a, "..." }); + auto y = samples.index({ b, "..." }); + auto mu = mutualInformation(x, y, weights); + auto hx = entropy(x, weights); + auto hy = entropy(y, weights); + return 2.0 * mu / (hx + hy); + } + void FeatureSelect::computeSuLabels() + { + // Compute Simmetrical Uncertainty between features and labels + // https://en.wikipedia.org/wiki/Symmetric_uncertainty + for (int i = 0; i < features.size(); ++i) { + suLabels.push_back(symmetricalUncertainty(i, -1)); + } + } + double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature) + { + // Compute Simmetrical Uncertainty between features + // https://en.wikipedia.org/wiki/Symmetric_uncertainty + try { + return suFeatures.at({ firstFeature, secondFeature }); + } + catch (const out_of_range& e) { + double result = symmetricalUncertainty(firstFeature, secondFeature); + suFeatures[{firstFeature, secondFeature}] = result; + return result; + } + } + double FeatureSelect::computeMeritCFS() + { + double result; + double rcf = 0; + for (auto feature : selectedFeatures) { + rcf += suLabels[feature]; + } + double rff = 0; + int n = selectedFeatures.size(); + for (const auto& item : doCombinations(selectedFeatures)) { + rff += computeSuFeatures(item.first, item.second); + } + return rcf / sqrt(n + (n * n - n) * rff); + } + vector FeatureSelect::getFeatures() const + { + if (!fitted) { + throw runtime_error("FeatureSelect not fitted"); + } + return selectedFeatures; + } + vector FeatureSelect::getScores() const + { + if (!fitted) { + throw runtime_error("FeatureSelect not fitted"); + } + return selectedScores; + } +} \ No newline at end of file diff --git a/src/BayesNet/FeatureSelect.h b/src/BayesNet/FeatureSelect.h new file mode 100644 index 0000000..c1e280c --- /dev/null +++ b/src/BayesNet/FeatureSelect.h @@ -0,0 +1,31 @@ +#ifndef FEATURE_SELECT_H +#define FEATURE_SELECT_H +#include +#include +#include "BayesMetrics.h" +using namespace std; +namespace bayesnet { + class FeatureSelect : public Metrics { + public: + // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector + FeatureSelect(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights); + virtual ~FeatureSelect() {}; + virtual void fit() = 0; + vector getFeatures() const; + vector getScores() const; + protected: + void computeSuLabels(); + double computeSuFeatures(const int a, const int b); + double symmetricalUncertainty(int a, int b); + double computeMeritCFS(); + vector> combinations(const vector& features); + const torch::Tensor& weights; + int maxFeatures; + vector selectedFeatures; + vector selectedScores; + vector suLabels; + map, double> suFeatures; + bool fitted = false; + }; +} +#endif \ No newline at end of file -- 2.45.2 From 6ef49385ea3eb7389979ff97e8587eb45581b7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 14 Oct 2023 11:30:32 +0200 Subject: [PATCH 12/15] Remove unneeded method declaration FeatureSelect --- src/BayesNet/FeatureSelect.h | 1 - 1 file changed, 1 
deletion(-) diff --git a/src/BayesNet/FeatureSelect.h b/src/BayesNet/FeatureSelect.h index c1e280c..c342468 100644 --- a/src/BayesNet/FeatureSelect.h +++ b/src/BayesNet/FeatureSelect.h @@ -18,7 +18,6 @@ namespace bayesnet { double computeSuFeatures(const int a, const int b); double symmetricalUncertainty(int a, int b); double computeMeritCFS(); - vector> combinations(const vector& features); const torch::Tensor& weights; int maxFeatures; vector selectedFeatures; -- 2.45.2 From b35532dd9e15862fd2fcab1c50894ce379fdc968 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Sat, 14 Oct 2023 13:12:04 +0200 Subject: [PATCH 13/15] Implement IWSS and FCBF too for BoostAODE --- src/BayesNet/BayesMetrics.h | 7 +++++ src/BayesNet/BoostAODE.cc | 49 ++++++++++++++++++++++------------- src/BayesNet/BoostAODE.h | 6 ++++- src/BayesNet/CFS.cc | 9 +++---- src/BayesNet/CMakeLists.txt | 2 +- src/BayesNet/FCBF.cc | 44 +++++++++++++++++++++++++++++++ src/BayesNet/FCBF.h | 18 +++++++++++++ src/BayesNet/FeatureSelect.cc | 5 ++++ src/BayesNet/FeatureSelect.h | 1 + src/BayesNet/IWSS.cc | 47 +++++++++++++++++++++++++++++++++ src/BayesNet/IWSS.h | 18 +++++++++++++ 11 files changed, 180 insertions(+), 26 deletions(-) create mode 100644 src/BayesNet/FCBF.cc create mode 100644 src/BayesNet/FCBF.h create mode 100644 src/BayesNet/IWSS.cc create mode 100644 src/BayesNet/IWSS.h diff --git a/src/BayesNet/BayesMetrics.h b/src/BayesNet/BayesMetrics.h index 341951e..66016a6 100644 --- a/src/BayesNet/BayesMetrics.h +++ b/src/BayesNet/BayesMetrics.h @@ -29,6 +29,13 @@ namespace bayesnet { } return result; } + template + T pop_first(vector& v) + { + T temp = v[0]; + v.erase(v.begin()); + return temp; + } public: Metrics() = default; Metrics(const torch::Tensor& samples, const vector& features, const string& className, const int classNumStates); diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index cee8a51..fb38a7c 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -6,6 +6,8 @@ #include "Folding.h" #include "Paths.h" #include "CFS.h" +#include "FCBF.h" +#include "IWSS.h" namespace bayesnet { BoostAODE::BoostAODE() : Ensemble() {} @@ -44,7 +46,7 @@ namespace bayesnet { void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) { // Check if hyperparameters are valid - const vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "cfs" }; + const vector validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" }; checkHyperparameters(validKeys, hyperparameters); if (hyperparameters.contains("repeatSparent")) { repeatSparent = hyperparameters["repeatSparent"]; @@ -58,29 +60,39 @@ namespace bayesnet { if (hyperparameters.contains("convergence")) { convergence = hyperparameters["convergence"]; } - if (hyperparameters.contains("cfs")) { - cfs = hyperparameters["cfs"]; + if (hyperparameters.contains("threshold")) { + threshold = hyperparameters["threshold"]; + } + if (hyperparameters.contains("select_features")) { + auto selectedAlgorithm = hyperparameters["select_features"]; + vector algos = { "IWSS", "FCBF", "CFS" }; + selectFeatures = true; + algorithm = selectedAlgorithm; + if (find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { + throw invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); + } } } unordered_set BoostAODE::initializeModels() { unordered_set featuresUsed; - // Read the CFS features - string output = "[", prefix = ""; - bool first = true; - for (const 
auto& feature : features) { - output += prefix + "'" + feature + "'"; - if (first) { - prefix = ", "; - first = false; - } - } - output += "]"; Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); int maxFeatures = 0; - auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); - cfs.fit(); - auto cfsFeatures = cfs.getFeatures(); + if (algorithm == "CFS") { + featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); + } else if (algorithm == "IWSS") { + if (threshold < 0 || threshold >0.5) { + throw invalid_argument("Invalid threshold value for IWSS [0, 0.5]"); + } + featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); + } else if (algorithm == "FCBF") { + if (threshold < 1e-7 || threshold > 1) { + throw invalid_argument("Invalid threshold value [1e-7, 1]"); + } + featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); + } + featureSelector->fit(); + auto cfsFeatures = featureSelector->getFeatures(); for (const int& feature : cfsFeatures) { // cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << endl; featuresUsed.insert(feature); @@ -90,12 +102,13 @@ namespace bayesnet { significanceModels.push_back(1.0); n_models++; } + delete featureSelector; return featuresUsed; } void BoostAODE::trainModel(const torch::Tensor& weights) { unordered_set featuresUsed; - if (cfs) { + if (selectFeatures) { featuresUsed = initializeModels(); } if (maxModels == 0) diff --git a/src/BayesNet/BoostAODE.h b/src/BayesNet/BoostAODE.h index fb87fce..dd1cf75 100644 --- a/src/BayesNet/BoostAODE.h +++ b/src/BayesNet/BoostAODE.h @@ -3,6 +3,7 @@ #include "Ensemble.h" #include #include "SPODE.h" +#include "FeatureSelect.h" namespace bayesnet { class BoostAODE : public Ensemble { public: @@ -22,7 +23,10 @@ namespace bayesnet { int maxModels = 0; bool ascending = false; //Process KBest features ascending or descending order bool convergence = false; //if true, stop when the model does not improve - bool cfs = false; // if true use CFS to select features stored in cfs folder with sha256(features) file_name + bool selectFeatures = false; // if true, use feature selection + string algorithm = ""; // Selected feature selection algorithm + FeatureSelect* featureSelector = nullptr; + double threshold = -1; }; } #endif \ No newline at end of file diff --git a/src/BayesNet/CFS.cc b/src/BayesNet/CFS.cc index 50c0ea8..f2ffc1e 100644 --- a/src/BayesNet/CFS.cc +++ b/src/BayesNet/CFS.cc @@ -2,13 +2,9 @@ #include #include "bayesnetUtils.h" namespace bayesnet { - - - - void CFS::fit() { - selectedFeatures.clear(); + initialize(); computeSuLabels(); auto featureOrder = argsort(suLabels); // sort descending order auto continueCondition = true; @@ -21,7 +17,8 @@ namespace bayesnet { int bestFeature = -1; for (auto feature : featureOrder) { selectedFeatures.push_back(feature); - auto meritNew = computeMeritCFS(); // Compute merit with cfsFeatures + // Compute merit with selectedFeatures + auto meritNew = computeMeritCFS(); if (meritNew > merit) { merit = meritNew; bestFeature = feature; diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index c9543ea..cc0f5a5 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -5,5 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) 
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc - Mst.cc Proposal.cc CFS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) + Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/FCBF.cc b/src/BayesNet/FCBF.cc new file mode 100644 index 0000000..db935af --- /dev/null +++ b/src/BayesNet/FCBF.cc @@ -0,0 +1,44 @@ +#include "bayesnetUtils.h" +#include "FCBF.h" +namespace bayesnet { + + FCBF::FCBF(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : + FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) + { + if (threshold < 1e-7) { + throw std::invalid_argument("Threshold cannot be less than 1e-7"); + } + } + void FCBF::fit() + { + initialize(); + computeSuLabels(); + auto featureOrder = argsort(suLabels); // sort descending order + auto featureOrderCopy = featureOrder; + for (const auto& feature : featureOrder) { + // Don't self compare + featureOrderCopy.erase(featureOrderCopy.begin()); + if (suLabels.at(feature) == 0.0) { + // The feature has been removed from the list + continue; + } + if (suLabels.at(feature) < threshold) { + break; + } + // Remove redundant features + for (const auto& featureCopy : featureOrderCopy) { + double value = computeSuFeatures(feature, featureCopy); + if (value >= suLabels.at(featureCopy)) { + // Remove feature from list + suLabels[featureCopy] = 0.0; + } + } + selectedFeatures.push_back(feature); + selectedScores.push_back(suLabels[feature]); + if (selectedFeatures.size() == maxFeatures) { + break; + } + } + fitted = true; + } +} \ No newline at end of file diff --git a/src/BayesNet/FCBF.h b/src/BayesNet/FCBF.h new file mode 100644 index 0000000..aa7ff47 --- /dev/null +++ b/src/BayesNet/FCBF.h @@ -0,0 +1,18 @@ +#ifndef FCBF_H +#define FCBF_H +#include +#include +#include "FeatureSelect.h" +using namespace std; +namespace bayesnet { + class FCBF : public FeatureSelect { + public: + // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector + FCBF(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); + virtual ~FCBF() {}; + void fit() override; + private: + double threshold = -1; + }; +} +#endif \ No newline at end of file diff --git a/src/BayesNet/FeatureSelect.cc b/src/BayesNet/FeatureSelect.cc index 4eb45fe..11d929b 100644 --- a/src/BayesNet/FeatureSelect.cc +++ b/src/BayesNet/FeatureSelect.cc @@ -7,6 +7,11 @@ namespace bayesnet { { } + void FeatureSelect::initialize() + { + selectedFeatures.clear(); + selectedScores.clear(); + } double FeatureSelect::symmetricalUncertainty(int a, int b) { /* diff --git a/src/BayesNet/FeatureSelect.h b/src/BayesNet/FeatureSelect.h index c342468..46923c9 100644 --- a/src/BayesNet/FeatureSelect.h +++ b/src/BayesNet/FeatureSelect.h @@ -14,6 +14,7 @@ namespace bayesnet { vector getFeatures() const; vector getScores() const; protected: + void initialize(); void computeSuLabels(); double computeSuFeatures(const int a, const int b); double 
symmetricalUncertainty(int a, int b); diff --git a/src/BayesNet/IWSS.cc b/src/BayesNet/IWSS.cc new file mode 100644 index 0000000..f39f137 --- /dev/null +++ b/src/BayesNet/IWSS.cc @@ -0,0 +1,47 @@ +#include "IWSS.h" +#include +#include "bayesnetUtils.h" +namespace bayesnet { + IWSS::IWSS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) : + FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold) + { + if (threshold < 0 || threshold > .5) { + throw std::invalid_argument("Threshold has to be in [0, 0.5]"); + } + } + void IWSS::fit() + { + initialize(); + computeSuLabels(); + auto featureOrder = argsort(suLabels); // sort descending order + auto featureOrderCopy = featureOrder; + // Add first and second features to result + // First with its own score + auto first_feature = pop_first(featureOrderCopy); + selectedFeatures.push_back(first_feature); + selectedScores.push_back(suLabels.at(first_feature)); + // Second with the score of the candidates + selectedFeatures.push_back(pop_first(featureOrderCopy)); + auto merit = computeMeritCFS(); + selectedScores.push_back(merit); + for (const auto feature : featureOrderCopy) { + selectedFeatures.push_back(feature); + // Compute merit with selectedFeatures + auto meritNew = computeMeritCFS(); + double delta = merit != 0.0 ? abs(merit - meritNew) / merit : 0.0; + if (meritNew > merit || delta < threshold) { + if (meritNew > merit) { + merit = meritNew; + } + selectedScores.push_back(meritNew); + } else { + selectedFeatures.pop_back(); + break; + } + if (selectedFeatures.size() == maxFeatures) { + break; + } + } + fitted = true; + } +} \ No newline at end of file diff --git a/src/BayesNet/IWSS.h b/src/BayesNet/IWSS.h new file mode 100644 index 0000000..88a1034 --- /dev/null +++ b/src/BayesNet/IWSS.h @@ -0,0 +1,18 @@ +#ifndef IWSS_H +#define IWSS_H +#include +#include +#include "FeatureSelect.h" +using namespace std; +namespace bayesnet { + class IWSS : public FeatureSelect { + public: + // dataset is a n+1xm tensor of integers where dataset[-1] is the y vector + IWSS(const torch::Tensor& samples, const vector& features, const string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold); + virtual ~IWSS() {}; + void fit() override; + private: + double threshold = -1; + }; +} +#endif \ No newline at end of file -- 2.45.2 From 660e78351761b7398c72d1848b384df5489638d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Sat, 14 Oct 2023 13:32:09 +0200 Subject: [PATCH 14/15] Update validation for feature selection --- src/Platform/CMakeLists.txt | 2 +- src/Platform/testx.cpp | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 3e52c59..26584e7 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -19,4 +19,4 @@ else() target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}") endif() target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") -target_link_libraries(testx ArffFiles mdlp BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(testx ArffFiles BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/testx.cpp b/src/Platform/testx.cpp index 1ab1d83..dfd6a21 100644 --- a/src/Platform/testx.cpp +++ 
b/src/Platform/testx.cpp @@ -1,5 +1,6 @@ #include "Folding.h" #include +#include "nlohmann/json.hpp" #include "map" #include #include @@ -8,6 +9,8 @@ #include "ArffFiles.h" #include "CPPFImdlp.h" #include "CFS.h" +#include "IWSS.h" +#include "FCBF.h" using namespace std; using namespace platform; @@ -209,6 +212,7 @@ int main() // cout << endl; // net.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest); auto dt = Datasets(true, "Arff"); + nlohmann::json output; for (const auto& name : dt.getNames()) { // for (const auto& name : { "iris" }) { auto [X, y] = dt.getTensors(name); @@ -222,13 +226,23 @@ int main() auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1); dataset = torch::cat({ dataset, yresized }, 0); auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights); - cout << "Dataset: " << name << " CFS features: " << flush; + auto fcbf = bayesnet::FCBF(dataset, features, className, maxFeatures, classNumStates, weights, 1e-7); + auto iwss = bayesnet::IWSS(dataset, features, className, maxFeatures, classNumStates, weights, 0.5); + cout << "Dataset: " << setw(20) << name << flush; cfs.fit(); - for (const auto& feature : cfs.getFeatures()) { - cout << feature << ", "; - } - cout << "end." << endl; + cout << " CFS: " << setw(4) << cfs.getFeatures().size() << flush; + fcbf.fit(); + cout << " FCBF: " << setw(4) << fcbf.getFeatures().size() << flush; + iwss.fit(); + cout << " IWSS: " << setw(4) << iwss.getFeatures().size() << flush; + cout << endl; + output[name]["CFS"] = cfs.getFeatures(); + output[name]["FCBF"] = fcbf.getFeatures(); + output[name]["IWSS"] = iwss.getFeatures(); } + ofstream file("features_cpp.json"); + file << output; + file.close(); } -- 2.45.2 From fa7fe081ad405d917a121154328ce8744f2fa568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 15 Oct 2023 11:19:58 +0200 Subject: [PATCH 15/15] Fix xlsx library finding --- CMakeLists.txt | 5 ++++- README.md | 4 +--- src/Platform/CMakeLists.txt | 9 ++------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 88d769f..0a4515f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,10 @@ add_git_submodule("lib/mdlp") add_git_submodule("lib/argparse") add_git_submodule("lib/json") -find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib ${HOME}/lib/usr/local/lib) + +find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/lib) +message("XLSXWRITER_LIB=${XLSXWRITER_LIB}") + # Subdirectories # -------------- diff --git a/README.md b/README.md index 426be8d..ad2660c 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,9 @@ export BOOST_ROOT=/path/to/library/ ```bash cd lib/libxlsxwriter make -sudo make install +make install DESTDIR=/home/rmontanana/Code PREFIX= ``` -It has to be installed in /usr/local/lib otherwise CMakeLists.txt has to be modified accordingly - Environment variable has to be set: ```bash diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 26584e7..3a565e1 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -11,12 +11,7 @@ add_executable(b_list list.cc Datasets.cc Dataset.cc) add_executable(b_best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ExcelFile.cc) add_executable(testx testx.cpp Datasets.cc Dataset.cc Folding.cc ) target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") -if ( 
CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") - target_link_libraries(b_manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) - target_link_libraries(b_best Boost::boost libxlsxwriter.so stdc++fs) -else() - target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) - target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}") -endif() +target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) +target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}") target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(testx ArffFiles BayesNet "${TORCH_LIBRARIES}") \ No newline at end of file -- 2.45.2
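
Editor's illustrative sketch (not part of the patch series). Taken together, the patches above replace the original file-based CFS lookup with an in-process FeatureSelect hierarchy (CFS, FCBF, IWSS) that BoostAODE can drive through the "select_features" and "threshold" hyperparameters. The sketch below shows how those pieces are expected to fit together; the helper name featureSelectionSketch is hypothetical, the include paths are assumed from the repository layout (src/BayesNet), and the preparation of dataset, features, className, classNumStates and weights is assumed to follow src/Platform/testx.cpp (dataset is an (n+1) x m integer tensor whose last row is the class vector y). The commented-out fit call is a placeholder, since the classifier's fit signature is not shown in these patches.

#include <torch/torch.h>
#include <iostream>
#include "nlohmann/json.hpp"
#include "CFS.h"
#include "FCBF.h"
#include "IWSS.h"
#include "BoostAODE.h"

// Hypothetical helper: runs the three selectors stand-alone, then shows the
// hyperparameter route used inside BoostAODE.
void featureSelectionSketch(const torch::Tensor& dataset,
                            const std::vector<std::string>& features,
                            const std::string& className,
                            int classNumStates,
                            const torch::Tensor& weights)
{
    int maxFeatures = 0; // 0 means "no limit": up to all n features may be selected

    // Correlation-based Feature Selection: no threshold parameter.
    auto cfs = bayesnet::CFS(dataset, features, className, maxFeatures, classNumStates, weights);
    cfs.fit();
    std::cout << "CFS selected " << cfs.getFeatures().size() << " features" << std::endl;

    // FCBF: symmetrical-uncertainty threshold, validated to be in [1e-7, 1].
    auto fcbf = bayesnet::FCBF(dataset, features, className, maxFeatures, classNumStates, weights, 1e-7);
    fcbf.fit();
    std::cout << "FCBF selected " << fcbf.getFeatures().size() << " features" << std::endl;

    // IWSS: relative merit-improvement threshold, validated to be in [0, 0.5].
    auto iwss = bayesnet::IWSS(dataset, features, className, maxFeatures, classNumStates, weights, 0.5);
    iwss.fit();
    std::cout << "IWSS selected " << iwss.getFeatures().size() << " features" << std::endl;

    // Inside BoostAODE the same machinery is driven by hyperparameters:
    nlohmann::json hyperparameters = {
        { "select_features", "IWSS" }, // one of "CFS", "FCBF", "IWSS"
        { "threshold", 0.1 },          // not used by CFS; valid range depends on the algorithm
        { "convergence", true }
    };
    auto clf = bayesnet::BoostAODE();
    clf.setHyperparameters(hyperparameters);
    // clf.fit(...) would then call initializeModels(), which fits one SPODE per
    // selected feature (significance 1.0) before the boosting loop starts.
}

In all three selectors getFeatures() returns the selected feature indices in order of selection and getScores() the score recorded when each feature was added; both throw if fit() has not been called, as defined in FeatureSelect.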