2023-09-02 15:29:34 +00:00
15 changed files with 33 additions and 67 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -25,16 +25,17 @@
            "program": "${workspaceFolder}/build/src/Platform/main",
            "args": [
                "-m",
-                "AODELd",
+                "AODE",
                "-p",
-                "/Users/rmontanana/Code/discretizbench/datasets",
+                "/home/rmontanana/Code/discretizbench/datasets",
                "--stratified",
                "-d",
-                "wine"
+                "mfeat-morphological",
+                "--discretize"
                // "--hyperparameters",
                // "{\"repeatSparent\": true, \"maxModels\": 12}"
            ],
-            "cwd": "/Users/rmontanana/Code/discretizbench",
+            "cwd": "/home/rmontanana/Code/discretizbench",
        },
        {
            "type": "lldb",
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.20)

 project(BayesNet
-  VERSION 0.1.0
+  VERSION 0.2.0
  DESCRIPTION "Bayesian Network and basic classifiers Library."
  HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
  LANGUAGES CXX
@ -40,8 +40,7 @@ if (CODE_COVERAGE)
    enable_testing()
    include(CodeCoverage)
    MESSAGE("Code coverage enabled")
-    set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
-    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
+    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0")
    SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
 endif (CODE_COVERAGE)

--- a/data/_TAN_cpp_accuracy__.json
+++ b/data/_TAN_cpp_accuracy__.json
@ -1 +0,0 @@
-null
--- a/diagrams/BayesNet.pdf
+++ b/diagrams/BayesNet.pdf
--- a/src/BayesNet/BaseClassifier.h
+++ b/src/BayesNet/BaseClassifier.h
@ -25,7 +25,7 @@ namespace bayesnet {
        int virtual getNumberOfStates() const = 0;
        vector<string> virtual show() const = 0;
        vector<string> virtual graph(const string& title = "") const = 0;
-        const string inline getVersion() const { return "0.1.0"; };
+        const string inline getVersion() const { return "0.2.0"; };
        vector<string> virtual topological_order() = 0;
        void virtual dump_cpt()const = 0;
        virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
--- a/src/BayesNet/Classifier.cc
+++ b/src/BayesNet/Classifier.cc
@ -13,7 +13,7 @@ namespace bayesnet {
        m = dataset.size(1);
        n = dataset.size(0) - 1;
        checkFitParameters();
-        auto n_classes = states[className].size();
+        auto n_classes = states.at(className).size();
        metrics = Metrics(dataset, features, className, n_classes);
        model.initialize();
        buildModel(weights);
--- a/src/BayesNet/Ensemble.cc
+++ b/src/BayesNet/Ensemble.cc
@ -17,9 +17,13 @@ namespace bayesnet {
    {
        auto y_pred_ = y_pred.accessor<int, 2>();
        vector<int> y_pred_final;
+        int numClasses = states.at(className).size();
+        // y_pred is m x n_models with the prediction of every model for each sample
        for (int i = 0; i < y_pred.size(0); ++i) {
-            vector<double> votes(y_pred.size(1), 0);
-            for (int j = 0; j < y_pred.size(1); ++j) {
+            // votes[c] accumulates the significance of every model that predicted class c for this sample
+            // i.e. votes[0] holds the total weight given to class value 0 by the models' predictions
+            vector<double> votes(numClasses, 0.0);
+            for (int j = 0; j < n_models; ++j) {
                votes[y_pred_[i][j]] += significanceModels[j];
            }
            // argsort in descending order
@ -34,7 +38,6 @@ namespace bayesnet {
            throw logic_error("Ensemble has not been fitted");
        }
        Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
-        //Create a threadpool
        auto threads{ vector<thread>() };
        mutex mtx;
        for (auto i = 0; i < n_models; ++i) {
--- a/src/BayesNet/Network.cc
+++ b/src/BayesNet/Network.cc
@ -174,42 +174,10 @@ namespace bayesnet {
    {
        setStates(states);
        laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
-        int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
-        if (maxThreadsRunning < 1) {
-            maxThreadsRunning = 1;
+        for (auto& node : nodes) { // compute every node's CPT sequentially
+            node.second->computeCPT(samples, features, laplaceSmoothing, weights);
        }
-        vector<thread> threads;
-        mutex mtx;
-        condition_variable cv;
-        int activeThreads = 0;
-        int nextNodeIndex = 0;
-        while (nextNodeIndex < nodes.size()) {
-            unique_lock<mutex> lock(mtx);
-            cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
-            threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads, &weights]() {
-                while (true) {
-                    unique_lock<mutex> lock(mtx);
-                    if (nextNodeIndex >= nodes.size()) {
-                        break;  // No more work remaining
-                    }
-                    auto& pair = *std::next(nodes.begin(), nextNodeIndex);
-                    ++nextNodeIndex;
-                    lock.unlock();
-                    pair.second->computeCPT(samples, features, laplaceSmoothing, weights);
-                    lock.lock();
-                    nodes[pair.first] = std::move(pair.second);
-                    lock.unlock();
-                }
-                lock_guard<mutex> lock(mtx);
-                --activeThreads;
-                cv.notify_one();
-                });
-            ++activeThreads;
-        }
-        for (auto& thread : threads) {
-            thread.join();
-        }
        fitted = true;
    }
    torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
    {
--- a/src/BayesNet/Network.h
+++ b/src/BayesNet/Network.h
@ -27,6 +27,7 @@ namespace bayesnet {
        Network();
        explicit Network(float);
        explicit Network(Network&);
+        ~Network() = default;
        torch::Tensor& getSamples();
        float getmaxThreads();
        void addNode(const string&);
@ -52,7 +53,7 @@ namespace bayesnet {
        vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
        void initialize();
        void dump_cpt() const;
-        inline string version() { return "0.1.0"; }
+        inline string version() { return "0.2.0"; }
    };
 }
 #endif
--- a/src/BayesNet/Node.cc
+++ b/src/BayesNet/Node.cc
@ -100,7 +100,7 @@ namespace bayesnet {
        }
        int name_index = pos - features.begin();
        for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
-            torch::List<c10::optional<torch::Tensor>> coordinates;
+            c10::List<c10::optional<at::Tensor>> coordinates;
            coordinates.push_back(dataset.index({ name_index, n_sample }));
            for (auto parent : parents) {
                pos = find(features.begin(), features.end(), parent->getName());
@ -118,10 +118,10 @@ namespace bayesnet {
    }
    float Node::getFactorValue(map<string, int>& evidence)
    {
-        torch::List<c10::optional<torch::Tensor>> coordinates;
+        c10::List<c10::optional<at::Tensor>> coordinates;
        // following predetermined order of indices in the cpTable (see Node.h)
-        coordinates.push_back(torch::tensor(evidence[name]));
-        transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); });
+        coordinates.push_back(at::tensor(evidence[name]));
+        transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
        return cpTable.index({ coordinates }).item<float>();
    }
    vector<string> Node::graph(const string& className)
--- a/src/BayesNet/Proposal.cc
+++ b/src/BayesNet/Proposal.cc
@ -53,15 +53,6 @@ namespace bayesnet {
            auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
            auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
            discretizers[feature]->fit(xvf, yxv);
-            //
-            //
-            //
-            // auto tmp = discretizers[feature]->transform(xvf);
-            // Xv[index] = tmp;
-            // auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
-            // iota(xStates.begin(), xStates.end(), 0);
-            // //Update new states of the feature/node
-            // states[feature] = xStates;
        }
        if (upgrade) {
            // Discretize again X (only the affected indices) with the new fitted discretizers
--- a/src/Platform/Datasets.cc
+++ b/src/Platform/Datasets.cc
@ -213,10 +213,11 @@ namespace platform {
    {
        for (int i = 0; i < features.size(); ++i) {
            states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
-            iota(begin(states[features[i]]), end(states[features[i]]), 0);
+            auto& item = states.at(features[i]); // bind by reference: a by-value copy would leave the stored states all zero
+            iota(begin(item), end(item), 0);
        }
        states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
-        iota(begin(states[className]), end(states[className]), 0);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
    }
    void Dataset::load_arff()
    {
--- a/src/Platform/Experiment.cc
+++ b/src/Platform/Experiment.cc
@ -179,8 +179,10 @@ namespace platform {
                result.addTimeTrain(train_time[item].item<double>());
                result.addTimeTest(test_time[item].item<double>());
                item++;
+                clf.reset();
            }
            cout << "end. " << flush;
+            delete fold;
        }
        result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
        result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
--- a/src/Platform/Models.cc
+++ b/src/Platform/Models.cc
@ -26,7 +26,7 @@ namespace platform {
            instance = it->second();
-        // wrap instance in a shared ptr and return
+        // wrap instance in a unique_ptr and return
        if (instance != nullptr)
-            return shared_ptr<bayesnet::BaseClassifier>(instance);
+            return unique_ptr<bayesnet::BaseClassifier>(instance);
        else
            return nullptr;
    }
--- a/src/Platform/platformUtils.cc
+++ b/src/Platform/platformUtils.cc
@ -69,11 +69,12 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
        Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
        for (int i = 0; i < features.size(); ++i) {
            states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-            iota(begin(states[features[i]]), end(states[features[i]]), 0);
+            auto& item = states.at(features[i]); // bind by reference: a by-value copy would leave the stored states all zero
+            iota(begin(item), end(item), 0);
            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
        }
        states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
-        iota(begin(states[className]), end(states[className]), 0);
+        iota(begin(states.at(className)), end(states.at(className)), 0);
    } else {
        Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
        for (int i = 0; i < features.size(); ++i) {