block_update and install in local folder

Ricardo Montañana Gómez 2024-04-10 00:55:36 +02:00
parent 1326891d6a
commit cf9b5716ac
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
7 changed files with 90 additions and 18 deletions

.vscode/launch.json

@@ -8,7 +8,7 @@
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
"args": [
"${workspaceFolder}/tests/data/glass.arff"
],
]
},
{
"type": "lldb",
@@ -16,11 +16,33 @@
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
"args": [
"[Network]"
//"-c=\"Metrics Test\"",
// "-s",
"Block Update"
],
"cwd": "${workspaceFolder}/build_debug/tests",
"cwd": "${workspaceFolder}/build_debug/tests"
},
{
"name": "(gdb) Launch",
"type": "cppdbg",
"request": "launch",
"program": "enter program name, for example ${workspaceFolder}/a.out",
"args": [],
"stopAtEntry": false,
"cwd": "${fileDirname}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
},
{
"description": "Set Disassembly Flavor to Intel",
"text": "-gdb-set disassembly-flavor intel",
"ignoreFailures": true
}
]
}
]
}
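
Note: `TestBayesNet` appears to be a Catch2 runner (the `-c`/`-s` flags and the `[tag]` syntax are Catch2's), so the `args` above are test specs: `"[Network]"` selects test cases by tag and `"Block Update"` selects one by name. A minimal sketch of a test case this configuration would pick up, assuming Catch2 v3 headers (the test body is purely illustrative):

```cpp
#include <catch2/catch_test_macros.hpp>

// Named "Block Update" and tagged [Network]: matched by either of the
// launch arguments shown above.
TEST_CASE("Block Update", "[Network]")
{
    REQUIRE(1 + 1 == 2); // placeholder assertion
}
```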

CMakeLists.txt

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 1.0.4
VERSION 1.0.4.1
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX

Makefile

@@ -53,9 +53,10 @@ uninstall: ## Uninstall library
xargs rm < $(f_release)/install_manifest.txt
@echo ">>> Done";
prefix = "/usr/local"
install: ## Install library
@echo ">>> Installing BayesNet...";
@cmake --install $(f_release)
@cmake --install $(f_release) --prefix $(prefix)
@echo ">>> Done";
debug: ## Build a debug version of the project
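
Note: defining `prefix` with a default of `/usr/local` makes the install destination overridable at invocation time, e.g. `make install prefix=$HOME/.local` installs the library into the user's home tree without root privileges; this is the "install in local folder" half of the commit.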

BoostAODE.cc

@@ -127,6 +127,50 @@ namespace bayesnet {
}
return { weights, alpha_t, terminate };
}
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
{
//
// Predict using only the last k models
//
std::unique_ptr<Classifier> model;
std::vector<std::unique_ptr<Classifier>> models_bak;
auto significance_bak = significanceModels;
auto n_models_bak = n_models;
// Remove the first n_models - k models
for (int i = 0; i < n_models - k; ++i) {
model = std::move(models[0]);
models.erase(models.begin());
models_bak.push_back(std::move(model));
}
assert(models.size() == static_cast<size_t>(k));
significanceModels = std::vector<double>(k, 1.0);
n_models = k;
auto ypred = predict(X_train);
//
// Update weights
//
double alpha_t;
bool terminate;
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
//
// Restore the models if needed
//
if (k != n_models_bak) {
for (int i = k - 1; i >= 0; --i) {
model = std::move(models_bak[i]);
models.insert(models.begin(), std::move(model));
}
}
significanceModels = significance_bak;
n_models = n_models_bak;
//
// Update the significance of the last k models
//
for (int i = 0; i < k; ++i) {
significanceModels[n_models - k + i] = alpha_t;
}
return { weights, alpha_t, terminate };
}
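
`update_weights_block` delegates the actual boosting arithmetic to `update_weights`, whose body lies outside this diff. For orientation, here is a minimal sketch of an AdaBoost-style weight update matching the signature used above; it illustrates the technique under stated assumptions and is not the library's exact implementation:

```cpp
#include <torch/torch.h>
#include <cmath>
#include <tuple>

// Illustrative AdaBoost-style update (assumed shape, not the library's code).
// Returns the updated weights, the ensemble member's "amount of say" alpha_t,
// and whether boosting should terminate.
std::tuple<torch::Tensor&, double, bool> update_weights_sketch(
    torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
{
    bool terminate = false;
    double alpha_t = 0.0;
    // Weighted error: total weight of misclassified samples
    auto wrong = (ypred != ytrain).to(weights.dtype());
    double epsilon_t = (weights * wrong).sum().item<double>();
    if (epsilon_t > 0.5) {
        terminate = true; // weak learner is no better than chance
    } else {
        alpha_t = epsilon_t == 0.0
            ? 1.0
            : 0.5 * std::log((1.0 - epsilon_t) / epsilon_t);
        // Up-weight misclassified samples, down-weight the rest, renormalize
        auto right = (ypred == ytrain).to(weights.dtype());
        weights = weights * (wrong * std::exp(alpha_t) + right * std::exp(-alpha_t));
        weights = weights / weights.sum();
    }
    return { weights, alpha_t, terminate };
}
```

Given such an update, the block variant computes a single epsilon_t from the joint prediction of the last k models and assigns the same alpha_t to all k of them, instead of deriving one alpha_t per model.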
std::vector<int> BoostAODE::initializeModels()
{
std::vector<int> featuresUsed;
@@ -156,7 +200,7 @@
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
models.push_back(std::move(model));
significanceModels.push_back(1.0);
significanceModels.push_back(1.0); // They will be updated later in trainModel
n_models++;
}
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
@@ -229,13 +273,15 @@
std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
torch::Tensor ypred;
ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
if (finished) {
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
break;
alpha_t = 0.0;
if (!block_update) {
auto ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
if (finished) {
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
break;
}
}
// Step 3.4: Store classifier and its accuracy to weigh its future vote
numItemsPack++;
@@ -245,6 +291,9 @@
n_models++;
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
}
if (block_update) {
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
}
if (convergence && !finished) {
auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
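
The net effect in `trainModel`: with `block_update` enabled, the per-model call to `update_weights` inside the loop is skipped (`alpha_t` stays at 0.0 until the block is scored), and once the k models of the current pack have been fitted, `update_weights_block(k, y_train, weights_)` performs one weight update for the whole block.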

BoostAODE.h

@@ -25,6 +25,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
private:
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
std::vector<int> initializeModels();
torch::Tensor X_train, y_train, X_test, y_test;
// Hyperparameters
@@ -36,7 +37,7 @@ namespace bayesnet {
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
bool block_update = true;
bool block_update = false;
};
}
#endif

@@ -8,7 +8,7 @@ The hyperparameters defined in the algorithm are:
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *true*.
- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *false*.
- ***convergence*** (*boolean*): Sets whether convergence of the result is used as a termination condition. If set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition becomes a validation partition in this case). The split is made by taking the first fold of a stratified 5-fold partition generated with a predetermined seed. The exit condition tied to this *convergence* is that the difference between the accuracy of the current model and that of the previous model must be greater than *1e-4*; otherwise, one is added to the count of models that worsen the result (see the next hyperparameter). Default value: *true*.
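
A hedged usage sketch: assuming the library's JSON-based hyperparameter interface (a `setHyperparameters(nlohmann::json)` method on the classifier; verify the exact entry point against the current API), opting back into block updates would look like:

```cpp
#include <nlohmann/json.hpp>
#include "BoostAODE.h" // include path within the bayesnet sources may differ

int main()
{
    bayesnet::BoostAODE clf;
    // block_update now defaults to false, so it must be enabled explicitly
    clf.setHyperparameters(nlohmann::json{
        { "block_update", true },
        { "convergence", true }
    });
    // ... fit and predict as usual ...
    return 0;
}
```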

@@ -1,5 +1,4 @@
import subprocess
import os
import sys
readme_file = "README.md"