Add status to classifier and Experiment

2023-09-05 13:39:43 +02:00
parent 64fc7bd9dd
commit 5a7c8f1818
5 changed files with 256 additions and 180 deletions
--- a/src/BayesNet/BaseClassifier.h
+++ b/src/BayesNet/BaseClassifier.h
@@ -5,6 +5,7 @@
 #include <vector>
 namespace bayesnet {
    using namespace std;
+    enum status_t { NORMAL, WARNING, ERROR };
    class BaseClassifier {
    protected:
        virtual void trainModel(const torch::Tensor& weights) = 0;
@@ -18,6 +19,7 @@ namespace bayesnet {
        virtual ~BaseClassifier() = default;
        torch::Tensor virtual predict(torch::Tensor& X) = 0;
        vector<int> virtual predict(vector<vector<int>>& X) = 0;
+        status_t virtual getStatus() const = 0;
        float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
        float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
        int virtual getNumberOfNodes()const = 0;
--- a/src/BayesNet/BoostAODE.cc
+++ b/src/BayesNet/BoostAODE.cc
@@ -1,6 +1,7 @@
 #include "BoostAODE.h"
 #include <set>
 #include "BayesMetrics.h"
+#include "Colors.h"

 namespace bayesnet {
    BoostAODE::BoostAODE() : Ensemble() {}
@@ -64,22 +65,26 @@ namespace bayesnet {
            auto ypred = model->predict(X_);
            // Step 3.1: Compute the classifier amout of say
            auto mask_wrong = ypred != y_;
+            auto mask_right = ypred == y_;
            auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
-            double wrongWeights = masked_weights.sum().item<double>();
-            double significance = wrongWeights == 0 ? 1 : 0.5 * log((1 - wrongWeights) / wrongWeights);
+            double epsilon_t = masked_weights.sum().item<double>();
+            double wt = (1 - epsilon_t) / epsilon_t;
+            double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
            // Step 3.2: Update weights for next classifier
            // Step 3.2.1: Update weights of wrong samples
-            weights_ += mask_wrong.to(weights_.dtype()) * exp(significance) * weights_;
+            weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
+            // Step 3.2.2: Update weights of right samples
+            weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
            // Step 3.3: Normalise the weights
            double totalWeights = torch::sum(weights_).item<double>();
            weights_ = weights_ / totalWeights;
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            models.push_back(std::move(model));
-            significanceModels.push_back(significance);
-            exitCondition = n_models == maxModels && repeatSparent;
+            significanceModels.push_back(alpha_t);
+            exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5;
        }
        if (featuresUsed.size() != features.size()) {
-            cout << "Warning: BoostAODE did not use all the features" << endl;
+            status = WARNING;
        }
        weights.copy_(weights_);
    }
--- a/src/BayesNet/Classifier.h
+++ b/src/BayesNet/Classifier.h
@@ -21,6 +21,7 @@ namespace bayesnet {
        string className;
        map<string, vector<int>> states;
        Tensor dataset; // (n+1)xm tensor
+        status_t status = NORMAL;
        void checkFitParameters();
        virtual void buildModel(const torch::Tensor& weights) = 0;
        void trainModel(const torch::Tensor& weights) override;
@@ -37,6 +38,7 @@ namespace bayesnet {
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        Tensor predict(Tensor& X) override;
+        status_t getStatus() const override { return status; }
        vector<int> predict(vector<vector<int>>& X) override;
        float score(Tensor& X, Tensor& y) override;
        float score(vector<vector<int>>& X, vector<int>& y) override;
--- a/src/Platform/Experiment.cc
+++ b/src/Platform/Experiment.cc
@@ -111,6 +111,26 @@ namespace platform {
        }
    }

+    // Chooses the Colors:: string for a classifier status: NORMAL -> GREEN, WARNING -> YELLOW, ERROR -> RED.
+    string getColor(bayesnet::status_t status)
+    {
+        if (status == bayesnet::NORMAL) {
+            return Colors::GREEN();
+        } else if (status == bayesnet::WARNING) {
+            return Colors::YELLOW();
+        } else if (status == bayesnet::ERROR) {
+            return Colors::RED();
+        }
+        // Value that is not one of the three enumerators: fall back to the reset string.
+        return Colors::RESET();
+    }
+
+    // Writes "<fold>(<phase>)" to cout, each part wrapped in color/RESET; any phase other than "a" first backs over the previous marker.
+    void showProgress(int fold, const string& color, const string& phase)
+    {
+        const string prefix = phase == "a" ? string() : string(to_string(fold).size() + 3, '\b'); // printed width of fold + "(x)"; assumes one-character phases
+        cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
+    }
    void Experiment::cross_validation(const string& path, const string& fileName)
    {
        auto datasets = platform::Datasets(path, discretized, platform::ARFF);
@@ -159,23 +179,24 @@ namespace platform {
                auto y_train = y.index({ train_t });
                auto X_test = X.index({ "...", test_t });
                auto y_test = y.index({ test_t });
-                cout << nfold + 1 << "(a)" << flush;
+                showProgress(nfold + 1, getColor(clf->getStatus()), "a");
                // Train model
                clf->fit(X_train, y_train, features, className, states);
-                cout << "\b\bb)" << flush;
+                showProgress(nfold + 1, getColor(clf->getStatus()), "b");
                nodes[item] = clf->getNumberOfNodes();
                edges[item] = clf->getNumberOfEdges();
                num_states[item] = clf->getNumberOfStates();
                train_time[item] = train_timer.getDuration();
+                // Score train
                auto accuracy_train_value = clf->score(X_train, y_train);
-                cout << "\b\bc)" << flush;
                // Test model
+                showProgress(nfold + 1, getColor(clf->getStatus()), "c");
                test_timer.start();
                auto accuracy_test_value = clf->score(X_test, y_test);
-                cout << "\b\b\b, " << flush;
                test_time[item] = test_timer.getDuration();
                accuracy_train[item] = accuracy_train_value;
                accuracy_test[item] = accuracy_test_value;
+                cout << "\b\b\b, " << flush;
                // Store results and times in vector
                result.addScoreTrain(accuracy_train_value);
                result.addScoreTest(accuracy_test_value);