Add Experiment, Result and Timer classes

2023-07-24 01:15:12 +02:00 · 2023-07-24 01:15:12 +02:00 · c10ebca0e0
commit c10ebca0e0
parent 0c226371cc
5 changed files with 106 additions and 56 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -16,10 +16,18 @@
        {
            "type": "lldb",
            "request": "launch",
-            "name": "aout",
-            "program": "${workspaceFolder}/a.out",
-            "args": [],
-            "cwd": "${workspaceFolder}"
+            "name": "experiment",
+            "program": "${workspaceFolder}/build/src/Platform/main",
+            "args": [
+                "-f",
+                "iris",
+                "-m",
+                "TAN",
+                "-p",
+                "../../../data/",
+                "--discretize"
+            ],
+            "cwd": "${workspaceFolder}/build/src/Platform",
        },
        {
            "name": "Build & debug active file",
--- a/src/Platform/Experiment.cc
+++ b/src/Platform/Experiment.cc
@ -12,16 +12,22 @@
 #include "AODE.h"
 #include "TAN.h"
 #include "platformUtils.h"
+#include "Result.h"
 #include "Folding.h"


 using namespace std;

-pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
+Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
 {
+    auto result = Result();
    auto k = fold->getNumberOfFolds();
-    float accuracy = 0.0;
+    auto accuracy = torch::zeros({ k }, kFloat64);
+    auto train_time = torch::zeros({ k }, kFloat64);
+    auto test_time = torch::zeros({ k }, kFloat64);
+    Timer train_timer, test_timer;
    for (int i = 0; i < k; i++) {
+        train_timer.start();
        auto [train, test] = fold->getFold(i);
        auto train_t = torch::tensor(train);
        auto test_t = torch::tensor(test);
@ -30,10 +36,15 @@ pair<float, float> cross_validation(Fold* fold, bayesnet::BaseClassifier* model,
        auto X_test = X.index({ test_t });
        auto y_test = y.index({ test_t });
        model->fit(X_train, y_train, features, className, states);
+        train_time[i] = train_timer.getDuration();
+        test_timer.start();
        auto acc = model->score(X_test, y_test);
-        accuracy += acc;
+        test_time[i] = test_timer.getDuration();
+        accuracy[i] = acc;
    }
-    return { accuracy / k, 0 };
+    result.setScore(torch::mean(accuracy).item<double>());
+    result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
+    return result;
 }

 int main(int argc, char** argv)
@ -96,25 +107,23 @@ int main(int argc, char** argv)
        cerr << program;
        exit(1);
    }
-
    /*
    * Begin Processing
    */
-    auto [X, y, features, className] = loadDataset(file_name, discretize_dataset, class_last);
-    auto states = map<string, vector<int>>();
-    if (discretize_dataset) {
-        auto [Xd, maxes] = discretizeTorch(X, y, features);
-        states = get_states(Xd, y, features, className);
-        X = Xd;
-    }
+    auto [X, y, features, className, states] = loadDataset(path, file_name, class_last, discretize_dataset);
    auto fold = StratifiedKFold(5, y, -1);
    auto classifiers = map<string, bayesnet::BaseClassifier*>({
        { "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
        { "SPODE",  new bayesnet::SPODE(2) }, { "TAN",  new bayesnet::TAN() }
        }
    );
+    auto experiment = Experiment();
+    experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
+    experiment.setStratified(true).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
    bayesnet::BaseClassifier* model = classifiers[model_name];
-    auto results = cross_validation(&fold, model, X, y, features, className, states);
-    cout << "Accuracy: " << results.first << endl;
+    auto result = cross_validation(&fold, model, X, y, features, className, states);
+    result.setDataset(file_name);
+    experiment.addResult(result);
+    experiment.save(path);
    return 0;
 }
--- a/src/Platform/Result.h
+++ b/src/Platform/Result.h
@ -0,0 +1,57 @@
+#ifndef RESULT_H
+#define RESULT_H
+#include <chrono>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace std;
+/// Minimal stopwatch built on a monotonic clock.
+///
+/// Call start() to mark a reference instant, then getDuration() to read the
+/// time elapsed since that instant. If start() was never called, the
+/// reference instant is the clock's epoch.
+class Timer {
+private:
+    // The stored time point and every sample use steady_clock. Mixing a
+    // steady_clock time_point with high_resolution_clock::now() only compiles
+    // on standard libraries where both names denote the same clock type.
+    std::chrono::time_point<std::chrono::steady_clock> begin{};
+public:
+    Timer() = default;
+    ~Timer() = default;
+    /// Record the current instant as the reference point.
+    void start() { begin = std::chrono::steady_clock::now(); }
+    /// @return Whole milliseconds elapsed since the reference instant
+    ///         (sub-millisecond remainders are truncated by duration_cast).
+    float getDuration() const
+    {
+        const auto elapsed = std::chrono::steady_clock::now() - begin;
+        return static_cast<float>(std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count());
+    }
+};
+/// Value holder for the figures describing one evaluation run.
+///
+/// All fields are written through chainable setters (each returns *this), so
+/// calls can be strung together: result.setScore(s).setTrainTime(t).
+/// Every scalar field starts at zero and every string starts empty, so a
+/// default-constructed Result is always safe to copy.
+class Result {
+private:
+    std::string dataset, hyperparameters;
+    // Explicit initialisers: with a defaulted constructor, `Result r;` would
+    // otherwise leave these scalars indeterminate.
+    int samples{ 0 }, features{ 0 }, classes{ 0 };
+    float score{ 0 }, score_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
+public:
+    Result() = default;
+    // String setters take their argument by value and move it into place,
+    // avoiding a second copy of the caller's string.
+    Result& setDataset(std::string dataset) { this->dataset = std::move(dataset); return *this; }
+    Result& setHyperparameters(std::string hyperparameters) { this->hyperparameters = std::move(hyperparameters); return *this; }
+    Result& setSamples(int samples) { this->samples = samples; return *this; }
+    Result& setFeatures(int features) { this->features = features; return *this; }
+    Result& setClasses(int classes) { this->classes = classes; return *this; }
+    Result& setScore(float score) { this->score = score; return *this; }
+    Result& setScoreStd(float score_std) { this->score_std = score_std; return *this; }
+    Result& setTrainTime(float train_time) { this->train_time = train_time; return *this; }
+    Result& setTrainTimeStd(float train_time_std) { this->train_time_std = train_time_std; return *this; }
+    Result& setTestTime(float test_time) { this->test_time = test_time; return *this; }
+    Result& setTestTimeStd(float test_time_std) { this->test_time_std = test_time_std; return *this; }
+};
+/// Collects the settings of an experiment together with the Result objects
+/// added to it.
+///
+/// Setters and adders are chainable (each returns *this). Flags start as
+/// false, the fold count starts at zero, and strings and lists start empty.
+class Experiment {
+private:
+    std::string title, model, platform, score_name, model_version, language_version;
+    // Explicit initialisers: with a defaulted constructor, `Experiment e;`
+    // would otherwise leave these scalars indeterminate.
+    bool discretized{ false }, stratified{ false };
+    std::vector<Result> results;
+    std::vector<int> random_seeds;
+    int nfolds{ 0 };
+public:
+    Experiment() = default;
+    // By-value parameters are moved into place to avoid a second copy.
+    Experiment& setTitle(std::string title) { this->title = std::move(title); return *this; }
+    Experiment& setModel(std::string model) { this->model = std::move(model); return *this; }
+    Experiment& setPlatform(std::string platform) { this->platform = std::move(platform); return *this; }
+    Experiment& setScoreName(std::string score_name) { this->score_name = std::move(score_name); return *this; }
+    Experiment& setModelVersion(std::string model_version) { this->model_version = std::move(model_version); return *this; }
+    Experiment& setLanguageVersion(std::string language_version) { this->language_version = std::move(language_version); return *this; }
+    Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
+    Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
+    Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
+    /// Append a copy of @p result to the stored results.
+    Experiment& addResult(Result result) { results.push_back(std::move(result)); return *this; }
+    /// Append @p random_seed to the list of seeds.
+    Experiment& addRandomSeed(int random_seed) { random_seeds.push_back(random_seed); return *this; }
+    /// Placeholder: only prints a message to stdout; @p path is not used yet
+    /// and nothing is written to disk.
+    void save(std::string path) { std::cout << "Saving experiment..." << std::endl; }
+};
+#endif
--- a/src/Platform/platformUtils.cc
+++ b/src/Platform/platformUtils.cc
@ -15,22 +15,6 @@ pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t
    }
    return { Xd, maxes };
 }
-pair<Tensor, map<string, int>> discretizeTorch(Tensor& X, Tensor& y, vector<string> features)
-{
-    map<string, int> maxes;
-    auto fimdlp = mdlp::CPPFImdlp();
-    auto Xd = torch::zeros_like(X, torch::kInt64);
-    auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
-    for (int i = 0; i < X.size(1); i++) {
-        auto xv = vector<float>(X.select(1, i).data_ptr<float>(), X.select(1, i).data_ptr<float>() + X.size(0));
-        fimdlp.fit(xv, yv);
-        auto xdv = fimdlp.transform(xv);
-        auto xd = torch::tensor(xdv, torch::kInt64);
-        maxes[features[i]] = xd.max().item<int>() + 1;
-        Xd.index_put_({ "...", i }, xd);
-    }
-    return { Xd, maxes };
-}

 vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
 {
@ -54,10 +38,10 @@ bool file_exists(const std::string& name)
    }
 }

-tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last)
+tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset)
 {
    auto handler = ArffFiles();
-    handler.load(PATH + static_cast<string>(name) + ".arff", class_last);
+    handler.load(path + static_cast<string>(name) + ".arff", class_last);
    // Get Dataset X, y
    vector<mdlp::samples_t>& X = handler.getX();
    mdlp::labels_t& y = handler.getY();
@ -68,32 +52,24 @@ tuple < Tensor, Tensor, vector<string>, string> loadDataset(string name, bool di
        features.push_back(feature.first);
    }
    Tensor Xd;
-    if (discretize) {
+    auto states = map<string, vector<int>>();
+    if (discretize_dataset) {
        auto Xr = discretizeDataset(X, y);
        Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
        for (int i = 0; i < features.size(); ++i) {
+            states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
+            iota(begin(states[features[i]]), end(states[features[i]]), 0);
            Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
        }
+        states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
+        iota(begin(states[className]), end(states[className]), 0);
    } else {
-        Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat64);
+        Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
        for (int i = 0; i < features.size(); ++i) {
-            Xd.index_put_({ "...", i }, torch::tensor(X[i], torch::kFloat64));
+            Xd.index_put_({ "...", i }, torch::tensor(X[i]));
        }
    }
-    return { Xd, torch::tensor(y, torch::kInt64), features, className };
-}
-
-map<string, vector<int>> get_states(Tensor& X, Tensor& y, vector<string> features, string className)
-{
-    int max;
-    map<string, vector<int>> states;
-    for (int i = 0; i < X.size(1); i++) {
-        max = X.select(1, i).max().item<int>() + 1;
-        states[features[i]] = vector<int>(max);
-    }
-    max = y.max().item<int>() + 1;
-    states[className] = vector<int>(max);
-    return states;
+    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
 }

 tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name)
--- a/src/Platform/platformUtils.h
+++ b/src/Platform/platformUtils.h
@ -12,8 +12,8 @@ const string PATH = "../../data/";

 bool file_exists(const std::string& name);
 pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
-pair<torch::Tensor, map<string, int>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string> features);
+pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className);
 tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(string name);
-tuple<torch::Tensor, torch::Tensor, vector<string>, string> loadDataset(string name, bool discretize, bool class_last);
-map<string, vector<int>> get_states(torch::Tensor& X, torch::Tensor& y, vector<string> features, string className);
+tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(string path, string name, bool class_last, bool discretize_dataset);
+map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
 #endif //PLATFORM_UTILS_H