Fix some mistakes in tensor handling

This commit is contained in:
2023-07-26 01:39:01 +02:00
parent be06e475f0
commit 099b4bea09
18 changed files with 255 additions and 72 deletions

View File

@@ -5,4 +5,5 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
add_executable(main Experiment.cc Folding.cc platformUtils.cc)
add_executable(testx testx.cpp Folding.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(testx ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -18,8 +18,13 @@
using namespace std;
Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
Result cross_validation(Fold* fold, string model_name, Tensor& X, Tensor& y, vector<string> features, string className, map<string, vector<int>> states)
{
auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
}
);
auto result = Result();
auto k = fold->getNumberOfFolds();
auto accuracy = torch::zeros({ k }, kFloat64);
@@ -27,6 +32,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
auto test_time = torch::zeros({ k }, kFloat64);
Timer train_timer, test_timer;
for (int i = 0; i < k; i++) {
bayesnet::BaseClassifier* model = classifiers[model_name];
train_timer.start();
auto [train, test] = fold->getFold(i);
auto train_t = torch::tensor(train);
@@ -43,8 +49,7 @@ Result cross_validation(Fold* fold, bayesnet::BaseClassifier* model, Tensor& X,
cout << "y_test: " << y_test.sizes() << endl;
train_time[i] = train_timer.getDuration();
test_timer.start();
//auto acc = model->score(X_test, y_test);
auto acc = 7;
auto acc = model->score(X_test, y_test);
test_time[i] = test_timer.getDuration();
accuracy[i] = acc;
}
@@ -140,18 +145,16 @@ int main(int argc, char** argv)
fold = new StratifiedKFold(n_folds, y, -1);
else
fold = new KFold(n_folds, y.numel(), -1);
auto classifiers = map<string, bayesnet::BaseClassifier*>({
{ "AODE", new bayesnet::AODE() }, { "KDB", new bayesnet::KDB(2) },
{ "SPODE", new bayesnet::SPODE(2) }, { "TAN", new bayesnet::TAN() }
}
);
auto experiment = Experiment();
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("cpp");
experiment.setStratified(stratified).setNFolds(5).addRandomSeed(271).setScoreName("accuracy");
bayesnet::BaseClassifier* model = classifiers[model_name];
auto result = cross_validation(fold, model, X, y, features, className, states);
auto result = cross_validation(fold, model_name, X, y, features, className, states);
result.setDataset(file_name);
experiment.addResult(result);
experiment.save(path);
for (auto& item : states) {
cout << item.first << ": " << item.second.size() << endl;
}
return 0;
}

View File

@@ -1,13 +1,16 @@
#include "Folding.h"
#include <algorithm>
#include <map>
#include <random>
// Base constructor: records the number of folds (k), sample count (n) and the
// user-supplied seed, then seeds both random sources used by the subclasses.
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
// seed == -1 means "nondeterministic": fall back to hardware entropy / wall clock.
random_device rd;
// Member engine (declared in Folding.h) shared by subclasses for shuffling.
random_seed = default_random_engine(seed == -1 ? rd() : seed);
// Also seed the C rand() stream — used elsewhere (e.g. StratifiedKFold's
// remainder-fold selection via rand() % k).
srand(seed == -1 ? time(0) : seed);
}
// KFold constructor: builds the index permutation used to slice folds.
// Fix: the previous body declared a LOCAL random_device/default_random_engine
// named random_seed, shadowing the base-class member that Fold::Fold already
// seeds — so the member engine went unused. Use the inherited engine instead,
// keeping shuffles consistent with the rest of the folding code.
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed)
{
    indices = vector<int>(n);
    iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
    shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
@@ -54,8 +57,6 @@ void StratifiedKFold::build()
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
random_device rd;
default_random_engine random_seed(seed == -1 ? rd() : seed);
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
@@ -71,7 +72,7 @@ void StratifiedKFold::build()
class_indices[label].erase(class_indices[label].begin(), it);
}
while (remainder_samples_to_take > 0) {
int fold = (arc4random() % static_cast<int>(k));
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size) {
continue;
}

View File

@@ -2,6 +2,7 @@
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
#include <random>
using namespace std;
class Fold {
@@ -9,8 +10,9 @@ protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1) : k(k), n(n), seed(seed) {}
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }

View File

@@ -55,16 +55,16 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int64_t>(Xr[0].size()), static_cast<int64_t>(Xr.size()) }, torch::kInt64);
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt64));
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
} else {
Xd = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, torch::kFloat32);
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
}

View File

@@ -62,14 +62,40 @@ int main()
cout << endl;
cout << "Test Statistics: " << counts(y, test);
cout << "==============================================================================" << endl;
torch::Tensor a = torch::zeros({ 5, 3 });
torch::Tensor b = torch::zeros({ 5 }) + 1;
torch::Tensor c = torch::cat({ a, b.view({5, 1}) }, 1);
cout << "a:" << a.sizes() << endl;
cout << a << endl;
cout << "b:" << b.sizes() << endl;
cout << b << endl;
cout << "c:" << c.sizes() << endl;
cout << c << endl;
torch::Tensor d = torch::zeros({ 5, 3 });
torch::Tensor e = torch::tensor({ 1,2,3,4,5 }) + 1;
torch::Tensor f = torch::cat({ d, e.view({5, 1}) }, 1);
cout << "d:" << d.sizes() << endl;
cout << d << endl;
cout << "e:" << e.sizes() << endl;
cout << e << endl;
cout << "f:" << f.sizes() << endl;
cout << f << endl;
auto indices = torch::tensor({ 0, 2, 4 });
auto k = f.index({ indices, "..." });
cout << "k:" << k.sizes() << endl;
cout << k << endl;
auto w = torch::index_select(f, 0, indices);
cout << "w:" << w.sizes() << endl;
cout << w << endl;
// cout << "Vector poly" << endl;
// auto some = vector<A>();
// auto cx = C(5, 4);
// auto bx = B(7, 6);
// some.push_back(cx);
// some.push_back(bx);
// for (auto& obj : some) {
// cout << "Obj :" << obj.getA() << endl;
// }
// auto some = vector<A>();
// auto cx = C(5, 4);
// auto bx = B(7, 6);
// some.push_back(cx);
// some.push_back(bx);
// for (auto& obj : some) {
// cout << "Obj :" << obj.getA() << endl;
// }
}
}