refactor importClass and valgrind

2023-11-12 00:02:49 +01:00
parent a3bf97e501
commit c7372b7fc7
9 changed files with 1596906 additions and 44 deletions
--- a/example/example.cc
+++ b/example/example.cc
@@ -1,10 +1,10 @@
 #include <torch/torch.h>
 #include "ArffFiles.h"
+#include <random>
+#include <algorithm>
+#include <numeric>
 #include <vector>
 #include <string>
 #include <iostream>
-#include <map>
-#include <tuple>
 #include "STree.h"
 #include "SVC.h"
 #include "RandomForest.h"
@@ -36,21 +36,42 @@ tuple<Tensor, Tensor> loadDataset(const string& name, bool class_last)
    return { Xd, torch::tensor(y, torch::kInt32) };
 }

+/**
+ * Build a seeded random train/test partition of the sample indices [0, size).
+ *
+ * The indices 0..size-1 are shuffled with a fixed seed (17); the first
+ * static_cast<int>(size * 0.7f) shuffled indices form the training set and
+ * the remaining ones form the test set, so the two sets are disjoint and
+ * together cover every index exactly once.
+ *
+ * @param size Number of samples to partition.
+ * @return {train_indices, test_indices}: two 1-D kInt32 tensors.
+ *
+ * NOTE: std::default_random_engine and std::shuffle are implementation-defined,
+ * so the concrete split is repeatable on one toolchain but may differ between
+ * standard libraries.
+ */
+pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
+{
+    const int seed = 17;
+    const float train_size_p = 0.7f;
+    const int train_size = static_cast<int>(size * train_size_p);
+    std::vector<int> indices(size);
+    std::iota(indices.begin(), indices.end(), 0);
+    std::shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
+    // Materialise each half of the permutation with a single tensor
+    // construction instead of writing the tensors one scalar at a time.
+    const auto split = indices.begin() + train_size;
+    const std::vector<int> train(indices.begin(), split);
+    const std::vector<int> test(split, indices.end());
+    return { torch::tensor(train, torch::kInt32), torch::tensor(test, torch::kInt32) };
+}
+
 int main(int argc, char* argv[])
 {
    using json = nlohmann::json;
    cout << "* Begin." << endl;
    {
        using namespace torch::indexing;
-        auto datasetName = "iris";
+        auto datasetName = "wine";
        bool class_last = true;
        auto [X, y] = loadDataset(datasetName, class_last);
-        auto m = y.size(0);
-        int train_split = m * .7;
-        auto Xtrain = X.index({ "...", Slice(0, train_split) });
-        auto ytrain = y.index({ Slice(0, train_split) });
-        auto Xtest = X.index({ "...", Slice(train_split, m) });
-        auto ytest = y.index({ Slice(train_split, m) });
+        // Split train/test
+        auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
+        auto Xtrain = X.index({ "...", train_indices });
+        auto ytrain = y.index({ train_indices });
+        auto Xtest = X.index({ "...", test_indices });
+        auto ytest = y.index({ test_indices });
        cout << "Dataset: " << datasetName << endl;
        cout << "X: " << X.sizes() << endl;
        cout << "y: " << y.sizes() << endl;
@@ -62,36 +83,36 @@ int main(int argc, char* argv[])
        // STree
        //
        auto clf = pywrap::STree();
-        clf.fit(X, y);
-        double clf_score = clf.score(X, y);
-        auto stree = pywrap::STree();
-        auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
-        stree.setHyperparameters(hyperparameters);
-        cout << "STree Version: " << clf.version() << endl;
-        auto prediction = clf.predict(X);
-        cout << "Prediction: " << endl << "{";
-        for (int i = 0; i < prediction.size(0); ++i) {
-            cout << prediction[i].item<int>() << ", ";
-        }
-        cout << "}" << endl;
+        // Train on the training split only: features and labels must come from
+        // the same split (Xtrain/ytrain) so their sample counts match.
+        clf.fit(Xtrain, ytrain);
+        // Score on the held-out test split.
+        double clf_score = clf.score(Xtest, ytest);
+        // auto stree = pywrap::STree();
+        // auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
+        // stree.setHyperparameters(hyperparameters);
+        // cout << "STree Version: " << clf.version() << endl;
+        // auto prediction = clf.predict(X);
+        // cout << "Prediction: " << endl << "{";
+        // for (int i = 0; i < prediction.size(0); ++i) {
+        //     cout << prediction[i].item<int>() << ", ";
+        // }
+        // cout << "}" << endl;
        //
        // SVC
        //
-        auto svc = pywrap::SVC();
-        cout << "SVC with hyperparameters" << endl;
-        svc.fit(X, y);
+        // auto svc = pywrap::SVC();
+        // cout << "SVC with hyperparameters" << endl;
+        // svc.fit(Xtrain, ytrain);
        //
        // Random Forest
        //
-        cout << "Building Random Forest" << endl;
-        auto rf = pywrap::RandomForest();
-        rf.fit(Xtrain, ytrain);
+        // cout << "Building Random Forest" << endl;
+        // auto rf = pywrap::RandomForest();
+        // rf.fit(Xtrain, ytrain);
        //
        // XGBoost
        //
-        cout << "Building XGBoost" << endl;
-        auto xg = pywrap::XGBoost();
-        cout << "Fitting XGBoost" << endl;
+        // cout << "Building XGBoost" << endl;
+        // auto xg = pywrap::XGBoost();
+        // cout << "Fitting XGBoost" << endl;
        // xg.fit(Xtrain, ytrain);
        // double xg_score = xg.score(Xtest, ytest);
        //
@@ -100,10 +121,10 @@ int main(int argc, char* argv[])
        cout << "Scoring dataset: " << datasetName << endl;
        cout << "Scores:" << endl;
        cout << "STree Score ......: " << clf_score << endl;
-        cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
-        cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
-        cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
-        cout << "SVC Score ........: " << svc.score(X, y) << endl;
+        // cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
+        // cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
+        // cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
+        // cout << "SVC Score ........: " << svc.score(Xtest, ytest) << endl;
        // cout << "XGBoost Score ....: " << xg_score << endl;
    }
    cout << "* End." << endl;