Refactor Classifier classes

2023-11-12 18:35:29 +01:00
parent c7372b7fc7
commit 0059e269dd
12 changed files with 133 additions and 83 deletions
--- a/example/example.cc
+++ b/example/example.cc
@@ -5,7 +5,9 @@
 #include <vector>
 #include <string>
 #include <iostream>
+#include "Classifier.h"
 #include "STree.h"
+#include "ODTE.h"
 #include "SVC.h"
 #include "RandomForest.h"
 #include "XGBoost.h"
@@ -47,11 +49,22 @@ pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
    shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
    auto train_indices = torch::zeros({ train_size }, torch::kInt32);
    auto test_indices = torch::zeros({ test_size }, torch::kInt32);
+    int ti = 0, ei = 0;
+    cout << "Train indices [";
+    for (auto i = 0; i < train_size; ++i) {
+        cout << indices.at(i) << ", ";
+    }
+    cout << "]" << endl;
+    cout << "Test indices [";
+    for (auto i = train_size; i < size; ++i) {
+        cout << indices.at(i) << ", ";
+    }
+    cout << "]" << endl;
    for (auto i = 0; i < size; ++i) {
        if (i < train_size) {
-            train_indices[i] = indices[i];
+            train_indices[ti++] = indices.at(i);
        } else if (i < size) {
-            test_indices[i - train_size] = indices[i];
+            test_indices[ei++] = indices.at(i);
        }
    }
    return { train_indices, test_indices };
@@ -61,71 +74,52 @@ int main(int argc, char* argv[])
 {
    using json = nlohmann::json;
    cout << "* Begin." << endl;
-    {
-        using namespace torch::indexing;
-        auto datasetName = "wine";
-        bool class_last = true;
-        auto [X, y] = loadDataset(datasetName, class_last);
-        // Split train/test
-        auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
-        auto Xtrain = X.index({ "...", train_indices });
-        auto ytrain = y.index({ train_indices });
-        auto Xtest = X.index({ "...", test_indices });
-        auto ytest = y.index({ test_indices });
-        cout << "Dataset: " << datasetName << endl;
-        cout << "X: " << X.sizes() << endl;
-        cout << "y: " << y.sizes() << endl;
-        cout << "Xtrain: " << Xtrain.sizes() << endl;
-        cout << "ytrain: " << ytrain.sizes() << endl;
-        cout << "Xtest : " << Xtest.sizes() << endl;
-        cout << "ytest : " << ytest.sizes() << endl;
-        //
-        // STree
-        //
-        auto clf = pywrap::STree();
-        clf.fit(Xtrain, ytest);
-        double clf_score = clf.score(Xtest, ytest);
-        // auto stree = pywrap::STree();
-        // auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
-        // stree.setHyperparameters(hyperparameters);
-        // cout << "STree Version: " << clf.version() << endl;
-        // auto prediction = clf.predict(X);
-        // cout << "Prediction: " << endl << "{";
-        // for (int i = 0; i < prediction.size(0); ++i) {
-        //     cout << prediction[i].item<int>() << ", ";
-        // }
-        // cout << "}" << endl;
-        //
-        // SVC
-        //
-        // auto svc = pywrap::SVC();
-        // cout << "SVC with hyperparameters" << endl;
-        // svc.fit(Xtrain, ytrain);
-        //
-        // Random Forest
-        //
-        // cout << "Building Random Forest" << endl;
-        // auto rf = pywrap::RandomForest();
-        // rf.fit(Xtrain, ytrain);
-        //
-        // XGBoost
-        //
-        // cout << "Building XGBoost" << endl;
-        // auto xg = pywrap::XGBoost();
-        // cout << "Fitting XGBoost" << endl;
-        // xg.fit(Xtrain, ytrain);
-        // double xg_score = xg.score(Xtest, ytest);
-        //
-        // Scoring
-        //
-        cout << "Scoring dataset: " << datasetName << endl;
-        cout << "Scores:" << endl;
-        cout << "STree Score ......: " << clf_score << endl;
-        // cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
-        // cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
-        // cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
-        // cout << "SVC Score ........: " << svc.score(Xtest, ytest) << endl;
-        // cout << "XGBoost Score ....: " << xg_score << endl;
+    using namespace torch::indexing;
+    map<string, pywrap::Classifier*> classifiers = {
+        {"STree", new pywrap::STree()}, {"SVC", new pywrap::SVC()},
+        {"RandomForest", new pywrap::RandomForest()},// {"XGBoost", new XGBoost()},
+        {"ODTE", new pywrap::ODTE()}
+    };
+    //
+    // Load the dataset; class_last is passed straight through to loadDataset
+    //
+    auto datasetName = "wine";
+    bool class_last = false;
+    auto [X, y] = loadDataset(datasetName, class_last);
+    //
+    // Split train/test: indices are drawn over X.size(1) and applied to the last dimension of X ("...") and to y
+    //
+    auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
+    auto Xtrain = X.index({ "...", train_indices });
+    auto ytrain = y.index({ train_indices });
+    auto Xtest = X.index({ "...", test_indices });
+    auto ytest = y.index({ test_indices });
+    cout << "Dataset: " << datasetName << endl;
+    cout << "X: " << X.sizes() << endl;
+    cout << "y: " << y.sizes() << endl;
+    cout << "Xtrain: " << Xtrain.sizes() << endl;
+    cout << "ytrain: " << ytrain.sizes() << endl;
+    cout << "Xtest : " << Xtest.sizes() << endl;
+    cout << "ytest : " << ytest.sizes() << endl;
+    //
+    // Train every classifier in the map on the training split
+    //
+    for (auto& [name, clf] : classifiers) {
+        cout << "Training " << name << endl;
+        clf->fit(Xtrain, ytrain);
+    }
+    //
+    // Show each classifier's version and its score on the test split
+    //
+    for (auto& [name, clf] : classifiers) {
+        cout << "Score " << setw(10) << name << "(Ver. " << clf->version() << "): "
+            << clf->score(Xtest, ytest) << endl;
+    }
+    //
+    // Free classifiers: the map holds raw pointers created with new, so delete each one
+    //
+    for (auto& [name, clf] : classifiers) {
+        delete clf;
    }
    cout << "* End." << endl;
 }