// PyWrap/example/example.cc

#include <torch/torch.h>
#include "ArffFiles.h"
#include <vector>
#include <string>
#include <iostream>
#include <map>
#include <tuple>
#include "STree.h"
#include "SVC.h"
#include "RandomForest.h"
#include "XGBoost.h"

using namespace std;
using namespace torch;

/// Filesystem locations used by this example program.
class Paths {
public:
    /// Relative directory that holds the .arff dataset files. The value ends
    /// with a slash so a file name can be appended to it directly.
    static std::string datasets()
    {
        const std::string datasetsDir{ "../discretizbench/datasets/" };
        return datasetsDir;
    }
};

tuple<Tensor, Tensor> loadDataset(const string& name, bool class_last)
{
    auto handler = ArffFiles();
    handler.load(Paths::datasets() + static_cast<string>(name) + ".arff", class_last);
    // Get Dataset X, y
    vector<vector<float>> X = handler.getX();
    vector<int> y = handler.getY();
    Tensor Xd;
    Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
    for (int i = 0; i < X.size(); ++i) {
        Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32));
    }
    return { Xd, torch::tensor(y, torch::kInt32) };
}

int main(int argc, char* argv[])
{
    using json = nlohmann::json;
    cout << "* Begin." << endl;
    {
        using namespace torch::indexing;
        auto datasetName = "iris";
        bool class_last = true;
        auto [X, y] = loadDataset(datasetName, class_last);
        auto m = y.size(0);
        int train_split = m * .7;
        auto Xtrain = X.index({ "...", Slice(0, train_split) });
        auto ytrain = y.index({ Slice(0, train_split) });
        auto Xtest = X.index({ "...", Slice(train_split, m) });
        auto ytest = y.index({ Slice(train_split, m) });
        cout << "Dataset: " << datasetName << endl;
        cout << "X: " << X.sizes() << endl;
        cout << "y: " << y.sizes() << endl;
        cout << "Xtrain: " << Xtrain.sizes() << endl;
        cout << "ytrain: " << ytrain.sizes() << endl;
        cout << "Xtest : " << Xtest.sizes() << endl;
        cout << "ytest : " << ytest.sizes() << endl;
        //
        // STree
        //
        auto clf = pywrap::STree();
        clf.fit(X, y);
        double clf_score = clf.score(X, y);
        auto stree = pywrap::STree();
        auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
        stree.setHyperparameters(hyperparameters);
        cout << "STree Version: " << clf.version() << endl;
        auto prediction = clf.predict(X);
        cout << "Prediction: " << endl << "{";
        for (int i = 0; i < prediction.size(0); ++i) {
            cout << prediction[i].item<int>() << ", ";
        }
        cout << "}" << endl;
        //
        // SVC
        //
        auto svc = pywrap::SVC();
        cout << "SVC with hyperparameters" << endl;
        svc.fit(X, y);
        //
        // Random Forest
        //
        cout << "Building Random Forest" << endl;
        auto rf = pywrap::RandomForest();
        rf.fit(Xtrain, ytrain);
        //
        // XGBoost
        //
        cout << "Building XGBoost" << endl;
        auto xg = pywrap::XGBoost();
        cout << "Fitting XGBoost" << endl;
        // xg.fit(Xtrain, ytrain);
        // double xg_score = xg.score(Xtest, ytest);
        //
        // Scoring
        //
        cout << "Scoring dataset: " << datasetName << endl;
        cout << "Scores:" << endl;
        cout << "STree Score ......: " << clf_score << endl;
        cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
        cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
        cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
        cout << "SVC Score ........: " << svc.score(X, y) << endl;
        // cout << "XGBoost Score ....: " << xg_score << endl;
    }
    cout << "* End." << endl;
}