#include #include "ArffFiles.h" #include #include #include #include #include #include "STree.h" #include "SVC.h" #include "RandomForest.h" #include "XGBoost.h" using namespace std; using namespace torch; class Paths { public: static string datasets() { return "../discretizbench/datasets/"; } }; tuple loadDataset(const string& name, bool class_last) { auto handler = ArffFiles(); handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); // Get Dataset X, y vector> X = handler.getX(); vector y = handler.getY(); Tensor Xd; Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); for (int i = 0; i < X.size(); ++i) { Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); } return { Xd, torch::tensor(y, torch::kInt32) }; } int main(int argc, char* argv[]) { using json = nlohmann::json; cout << "* Begin." << endl; { using namespace torch::indexing; auto datasetName = "iris"; bool class_last = true; auto [X, y] = loadDataset(datasetName, class_last); auto m = y.size(0); int train_split = m * .7; auto Xtrain = X.index({ "...", Slice(0, train_split) }); auto ytrain = y.index({ Slice(0, train_split) }); auto Xtest = X.index({ "...", Slice(train_split, m) }); auto ytest = y.index({ Slice(train_split, m) }); cout << "Dataset: " << datasetName << endl; cout << "X: " << X.sizes() << endl; cout << "y: " << y.sizes() << endl; cout << "Xtrain: " << Xtrain.sizes() << endl; cout << "ytrain: " << ytrain.sizes() << endl; cout << "Xtest : " << Xtest.sizes() << endl; cout << "ytest : " << ytest.sizes() << endl; // // STree // auto clf = pywrap::STree(); clf.fit(X, y); double clf_score = clf.score(X, y); auto stree = pywrap::STree(); auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); stree.setHyperparameters(hyperparameters); cout << "STree Version: " << clf.version() << endl; auto prediction = clf.predict(X); cout << "Prediction: " << endl << "{"; for (int i = 0; i < prediction.size(0); ++i) { cout << prediction[i].item() << ", "; } cout << "}" << endl; // // SVC // auto svc = pywrap::SVC(); cout << "SVC with hyperparameters" << endl; svc.fit(X, y); // // Random Forest // cout << "Building Random Forest" << endl; auto rf = pywrap::RandomForest(); rf.fit(Xtrain, ytrain); // // XGBoost // cout << "Building XGBoost" << endl; auto xg = pywrap::XGBoost(); cout << "Fitting XGBoost" << endl; // xg.fit(Xtrain, ytrain); // double xg_score = xg.score(Xtest, ytest); // // Scoring // cout << "Scoring dataset: " << datasetName << endl; cout << "Scores:" << endl; cout << "STree Score ......: " << clf_score << endl; cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl; cout << "SVC Score ........: " << svc.score(X, y) << endl; // cout << "XGBoost Score ....: " << xg_score << endl; } cout << "* End." << endl; }