#include #include "ArffFiles.h" #include #include #include #include #include #include "Classifier.h" #include "STree.h" #include "ODTE.h" #include "SVC.h" #include "RandomForest.h" #include "XGBoost.h" using namespace std; using namespace torch; class Paths { public: static string datasets() { return "../discretizbench/datasets/"; } }; tuple loadDataset(const string& name, bool class_last) { auto handler = ArffFiles(); handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); // Get Dataset X, y vector> X = handler.getX(); vector y = handler.getY(); Tensor Xd; Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); for (int i = 0; i < X.size(); ++i) { Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); } return { Xd, torch::tensor(y, torch::kInt32) }; } pair get_train_test_indices(int size) { int seed = 17; float train_size_p = 0.7; int train_size = static_cast(size * train_size_p); int test_size = size - train_size; std::vector indices(size); std::iota(indices.begin(), indices.end(), 0); shuffle(indices.begin(), indices.end(), std::default_random_engine(seed)); auto train_indices = torch::zeros({ train_size }, torch::kInt32); auto test_indices = torch::zeros({ test_size }, torch::kInt32); int ti = 0, ei = 0; cout << "Train indices ["; for (auto i = 0; i < train_size; ++i) { cout << indices.at(i) << ", "; } cout << "]" << endl; cout << "Test indices ["; for (auto i = train_size; i < size; ++i) { cout << indices.at(i) << ", "; } cout << "]" << endl; for (auto i = 0; i < size; ++i) { if (i < train_size) { train_indices[ti++] = indices.at(i); } else if (i < size) { test_indices[ei++] = indices.at(i); } } return { train_indices, test_indices }; } int main(int argc, char* argv[]) { using json = nlohmann::json; cout << "* Begin." << endl; using namespace torch::indexing; map classifiers = { {"STree", new pywrap::STree()}, {"SVC", new pywrap::SVC()}, {"RandomForest", new pywrap::RandomForest()},// {"XGBoost", new XGBoost()}, {"ODTE", new pywrap::ODTE()} }; // // Load dataset // auto datasetName = "wine"; bool class_last = false; auto [X, y] = loadDataset(datasetName, class_last); // // Split train/test // auto [train_indices, test_indices] = get_train_test_indices(X.size(1)); auto Xtrain = X.index({ "...", train_indices }); auto ytrain = y.index({ train_indices }); auto Xtest = X.index({ "...", test_indices }); auto ytest = y.index({ test_indices }); cout << "Dataset: " << datasetName << endl; cout << "X: " << X.sizes() << endl; cout << "y: " << y.sizes() << endl; cout << "Xtrain: " << Xtrain.sizes() << endl; cout << "ytrain: " << ytrain.sizes() << endl; cout << "Xtest : " << Xtest.sizes() << endl; cout << "ytest : " << ytest.sizes() << endl; // // Train classifiers // for (auto& [name, clf] : classifiers) { cout << "Training " << name << endl; clf->fit(Xtrain, ytrain); } // // Show scores // for (auto& [name, clf] : classifiers) { cout << "Score " << setw(10) << name << "(Ver. " << clf->version() << "): " << clf->score(Xtest, ytest) << endl; } // // Free classifiers // for (auto& [name, clf] : classifiers) { delete clf; } cout << "* End." << endl; }