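// Sample program: loads an ARFF dataset with ArffFiles, builds libtorch tensors,
// splits them into train/test partitions, fits several pywrap classifiers
// (STree, SVC, RandomForest, ODTE) and reports each classifier's version and score.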
#include <torch/torch.h>
#include "ArffFiles.h"
#include <random>
#include <algorithm>
#include <numeric>           // std::iota
#include <iomanip>           // std::setw
#include <map>
#include <tuple>
#include <utility>
#include <vector>
#include <string>
#include <iostream>
#include <nlohmann/json.hpp> // nlohmann::json alias used in main()
#include "Classifier.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
#include "XGBoost.h"

using namespace std;
using namespace torch;

class Paths {
public:
    static string datasets()
    {
        return "../discretizbench/datasets/";
    }
};

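// Load an ARFF dataset and return it as a pair of tensors:
// X has shape (n_features, n_samples) -- one row per attribute -- and y holds the class labels.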
tuple<Tensor, Tensor> loadDataset(const string& name, bool class_last)
{
    auto handler = ArffFiles();
    handler.load(Paths::datasets() + name + ".arff", class_last);
    // Get dataset X, y
    vector<vector<float>> X = handler.getX();
    vector<int> y = handler.getY();
    // Copy each attribute vector into a row of the feature tensor
    Tensor Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
    for (int i = 0; i < static_cast<int>(X.size()); ++i) {
        Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32));
    }
    return { Xd, torch::tensor(y, torch::kInt32) };
}

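// Shuffle the sample indices with a fixed seed and split them 70/30 into
// train and test index tensors.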
pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
{
    int seed = 17;
    float train_size_p = 0.7;
    int train_size = static_cast<int>(size * train_size_p);
    int test_size = size - train_size;
    std::vector<int> indices(size);
    std::iota(indices.begin(), indices.end(), 0);
    shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
    auto train_indices = torch::zeros({ train_size }, torch::kInt32);
    auto test_indices = torch::zeros({ test_size }, torch::kInt32);
    int ti = 0, ei = 0;
    cout << "Train indices [";
    for (auto i = 0; i < train_size; ++i) {
        cout << indices.at(i) << ", ";
    }
    cout << "]" << endl;
    cout << "Test indices [";
    for (auto i = train_size; i < size; ++i) {
        cout << indices.at(i) << ", ";
    }
    cout << "]" << endl;
    // The first train_size shuffled indices go to train, the rest to test
    for (auto i = 0; i < size; ++i) {
        if (i < train_size) {
            train_indices[ti++] = indices.at(i);
        } else {
            test_indices[ei++] = indices.at(i);
        }
    }
    return { train_indices, test_indices };
}

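// Entry point: build the classifier map, load the "wine" dataset, split it,
// fit every classifier on the training partition and print its test score.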
int main(int argc, char* argv[])
{
    using json = nlohmann::json;
    cout << "* Begin." << endl;
    using namespace torch::indexing;
    map<string, pywrap::Classifier*> classifiers = {
        {"STree", new pywrap::STree()}, {"SVC", new pywrap::SVC()},
        {"RandomForest", new pywrap::RandomForest()}, // {"XGBoost", new XGBoost()},
        {"ODTE", new pywrap::ODTE()}
    };
    //
    // Load dataset
    //
    auto datasetName = "wine";
    bool class_last = false;
    auto [X, y] = loadDataset(datasetName, class_last);
    //
    // Split train/test (samples are the columns of X)
    //
    auto [train_indices, test_indices] = get_train_test_indices(static_cast<int>(X.size(1)));
    auto Xtrain = X.index({ "...", train_indices });
    auto ytrain = y.index({ train_indices });
    auto Xtest = X.index({ "...", test_indices });
    auto ytest = y.index({ test_indices });
    cout << "Dataset: " << datasetName << endl;
    cout << "X: " << X.sizes() << endl;
    cout << "y: " << y.sizes() << endl;
    cout << "Xtrain: " << Xtrain.sizes() << endl;
    cout << "ytrain: " << ytrain.sizes() << endl;
    cout << "Xtest : " << Xtest.sizes() << endl;
    cout << "ytest : " << ytest.sizes() << endl;
    //
    // Train classifiers
    //
    for (auto& [name, clf] : classifiers) {
        cout << "Training " << name << endl;
        clf->fit(Xtrain, ytrain);
    }
    //
    // Show scores
    //
    for (auto& [name, clf] : classifiers) {
        cout << "Score " << setw(10) << name << "(Ver. " << clf->version() << "): "
            << clf->score(Xtest, ytest) << endl;
    }
    //
    // Free classifiers
    //
    for (auto& [name, clf] : classifiers) {
        delete clf;
    }
    cout << "* End." << endl;
    return 0;
}