From a3bf97e5010043fbf5b416a2b697e2bf9d6b2e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 11 Nov 2023 11:19:33 +0100 Subject: [PATCH] refactor fit parameters --- example/example.cc | 91 ++++++++++++++++++++++++++------------------- src/PyClassifier.cc | 6 ++- src/PyClassifier.h | 1 + 3 files changed, 58 insertions(+), 40 deletions(-) diff --git a/example/example.cc b/example/example.cc index 3d17294..1394a6a 100644 --- a/example/example.cc +++ b/example/example.cc @@ -21,25 +21,19 @@ public: } }; -tuple, string, map>> loadDataset(const string& name, bool class_last) +tuple loadDataset(const string& name, bool class_last) { auto handler = ArffFiles(); handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); // Get Dataset X, y vector> X = handler.getX(); vector y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); Tensor Xd; - auto states = map>(); Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); - for (int i = 0; i < features.size(); ++i) { + for (int i = 0; i < X.size(); ++i) { Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); } - return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; + return { Xd, torch::tensor(y, torch::kInt32) }; } int main(int argc, char* argv[]) @@ -50,7 +44,7 @@ int main(int argc, char* argv[]) using namespace torch::indexing; auto datasetName = "iris"; bool class_last = true; - auto [X, y, features, className, states] = loadDataset(datasetName, class_last); + auto [X, y] = loadDataset(datasetName, class_last); auto m = y.size(0); int train_split = m * .7; auto Xtrain = X.index({ "...", Slice(0, train_split) }); @@ -60,38 +54,57 @@ int main(int argc, char* argv[]) cout << "Dataset: " << datasetName << endl; cout << "X: " << X.sizes() << endl; cout << "y: " << y.sizes() << endl; - // auto clf = pywrap::STree(); - // auto stree = pywrap::STree(); - // auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); - // stree.setHyperparameters(hyperparameters); - // cout << "STree Version: " << clf.version() << endl; - // auto svc = pywrap::SVC(); - // cout << "SVC with hyperparameters" << endl; - // svc.fit(X, y, features, className, states); - // cout << "Graph: " << endl << clf.graph() << endl; - // double clf_score = clf.fit(X, y, features, className, states).score(X, y); - // double stree_score = stree.fit(X, y, features, className, states).score(X, y); - // auto prediction = clf.predict(X); - // cout << "Prediction: " << endl << "{"; - // for (int i = 0; i < prediction.size(0); ++i) { - // cout << prediction[i].item() << ", "; - // } - // cout << "}" << endl; - // cout << "Building Random Forest" << endl; - // auto rf = pywrap::RandomForest(); - // rf.fit(X, y, features, className, states); + cout << "Xtrain: " << Xtrain.sizes() << endl; + cout << "ytrain: " << ytrain.sizes() << endl; + cout << "Xtest : " << Xtest.sizes() << endl; + cout << "ytest : " << ytest.sizes() << endl; + // + // STree + // + auto clf = pywrap::STree(); + clf.fit(X, y); + double clf_score = clf.score(X, y); + auto stree = pywrap::STree(); + auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); + stree.setHyperparameters(hyperparameters); + cout << "STree Version: " << clf.version() << endl; + auto prediction = clf.predict(X); + cout << "Prediction: " << endl << "{"; + for (int i = 0; i < prediction.size(0); ++i) { + cout << prediction[i].item() << ", "; + } + cout << "}" << endl; + // + // SVC + // + auto svc = pywrap::SVC(); + cout << "SVC with hyperparameters" << endl; + svc.fit(X, y); + // + // Random Forest + // + cout << "Building Random Forest" << endl; + auto rf = pywrap::RandomForest(); + rf.fit(Xtrain, ytrain); + // + // XGBoost + // cout << "Building XGBoost" << endl; auto xg = pywrap::XGBoost(); cout << "Fitting XGBoost" << endl; - xg.fit(Xtrain, ytrain, features, className, states); - cout << "Scoring dataset" << endl; - double xg_score = xg.score(Xtest, ytest); - // cout << "Scores:" << endl; - // cout << "STree Score ......: " << clf_score << endl; - // cout << "STree hyper score : " << stree_score << endl; - // cout << "RandomForest Score: " << rf.score(X, y) << endl; - // cout << "SVC Score ........: " << svc.score(X, y) << endl; - cout << "XGBoost Score ....: " << xg_score << endl; + // xg.fit(Xtrain, ytrain); + // double xg_score = xg.score(Xtest, ytest); + // + // Scoring + // + cout << "Scoring dataset: " << datasetName << endl; + cout << "Scores:" << endl; + cout << "STree Score ......: " << clf_score << endl; + cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; + cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; + cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl; + cout << "SVC Score ........: " << svc.score(X, y) << endl; + // cout << "XGBoost Score ....: " << xg_score << endl; } cout << "* End." << endl; } \ No newline at end of file diff --git a/src/PyClassifier.cc b/src/PyClassifier.cc index 16de281..312202e 100644 --- a/src/PyClassifier.cc +++ b/src/PyClassifier.cc @@ -35,7 +35,7 @@ namespace pywrap { { return pyWrap->callMethodString(id, method); } - PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) + PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y) { if (!fitted && hyperparameters.size() > 0) { pyWrap->setHyperparameters(id, hyperparameters); @@ -47,6 +47,10 @@ namespace pywrap { fitted = true; return *this; } + PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) + { + return fit(X, y); + } torch::Tensor PyClassifier::predict(torch::Tensor& X) { int dimension = X.size(1); diff --git a/src/PyClassifier.h b/src/PyClassifier.h index 8072e59..39dff49 100644 --- a/src/PyClassifier.h +++ b/src/PyClassifier.h @@ -18,6 +18,7 @@ namespace pywrap { PyClassifier(const std::string& module, const std::string& className); virtual ~PyClassifier(); PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states); + PyClassifier& fit(torch::Tensor& X, torch::Tensor& y); torch::Tensor predict(torch::Tensor& X); double score(torch::Tensor& X, torch::Tensor& y); std::string version();