From b6a3a05020e86d9ed4d0d90fea74ea97c44d73c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 11 Nov 2023 10:52:35 +0100 Subject: [PATCH] refactor folders --- CMakeLists.txt | 3 +- example/CMakeLists.txt | 8 ++++ example/example.cc | 97 ++++++++++++++++++++++++++++++++++++++++++ pspp.jnl | 3 -- src/CMakeLists.txt | 6 +-- src/PyClassifier.cc | 4 +- src/PyWrap.cc | 15 +++---- src/TypeId.h | 4 +- src/example.cc | 84 ------------------------------------ 9 files changed, 119 insertions(+), 105 deletions(-) create mode 100644 example/CMakeLists.txt create mode 100644 example/example.cc delete mode 100644 pspp.jnl delete mode 100644 src/example.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c9142b..c7227b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,4 +40,5 @@ add_subdirectory(lib/Files) # Include directories # ------------------- -add_subdirectory(src) \ No newline at end of file +add_subdirectory(src) +add_subdirectory(example) \ No newline at end of file diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 0000000..32366c8 --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories(${PyWrap_SOURCE_DIR}/lib/Files) +include_directories(${PyWrap_SOURCE_DIR}/lib/json/include) +include_directories(${Python3_INCLUDE_DIRS}) +include_directories(${PyWrap_SOURCE_DIR}/src) +include_directories(${TORCH_INCLUDE_DIRS}) + +add_executable(example example.cc) +target_link_libraries(example PyWrap) \ No newline at end of file diff --git a/example/example.cc b/example/example.cc new file mode 100644 index 0000000..3d17294 --- /dev/null +++ b/example/example.cc @@ -0,0 +1,97 @@ +#include +#include "ArffFiles.h" +#include +#include +#include +#include +#include +#include "STree.h" +#include "SVC.h" +#include "RandomForest.h" +#include "XGBoost.h" + +using namespace std; +using namespace torch; + +class Paths { +public: + static string datasets() + { + return "../discretizbench/datasets/"; + } +}; + +tuple, string, map>> loadDataset(const string& name, bool class_last) +{ + auto handler = ArffFiles(); + handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); + // Get Dataset X, y + vector> X = handler.getX(); + vector y = handler.getY(); + // Get className & Features + auto className = handler.getClassName(); + vector features; + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); + Tensor Xd; + auto states = map>(); + Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); + for (int i = 0; i < features.size(); ++i) { + Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); + } + return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; +} + +int main(int argc, char* argv[]) +{ + using json = nlohmann::json; + cout << "* Begin." << endl; + { + using namespace torch::indexing; + auto datasetName = "iris"; + bool class_last = true; + auto [X, y, features, className, states] = loadDataset(datasetName, class_last); + auto m = y.size(0); + int train_split = m * .7; + auto Xtrain = X.index({ "...", Slice(0, train_split) }); + auto ytrain = y.index({ Slice(0, train_split) }); + auto Xtest = X.index({ "...", Slice(train_split, m) }); + auto ytest = y.index({ Slice(train_split, m) }); + cout << "Dataset: " << datasetName << endl; + cout << "X: " << X.sizes() << endl; + cout << "y: " << y.sizes() << endl; + // auto clf = pywrap::STree(); + // auto stree = pywrap::STree(); + // auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); + // stree.setHyperparameters(hyperparameters); + // cout << "STree Version: " << clf.version() << endl; + // auto svc = pywrap::SVC(); + // cout << "SVC with hyperparameters" << endl; + // svc.fit(X, y, features, className, states); + // cout << "Graph: " << endl << clf.graph() << endl; + // double clf_score = clf.fit(X, y, features, className, states).score(X, y); + // double stree_score = stree.fit(X, y, features, className, states).score(X, y); + // auto prediction = clf.predict(X); + // cout << "Prediction: " << endl << "{"; + // for (int i = 0; i < prediction.size(0); ++i) { + // cout << prediction[i].item() << ", "; + // } + // cout << "}" << endl; + // cout << "Building Random Forest" << endl; + // auto rf = pywrap::RandomForest(); + // rf.fit(X, y, features, className, states); + cout << "Building XGBoost" << endl; + auto xg = pywrap::XGBoost(); + cout << "Fitting XGBoost" << endl; + xg.fit(Xtrain, ytrain, features, className, states); + cout << "Scoring dataset" << endl; + double xg_score = xg.score(Xtest, ytest); + // cout << "Scores:" << endl; + // cout << "STree Score ......: " << clf_score << endl; + // cout << "STree hyper score : " << stree_score << endl; + // cout << "RandomForest Score: " << rf.score(X, y) << endl; + // cout << "SVC Score ........: " << svc.score(X, y) << endl; + cout << "XGBoost Score ....: " << xg_score << endl; + } + cout << "* End." << endl; +} \ No newline at end of file diff --git a/pspp.jnl b/pspp.jnl deleted file mode 100644 index 3072ca7..0000000 --- a/pspp.jnl +++ /dev/null @@ -1,3 +0,0 @@ -GET FILE="/home/rmontanana/Code/covbench/data/covid_v9_20220630.sav". -SHOW SYSTEM. -SHOW SYSTEM. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index afb0379..6970ec1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,9 +3,5 @@ include_directories(${PyWrap_SOURCE_DIR}/lib/json/include) include_directories(${Python3_INCLUDE_DIRS}) include_directories(${TORCH_INCLUDE_DIRS}) - add_library(PyWrap SHARED PyWrap.cc STree.cc SVC.cc RandomForest.cc PyClassifier.cc) -target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles) - -add_executable(example example.cc) -target_link_libraries(example PyWrap) \ No newline at end of file +target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles) \ No newline at end of file diff --git a/src/PyClassifier.cc b/src/PyClassifier.cc index 8fb9c02..16de281 100644 --- a/src/PyClassifier.cc +++ b/src/PyClassifier.cc @@ -61,8 +61,8 @@ namespace pywrap { throw std::runtime_error("Error creating object for predict in " + module + " and class " + className); } int* data = reinterpret_cast(prediction.get_data()); - std::vector v1(data, data + prediction.shape(0)); - auto resultTensor = torch::tensor(v1, torch::kInt32); + std::vector vPrediction(data, data + prediction.shape(0)); + auto resultTensor = torch::tensor(vPrediction, torch::kInt32); Py_XDECREF(incoming); return resultTensor; } diff --git a/src/PyWrap.cc b/src/PyWrap.cc index 0a131a2..647cdff 100644 --- a/src/PyWrap.cc +++ b/src/PyWrap.cc @@ -142,24 +142,22 @@ namespace pywrap { { PyObject* instance = getClass(id); CPyObject result; - std::string method = "fit"; + CPyObject method = PyUnicode_FromString("fit"); try { - if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X.getObject(), y.getObject(), NULL))) + if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL))) errorAbort("Couldn't call method fit"); } catch (const std::exception& e) { errorAbort(e.what()); } - // Py_XDECREF(result); } - PyObject* PyWrap::predict(const clfId_t id, CPyObject& X) { PyObject* instance = getClass(id); PyObject* result; - std::string method = "predict"; + CPyObject method = PyUnicode_FromString("predict"); try { - if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X.getObject(), NULL))) + if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL))) errorAbort("Couldn't call method predict"); } catch (const std::exception& e) { @@ -172,16 +170,15 @@ namespace pywrap { { PyObject* instance = getClass(id); CPyObject result; - std::string method = "score"; + CPyObject method = PyUnicode_FromString("score"); try { - if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X.getObject(), y.getObject(), NULL))) + if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL))) errorAbort("Couldn't call method score"); } catch (const std::exception& e) { errorAbort(e.what()); } double resultValue = PyFloat_AsDouble(result); - // Py_XDECREF(result); return resultValue; } } \ No newline at end of file diff --git a/src/TypeId.h b/src/TypeId.h index 54b08a4..d519367 100644 --- a/src/TypeId.h +++ b/src/TypeId.h @@ -1,4 +1,6 @@ #ifndef TYPEDEF_H #define TYPEDEF_H -typedef uint64_t clfId_t; +namespace pywrap { + typedef uint64_t clfId_t; +} #endif /* TYPEDEF_H */ \ No newline at end of file diff --git a/src/example.cc b/src/example.cc deleted file mode 100644 index f5983cc..0000000 --- a/src/example.cc +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include "ArffFiles.h" -#include -#include -#include -#include -#include -#include "STree.h" -#include "SVC.h" -#include "RandomForest.h" -#include "XGBoost.h" - -using namespace std; -using namespace torch; - -class Paths { -public: - static string datasets() - { - return "../discretizbench/datasets/"; - } -}; - -tuple, string, map>> loadDataset(const string& name, bool class_last) -{ - auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); - // Get Dataset X, y - vector> X = handler.getX(); - vector y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); - Tensor Xd; - auto states = map>(); - Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); - for (int i = 0; i < features.size(); ++i) { - Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); - } - return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; -} - -int main(int argc, char* argv[]) -{ - using json = nlohmann::json; - cout << "* Begin." << endl; - { - auto datasetName = "iris"; - bool class_last = true; - auto [X, y, features, className, states] = loadDataset(datasetName, class_last); - cout << "Dataset: " << datasetName << endl; - cout << "X: " << X.sizes() << endl; - cout << "y: " << y.sizes() << endl; - auto clf = pywrap::STree(); - auto stree = pywrap::STree(); - auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); - stree.setHyperparameters(hyperparameters); - cout << "STree Version: " << clf.version() << endl; - auto svc = pywrap::SVC(); - cout << "SVC with hyperparameters" << endl; - svc.fit(X, y, features, className, states); - cout << "Graph: " << endl << clf.graph() << endl; - clf.fit(X, y, features, className, states); - stree.fit(X, y, features, className, states); - auto prediction = clf.predict(X); - cout << "Prediction: " << endl << "{"; - for (int i = 0; i < prediction.size(0); ++i) { - cout << prediction[i].item() << ", "; - } - cout << "}" << endl; - auto rf = pywrap::RandomForest(); - rf.fit(X, y, features, className, states); - auto xg = pywrap::XGBoost(); - xg.fit(X, y, features, className, states); - cout << "STree Score ......: " << clf.score(X, y) << endl; - cout << "STree hyper score : " << stree.score(X, y) << endl; - cout << "RandomForest Score: " << rf.score(X, y) << endl; - cout << "SVC Score ........: " << svc.score(X, y) << endl; - cout << "XGBoost Score ....: " << xg.score(X, y) << endl; - } - cout << "* End." << endl; -} \ No newline at end of file