Refactor Classifier classes
This commit is contained in:
@@ -23,6 +23,7 @@ include(AddGitSubmodule)
|
|||||||
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
|
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
|
||||||
find_package(Torch REQUIRED)
|
find_package(Torch REQUIRED)
|
||||||
find_package(Boost REQUIRED COMPONENTS python3 numpy3)
|
find_package(Boost REQUIRED COMPONENTS python3 numpy3)
|
||||||
|
# find_package(xgboost REQUIRED)
|
||||||
|
|
||||||
# Temporary patch while find_package(Torch) is not fixed
|
# Temporary patch while find_package(Torch) is not fixed
|
||||||
file(
|
file(
|
||||||
|
@@ -5,7 +5,9 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include "Classifier.h"
|
||||||
#include "STree.h"
|
#include "STree.h"
|
||||||
|
#include "ODTE.h"
|
||||||
#include "SVC.h"
|
#include "SVC.h"
|
||||||
#include "RandomForest.h"
|
#include "RandomForest.h"
|
||||||
#include "XGBoost.h"
|
#include "XGBoost.h"
|
||||||
@@ -47,11 +49,22 @@ pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
|
|||||||
shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
|
shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
|
||||||
auto train_indices = torch::zeros({ train_size }, torch::kInt32);
|
auto train_indices = torch::zeros({ train_size }, torch::kInt32);
|
||||||
auto test_indices = torch::zeros({ test_size }, torch::kInt32);
|
auto test_indices = torch::zeros({ test_size }, torch::kInt32);
|
||||||
|
int ti = 0, ei = 0;
|
||||||
|
cout << "Train indices [";
|
||||||
|
for (auto i = 0; i < train_size; ++i) {
|
||||||
|
cout << indices.at(i) << ", ";
|
||||||
|
}
|
||||||
|
cout << "]" << endl;
|
||||||
|
cout << "Test indices [";
|
||||||
|
for (auto i = train_size; i < size; ++i) {
|
||||||
|
cout << indices.at(i) << ", ";
|
||||||
|
}
|
||||||
|
cout << "]" << endl;
|
||||||
for (auto i = 0; i < size; ++i) {
|
for (auto i = 0; i < size; ++i) {
|
||||||
if (i < train_size) {
|
if (i < train_size) {
|
||||||
train_indices[i] = indices[i];
|
train_indices[ti++] = indices.at(i);
|
||||||
} else if (i < size) {
|
} else if (i < size) {
|
||||||
test_indices[i - train_size] = indices[i];
|
test_indices[ei++] = indices.at(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { train_indices, test_indices };
|
return { train_indices, test_indices };
|
||||||
@@ -61,71 +74,52 @@ int main(int argc, char* argv[])
|
|||||||
{
|
{
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
cout << "* Begin." << endl;
|
cout << "* Begin." << endl;
|
||||||
{
|
using namespace torch::indexing;
|
||||||
using namespace torch::indexing;
|
map<string, pywrap::Classifier*> classifiers = {
|
||||||
auto datasetName = "wine";
|
{"STree", new pywrap::STree()}, {"SVC", new pywrap::SVC()},
|
||||||
bool class_last = true;
|
{"RandomForest", new pywrap::RandomForest()},// {"XGBoost", new XGBoost()},
|
||||||
auto [X, y] = loadDataset(datasetName, class_last);
|
{"ODTE", new pywrap::ODTE()}
|
||||||
// Split train/test
|
};
|
||||||
auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
|
//
|
||||||
auto Xtrain = X.index({ "...", train_indices });
|
// Load dataset
|
||||||
auto ytrain = y.index({ train_indices });
|
//
|
||||||
auto Xtest = X.index({ "...", test_indices });
|
auto datasetName = "wine";
|
||||||
auto ytest = y.index({ test_indices });
|
bool class_last = false;
|
||||||
cout << "Dataset: " << datasetName << endl;
|
auto [X, y] = loadDataset(datasetName, class_last);
|
||||||
cout << "X: " << X.sizes() << endl;
|
//
|
||||||
cout << "y: " << y.sizes() << endl;
|
// Split train/test
|
||||||
cout << "Xtrain: " << Xtrain.sizes() << endl;
|
//
|
||||||
cout << "ytrain: " << ytrain.sizes() << endl;
|
auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
|
||||||
cout << "Xtest : " << Xtest.sizes() << endl;
|
auto Xtrain = X.index({ "...", train_indices });
|
||||||
cout << "ytest : " << ytest.sizes() << endl;
|
auto ytrain = y.index({ train_indices });
|
||||||
//
|
auto Xtest = X.index({ "...", test_indices });
|
||||||
// STree
|
auto ytest = y.index({ test_indices });
|
||||||
//
|
cout << "Dataset: " << datasetName << endl;
|
||||||
auto clf = pywrap::STree();
|
cout << "X: " << X.sizes() << endl;
|
||||||
clf.fit(Xtrain, ytest);
|
cout << "y: " << y.sizes() << endl;
|
||||||
double clf_score = clf.score(Xtest, ytest);
|
cout << "Xtrain: " << Xtrain.sizes() << endl;
|
||||||
// auto stree = pywrap::STree();
|
cout << "ytrain: " << ytrain.sizes() << endl;
|
||||||
// auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
|
cout << "Xtest : " << Xtest.sizes() << endl;
|
||||||
// stree.setHyperparameters(hyperparameters);
|
cout << "ytest : " << ytest.sizes() << endl;
|
||||||
// cout << "STree Version: " << clf.version() << endl;
|
//
|
||||||
// auto prediction = clf.predict(X);
|
// Train classifiers
|
||||||
// cout << "Prediction: " << endl << "{";
|
//
|
||||||
// for (int i = 0; i < prediction.size(0); ++i) {
|
for (auto& [name, clf] : classifiers) {
|
||||||
// cout << prediction[i].item<int>() << ", ";
|
cout << "Training " << name << endl;
|
||||||
// }
|
clf->fit(Xtrain, ytrain);
|
||||||
// cout << "}" << endl;
|
}
|
||||||
//
|
//
|
||||||
// SVC
|
// Show scores
|
||||||
//
|
//
|
||||||
// auto svc = pywrap::SVC();
|
for (auto& [name, clf] : classifiers) {
|
||||||
// cout << "SVC with hyperparameters" << endl;
|
cout << "Score " << setw(10) << name << "(Ver. " << clf->version() << "): "
|
||||||
// svc.fit(Xtrain, ytrain);
|
<< clf->score(Xtest, ytest) << endl;
|
||||||
//
|
}
|
||||||
// Random Forest
|
//
|
||||||
//
|
// Free classifiers
|
||||||
// cout << "Building Random Forest" << endl;
|
//
|
||||||
// auto rf = pywrap::RandomForest();
|
for (auto& [name, clf] : classifiers) {
|
||||||
// rf.fit(Xtrain, ytrain);
|
delete clf;
|
||||||
//
|
|
||||||
// XGBoost
|
|
||||||
//
|
|
||||||
// cout << "Building XGBoost" << endl;
|
|
||||||
// auto xg = pywrap::XGBoost();
|
|
||||||
// cout << "Fitting XGBoost" << endl;
|
|
||||||
// xg.fit(Xtrain, ytrain);
|
|
||||||
// double xg_score = xg.score(Xtest, ytest);
|
|
||||||
//
|
|
||||||
// Scoring
|
|
||||||
//
|
|
||||||
cout << "Scoring dataset: " << datasetName << endl;
|
|
||||||
cout << "Scores:" << endl;
|
|
||||||
cout << "STree Score ......: " << clf_score << endl;
|
|
||||||
// cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
|
|
||||||
// cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
|
|
||||||
// cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
|
|
||||||
// cout << "SVC Score ........: " << svc.score(Xtest, ytest) << endl;
|
|
||||||
// cout << "XGBoost Score ....: " << xg_score << endl;
|
|
||||||
}
|
}
|
||||||
cout << "* End." << endl;
|
cout << "* End." << endl;
|
||||||
}
|
}
|
@@ -3,5 +3,6 @@ include_directories(${PyWrap_SOURCE_DIR}/lib/json/include)
|
|||||||
include_directories(${Python3_INCLUDE_DIRS})
|
include_directories(${Python3_INCLUDE_DIRS})
|
||||||
include_directories(${TORCH_INCLUDE_DIRS})
|
include_directories(${TORCH_INCLUDE_DIRS})
|
||||||
|
|
||||||
add_library(PyWrap SHARED PyWrap.cc STree.cc SVC.cc RandomForest.cc PyClassifier.cc)
|
add_library(PyWrap SHARED PyWrap.cc STree.cc ODTE.cc SVC.cc RandomForest.cc PyClassifier.cc)
|
||||||
|
#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles)
|
||||||
target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)
|
target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)
|
@@ -1,13 +1,25 @@
|
|||||||
#ifndef CLASSIFER_H
|
#ifndef CLASSIFIER_H
|
||||||
#define CLASSIFER_H
|
#define CLASSIFIER_H
|
||||||
|
#include <torch/torch.h>
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace pywrap {
|
namespace pywrap {
|
||||||
class Classifier {
|
class Classifier {
|
||||||
public:
|
public:
|
||||||
Classifier() = default;
|
Classifier() = default;
|
||||||
virtual ~Classifier() = default;
|
virtual ~Classifier() = default;
|
||||||
|
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||||
|
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y) = 0;
|
||||||
|
virtual torch::Tensor predict(torch::Tensor& X) = 0;
|
||||||
|
virtual double score(torch::Tensor& X, torch::Tensor& y) = 0;
|
||||||
|
virtual std::string version() = 0;
|
||||||
|
virtual std::string sklearnVersion() = 0;
|
||||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||||
|
protected:
|
||||||
|
virtual void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) = 0;
|
||||||
};
|
};
|
||||||
} /* namespace pywrap */
|
} /* namespace pywrap */
|
||||||
#endif /* CLASSIFER_H */
|
#endif /* CLASSIFIER_H */
|
src/ODTE.cc (new file, normal mode, 15 lines added)
@@ -0,0 +1,15 @@
|
|||||||
|
#include "ODTE.h"
|
||||||
|
|
||||||
|
namespace pywrap {
|
||||||
|
std::string ODTE::graph()
|
||||||
|
{
|
||||||
|
return callMethodString("graph");
|
||||||
|
}
|
||||||
|
void ODTE::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||||
|
{
|
||||||
|
// Check if hyperparameters are valid
|
||||||
|
const std::vector<std::string> validKeys = { "n_jobs", "n_estimators", "random_state" };
|
||||||
|
checkHyperparameters(validKeys, hyperparameters);
|
||||||
|
this->hyperparameters = hyperparameters;
|
||||||
|
}
|
||||||
|
} /* namespace pywrap */
|
src/ODTE.h (new file, normal mode, 15 lines added)
@@ -0,0 +1,15 @@
|
|||||||
|
#ifndef ODTE_H
|
||||||
|
#define ODTE_H
|
||||||
|
#include "nlohmann/json.hpp"
|
||||||
|
#include "PyClassifier.h"
|
||||||
|
|
||||||
|
namespace pywrap {
|
||||||
|
class ODTE : public PyClassifier {
|
||||||
|
public:
|
||||||
|
ODTE() : PyClassifier("odte", "Odte") {};
|
||||||
|
~ODTE() = default;
|
||||||
|
std::string graph();
|
||||||
|
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||||
|
};
|
||||||
|
} /* namespace pywrap */
|
||||||
|
#endif /* ODTE_H */
|
@@ -31,6 +31,10 @@ namespace pywrap {
|
|||||||
{
|
{
|
||||||
return pyWrap->version(id);
|
return pyWrap->version(id);
|
||||||
}
|
}
|
||||||
|
std::string PyClassifier::sklearnVersion()
|
||||||
|
{
|
||||||
|
return pyWrap->sklearnVersion();
|
||||||
|
}
|
||||||
std::string PyClassifier::callMethodString(const std::string& method)
|
std::string PyClassifier::callMethodString(const std::string& method)
|
||||||
{
|
{
|
||||||
return pyWrap->callMethodString(id, method);
|
return pyWrap->callMethodString(id, method);
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
#ifndef PYCLASSIFER_H
|
#ifndef PYCLASSIFIER_H
|
||||||
#define PYCLASSIFER_H
|
#define PYCLASSIFIER_H
|
||||||
#include "boost/python/detail/wrap_python.hpp"
|
#include "boost/python/detail/wrap_python.hpp"
|
||||||
#include <boost/python/numpy.hpp>
|
#include <boost/python/numpy.hpp>
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
@@ -17,15 +17,16 @@ namespace pywrap {
|
|||||||
public:
|
public:
|
||||||
PyClassifier(const std::string& module, const std::string& className);
|
PyClassifier(const std::string& module, const std::string& className);
|
||||||
virtual ~PyClassifier();
|
virtual ~PyClassifier();
|
||||||
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states);
|
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||||
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y);
|
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y) override;
|
||||||
torch::Tensor predict(torch::Tensor& X);
|
torch::Tensor predict(torch::Tensor& X) override;
|
||||||
double score(torch::Tensor& X, torch::Tensor& y);
|
double score(torch::Tensor& X, torch::Tensor& y) override;
|
||||||
std::string version();
|
std::string version() override;
|
||||||
|
std::string sklearnVersion() override;
|
||||||
std::string callMethodString(const std::string& method);
|
std::string callMethodString(const std::string& method);
|
||||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||||
protected:
|
protected:
|
||||||
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
|
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) override;
|
||||||
nlohmann::json hyperparameters;
|
nlohmann::json hyperparameters;
|
||||||
private:
|
private:
|
||||||
PyWrap* pyWrap;
|
PyWrap* pyWrap;
|
||||||
@@ -35,4 +36,4 @@ namespace pywrap {
|
|||||||
bool fitted;
|
bool fitted;
|
||||||
};
|
};
|
||||||
} /* namespace pywrap */
|
} /* namespace pywrap */
|
||||||
#endif /* PYCLASSIFER_H */
|
#endif /* PYCLASSIFIER_H */
|
@@ -42,7 +42,6 @@ namespace pywrap {
|
|||||||
if (result != moduleClassMap.end()) {
|
if (result != moduleClassMap.end()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::cout << "1a" << std::endl;
|
|
||||||
PyObject* module = PyImport_ImportModule(moduleName.c_str());
|
PyObject* module = PyImport_ImportModule(moduleName.c_str());
|
||||||
if (PyErr_Occurred()) {
|
if (PyErr_Occurred()) {
|
||||||
errorAbort("Couldn't import module " + moduleName);
|
errorAbort("Couldn't import module " + moduleName);
|
||||||
@@ -107,6 +106,13 @@ namespace pywrap {
|
|||||||
Py_XDECREF(result);
|
Py_XDECREF(result);
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
std::string PyWrap::sklearnVersion()
|
||||||
|
{
|
||||||
|
return "1.0";
|
||||||
|
// CPyObject data = PyRun_SimpleString("import sklearn;return sklearn.__version__");
|
||||||
|
// std::string result = PyUnicode_AsUTF8(data);
|
||||||
|
// return result;
|
||||||
|
}
|
||||||
std::string PyWrap::version(const clfId_t id)
|
std::string PyWrap::version(const clfId_t id)
|
||||||
{
|
{
|
||||||
return callMethodString(id, "version");
|
return callMethodString(id, "version");
|
||||||
|
@@ -24,6 +24,7 @@ namespace pywrap {
|
|||||||
void operator=(const PyWrap&) = delete;
|
void operator=(const PyWrap&) = delete;
|
||||||
~PyWrap() = default;
|
~PyWrap() = default;
|
||||||
std::string callMethodString(const clfId_t id, const std::string& method);
|
std::string callMethodString(const clfId_t id, const std::string& method);
|
||||||
|
std::string sklearnVersion();
|
||||||
std::string version(const clfId_t id);
|
std::string version(const clfId_t id);
|
||||||
void setHyperparameters(const clfId_t id, const json& hyperparameters);
|
void setHyperparameters(const clfId_t id, const json& hyperparameters);
|
||||||
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
|
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
|
||||||
|
@@ -3,6 +3,6 @@
|
|||||||
namespace pywrap {
|
namespace pywrap {
|
||||||
std::string RandomForest::version()
|
std::string RandomForest::version()
|
||||||
{
|
{
|
||||||
return callMethodString("1.0");
|
return sklearnVersion();
|
||||||
}
|
}
|
||||||
} /* namespace pywrap */
|
} /* namespace pywrap */
|
@@ -3,7 +3,7 @@
|
|||||||
namespace pywrap {
|
namespace pywrap {
|
||||||
std::string SVC::version()
|
std::string SVC::version()
|
||||||
{
|
{
|
||||||
return callMethodString("1.0");
|
return sklearnVersion();
|
||||||
}
|
}
|
||||||
void SVC::setHyperparameters(const nlohmann::json& hyperparameters)
|
void SVC::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user