diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 0791b96..be825a6 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -3,35 +3,45 @@ #include namespace CPPFImdlp { - CPPFImdlp::CPPFImdlp() + CPPFImdlp::CPPFImdlp() : debug(false), precision(6) { + divider = pow(10, precision); + } + CPPFImdlp::CPPFImdlp(int precision, bool debug) : debug(debug), precision(precision) + { + divider = pow(10, precision); } CPPFImdlp::~CPPFImdlp() { } - std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) + std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) { - std::vector cutPts; - double antx; - // int anty; + std::vector cutPts; + float antx, cutPoint; + int anty; std::vector indices = sortIndices(X); antx = X.at(indices[0]); - // anty = y.at(indices[0]); + anty = y.at(indices[0]); for (auto index = indices.begin(); index != indices.end(); ++index) { // std::cout << X.at(*index) << " -> " << y.at(*index) << " // "; // Definition 2 Cut points are always on boundaries - // if (y.at(*index) != anty && antx < X.at(*index)) + if (y.at(*index) != anty && antx < X.at(*index)) // Weka implementation - if (antx < X.at(*index)) + // if (antx < X.at(*index)) { - // std::cout << "* (" << X.at(*index) << ", " << antx << ") // "; - cutPts.push_back((X.at(*index) + antx) / 2); - // anty = y.at(*index); + cutPoint = round((X.at(*index) + antx) / 2 * divider) / divider; + if (debug) + { + std::cout << "Cut point: " << (antx + X.at(*index)) / 2 << " //"; + std::cout << X.at(*index) << " -> " << y.at(*index) << " anty= " << anty; + std::cout << "* (" << X.at(*index) << ", " << antx << ")=" << ((X.at(*index) + antx) / 2) << std::endl; + } + cutPts.push_back(cutPoint); } antx = X.at(*index); + anty = y.at(*index); } - // std::cout << std::endl; return cutPts; } std::vector CPPFImdlp::sortIndices(std::vector &X) diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index 7c01eb1..21febb8 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -8,12 +8,17 @@ namespace CPPFImdlp class CPPFImdlp { private: - std::vector sortIndices(std::vector &); + bool debug; + int precision; + float divider; + std::vector + sortIndices(std::vector &); public: CPPFImdlp(); + CPPFImdlp(int, bool debug = false); ~CPPFImdlp(); - std::vector cutPoints(std::vector &, std::vector &); + std::vector cutPoints(std::vector &, std::vector &); }; } #endif \ No newline at end of file diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index 33f7542..64ab59a 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -1,16 +1,18 @@ # distutils: language = c++ # cython: language_level = 3 from libcpp.vector cimport vector +from libcpp cimport bool cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": cdef cppclass CPPFImdlp: CPPFImdlp() except + - vector[double] cutPoints(vector[float]&, vector[int]&) + CPPFImdlp(int, bool) except + + vector[float] cutPoints(vector[float]&, vector[int]&) cdef class CFImdlp: cdef CPPFImdlp *thisptr - def __cinit__(self): - self.thisptr = new CPPFImdlp() + def __cinit__(self, precision=6, debug=False): + self.thisptr = new CPPFImdlp(precision, debug) def __dealloc__(self): del self.thisptr def cut_points(self, X, y): diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index 633001c..e960773 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py index 14888b3..3959271 100644 --- a/fimdlp/mdlp.py +++ b/fimdlp/mdlp.py @@ -19,9 +19,6 @@ class FImdlp(TransformerMixin, BaseEstimator): The number of features of the data passed to :meth:`fit`. """ - def __init__(self): - pass - def _check_params_fit(self, X, y, expected_args, kwargs): """Check the common parameters passed to fit""" # Check that X and y have correct shape @@ -93,7 +90,7 @@ class FImdlp(TransformerMixin, BaseEstimator): # during fit. if X.shape[1] != self.n_features_: raise ValueError( - "Shape of input is different from what was seen" "in `fit`" + "Shape of input is different from what was seen in `fit`" ) print("Cut points for each feature in Iris dataset:") yz = self.y_.copy() diff --git a/sample.py b/sample.py index 162268e..911dee0 100644 --- a/sample.py +++ b/sample.py @@ -6,8 +6,8 @@ data = load_iris() X = data.data y = data.target features = data.feature_names -# test = FImdlp() +test = FImdlp() # Xcutpoints = test.fit(X, y, features=features).transform(X) -clf = CFImdlp() +clf = CFImdlp(debug=True) print("Cut points for feature 0 in Iris dataset:") print(clf.cut_points(X[:, 0], y))