diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 5f9039b..0791b96 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -1,4 +1,6 @@ #include "CPPFImdlp.h" +#include +#include namespace CPPFImdlp { CPPFImdlp::CPPFImdlp() @@ -7,23 +9,38 @@ namespace CPPFImdlp CPPFImdlp::~CPPFImdlp() { } - std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) + std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) { - std::vector cutPts; - int i, ant = X.at(0), anty = y.at(0); - int n = X.size(); - for (i = 1; i < n; i++) + std::vector cutPts; + double antx; + // int anty; + std::vector indices = sortIndices(X); + antx = X.at(indices[0]); + // anty = y.at(indices[0]); + for (auto index = indices.begin(); index != indices.end(); ++index) { - if (X.at(i) != ant) + // std::cout << X.at(*index) << " -> " << y.at(*index) << " // "; + // Definition 2 Cut points are always on boundaries + // if (y.at(*index) != anty && antx < X.at(*index)) + // Weka implementation + if (antx < X.at(*index)) { - if (y.at(i) != anty) - { - cutPts.push_back(float(X.at(i) + ant) / 2); - ant = X.at(i); - anty = y.at(i); - } + // std::cout << "* (" << X.at(*index) << ", " << antx << ") // "; + cutPts.push_back((X.at(*index) + antx) / 2); + // anty = y.at(*index); } + antx = X.at(*index); } + // std::cout << std::endl; return cutPts; } -} \ No newline at end of file + std::vector CPPFImdlp::sortIndices(std::vector &X) + { + std::vector idx(X.size()); + std::iota(idx.begin(), idx.end(), 0); + for (std::size_t i = 0; i < X.size(); i++) + stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2) + { return X[i1] < X[i2]; }); + return idx; + } +} diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index 81f589a..7c01eb1 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -2,14 +2,18 @@ #define CPPFIMDLP_H #include #include +#include namespace CPPFImdlp { class CPPFImdlp { + private: + std::vector sortIndices(std::vector &); + public: CPPFImdlp(); ~CPPFImdlp(); - std::vector cutPoints(std::vector &, std::vector &); + std::vector cutPoints(std::vector &, std::vector &); }; } #endif \ No newline at end of file diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index 8317808..33f7542 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -5,7 +5,7 @@ from libcpp.vector cimport vector cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp": cdef cppclass CPPFImdlp: CPPFImdlp() except + - vector[float] cutPoints(vector[int]&, vector[int]&) + vector[double] cutPoints(vector[float]&, vector[int]&) cdef class CFImdlp: cdef CPPFImdlp *thisptr diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index 4e5cb5b..633001c 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py index 34584d0..14888b3 100644 --- a/fimdlp/mdlp.py +++ b/fimdlp/mdlp.py @@ -96,8 +96,17 @@ class FImdlp(TransformerMixin, BaseEstimator): "Shape of input is different from what was seen" "in `fit`" ) print("Cut points for each feature in Iris dataset:") - for i in range(0, self.n_features_): - data = np.sort(X[:, i]) - Xcutpoints = self.discretizer_.cut_points(data, self.y_) + yz = self.y_.copy() + xz = X[:, 0].copy() + xzz = self.discretizer_.sort_vectors(xz, yz) + print("Xz: ", xz) + print("Yz: ", yz) + print("Xzz: ", xzz) + print("Solución:") + print("Xz*: ", np.sort(X[:, 0])) + print("yz*: ", yz[np.argsort(X[:, 0])]) + for i in range(0, 1): # self.n_features_): + datax = np.sort(X[:, i]) + Xcutpoints = self.discretizer_.cut_points(datax, self.y_) print(f"{self.features_[i]:20s}: {Xcutpoints}") return X diff --git a/sample.py b/sample.py index 85f43d8..162268e 100644 --- a/sample.py +++ b/sample.py @@ -1,9 +1,13 @@ from sklearn.datasets import load_iris from fimdlp.mdlp import FImdlp +from fimdlp.cppfimdlp import CFImdlp data = load_iris() X = data.data y = data.target features = data.feature_names -test = FImdlp() -Xcutpoints = test.fit(X, y, features=features).transform(X) +# test = FImdlp() +# Xcutpoints = test.fit(X, y, features=features).transform(X) +clf = CFImdlp() +print("Cut points for feature 0 in Iris dataset:") +print(clf.cut_points(X[:, 0], y)) diff --git a/setup.py b/setup.py index 6e9a25d..0c813a3 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,13 @@ setup( ext_modules=[ Extension( name="cppfimdlp", - sources=["fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp"], + sources=[ + "fimdlp/cfimdlp.pyx", + "fimdlp/CPPFImdlp.cpp", + ], language="c++", include_dirs=["fimdlp"], + extra_compile_args=["-std=c++20"], ), ] )