diff --git a/Makefile b/Makefile
index ee03ff6..95c87ef 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,9 @@ buildext: ## Build extension
 	rm -fr build/*
 	make clean
 	python setup.py build_ext
-	echo "Build extension success"; mv build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so fimdlp;
+	echo "Build extension success"
+	if [ -f build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so fimdlp; fi
+	if [ -f build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so fimdlp; fi
 
 audit: ## Audit pip
 	pip-audit
diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp
index d9a49ec..516e958 100644
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -18,6 +18,59 @@ namespace CPPFImdlp {
     }
 
     std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
+    {
+        // Candidate cut points for one feature: walk the samples in the order
+        // produced by sortIndices(X) and emit the midpoint of two consecutive
+        // X values whenever the class label changes between them.
+        std::vector<float> cutPts;
+        std::vector<size_t> indices = sortIndices(X);
+        // Empty input: return early instead of letting indices.at(0) throw
+        // std::out_of_range (the .pyx declares cutPoints without "except +",
+        // so the exception would not be translated for Python callers).
+        if (indices.empty())
+            return cutPts;
+        float xPrev = X.at(indices.at(0));
+        int yPrev = y.at(indices.at(0));
+        // The scan starts on the first sample, so "current" equals "previous".
+        float curx = xPrev;
+        int cury = yPrev;
+        size_t idx = 0;
+        while (idx < indices.size() - 1)
+        {
+            if (debug)
+                printf("<idx=%lu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, yPrev);
+            // Read the same values and check class changes
+            while (idx < indices.size() - 1 && curx == xPrev)
+            {
+                idx++;
+                curx = X.at(indices.at(idx));
+                cury = y.at(indices.at(idx));
+                if (cury != yPrev && curx == xPrev)
+                {
+                    // -1 flags a run of equal X values holding several classes.
+                    // NOTE(review): assumes -1 is never a real label - confirm.
+                    yPrev = -1;
+                }
+                if (debug)
+                    printf(">idx=%lu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, yPrev);
+            }
+            // NOTE(review): if the last run of equal X values is mixed, curx
+            // still equals xPrev here and the pushed "midpoint" is that value
+            // itself - confirm this is intended.
+            if (yPrev == -1 || yPrev != cury)
+            {
+                float cutPoint = (xPrev + curx) / 2;
+                // Trace output only in debug mode, one cut point per line.
+                if (debug)
+                    printf("Cutpoint (%3.1f, %d) -> (%3.1f, %d) = %3.1f\n", xPrev, yPrev, curx, cury, cutPoint);
+                cutPts.push_back(cutPoint);
+            }
+            yPrev = cury;
+            xPrev = curx;
+        }
+        return cutPts;
+    }
+    std::vector<float> CPPFImdlp::cutPointsAnt(std::vector<float> &X, std::vector<int> &y)
     {
         std::vector<float> cutPts;
         std::vector<size_t> cutIdx;
diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h
index 21febb8..dbfb4c4 100644
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -19,6 +19,7 @@ namespace CPPFImdlp
         CPPFImdlp(int, bool debug = false);
         ~CPPFImdlp();
         std::vector<float> cutPoints(std::vector<float> &, std::vector<int> &);
+        std::vector<float> cutPointsAnt(std::vector<float> &, std::vector<int> &);
     };
 }
 #endif
\ No newline at end of file
diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx
index 64ab59a..34fc88c 100644
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -8,6 +8,7 @@ cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
         CPPFImdlp() except +
         CPPFImdlp(int, bool) except +
         vector[float] cutPoints(vector[float]&, vector[int]&)
+        vector[float] cutPointsAnt(vector[float]&, vector[int]&)
 
 cdef class CFImdlp:
     cdef CPPFImdlp *thisptr
@@ -17,3 +18,5 @@ cdef class CFImdlp:
         del self.thisptr
     def cut_points(self, X, y):
         return self.thisptr.cutPoints(X, y)
+    def cut_points_ant(self, X, y):
+        return self.thisptr.cutPointsAnt(X, y)
diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so
index 887b6f8..1681329 100755
Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ
diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py
index 38e1e6e..4f0b227 100644
--- a/fimdlp/mdlp.py
+++ b/fimdlp/mdlp.py
@@ -61,7 +61,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
         self.n_features_ = X.shape[1]
         self.X_ = X
         self.y_ = y
-        self.discretizer_ = CFImdlp()
+        self.discretizer_ = CFImdlp(debug=True)
         return self
 
     def transform(self, X):
@@ -108,5 +108,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
             datax = self.X_[np.argsort(self.X_[:, i]), i]
             y_ = self.y_[np.argsort(self.X_[:, i])]
             Xcutpoints = self.discretizer_.cut_points(datax, y_)
-            print(f"{self.features_[i]:20s}: {Xcutpoints}")
+            print(f"New:{self.features_[i]:20s}: {Xcutpoints}")
+            Xcutpoints = self.discretizer_.cut_points_ant(datax, y_)
+            print(f"Ant:{self.features_[i]:20s}: {Xcutpoints}")
         return X
diff --git a/sample.py b/sample.py
index f95e615..3768a2a 100644
--- a/sample.py
+++ b/sample.py
@@ -1,10 +1,22 @@
 from sklearn.datasets import load_iris
 from fimdlp.mdlp import FImdlp
 from fimdlp.cppfimdlp import CFImdlp
+import numpy as np
 
 data = load_iris()
 X = data.data
 y = data.target
 features = data.feature_names
 test = FImdlp()
-test.fit(X, y, features=features).transform(X)
+# test.fit(X, y, features=features).transform(X)
+
+X = np.array(
+    [
+        [5.1, 3.5, 1.4, 0.2],
+        [5.2, 3.0, 1.4, 0.2],
+        [5.3, 3.2, 1.3, 0.2],
+        [5.3, 3.1, 1.5, 0.2],
+    ]
+)
+y = np.array([0, 0, 0, 1])
+test.fit(X, y).transform(X)