diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index be825a6..990cd6a 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -1,6 +1,7 @@ #include "CPPFImdlp.h" #include #include +#include "Metrics.h" namespace CPPFImdlp { CPPFImdlp::CPPFImdlp() : debug(false), precision(6) @@ -17,33 +18,35 @@ namespace CPPFImdlp std::vector CPPFImdlp::cutPoints(std::vector &X, std::vector &y) { std::vector cutPts; - float antx, cutPoint; - int anty; + float xPrev, cutPoint; + int yPrev; std::vector indices = sortIndices(X); - antx = X.at(indices[0]); - anty = y.at(indices[0]); + xPrev = X.at(indices[0]); + yPrev = y.at(indices[0]); + if (debug) + { + std::cout << "Entropy: " << Metrics::entropy(y, 0, y.size(), Metrics::numClasses(y)) << std::endl; + } for (auto index = indices.begin(); index != indices.end(); ++index) { - // std::cout << X.at(*index) << " -> " << y.at(*index) << " // "; // Definition 2 Cut points are always on boundaries - if (y.at(*index) != anty && antx < X.at(*index)) - // Weka implementation - // if (antx < X.at(*index)) + if (y.at(*index) != yPrev && xPrev < X.at(*index)) { - cutPoint = round((X.at(*index) + antx) / 2 * divider) / divider; + cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider; if (debug) { - std::cout << "Cut point: " << (antx + X.at(*index)) / 2 << " //"; - std::cout << X.at(*index) << " -> " << y.at(*index) << " anty= " << anty; - std::cout << "* (" << X.at(*index) << ", " << antx << ")=" << ((X.at(*index) + antx) / 2) << std::endl; + std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; + std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev; + std::cout << "* (" << X.at(*index) << ", " << xPrev << ")=" << ((X.at(*index) + xPrev) / 2) << std::endl; } cutPts.push_back(cutPoint); } - antx = X.at(*index); - anty = y.at(*index); + xPrev = X.at(*index); + yPrev = y.at(*index); } return cutPts; } + // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes std::vector CPPFImdlp::sortIndices(std::vector &X) { std::vector idx(X.size()); diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp new file mode 100644 index 0000000..81ec93c --- /dev/null +++ b/fimdlp/Metrics.cpp @@ -0,0 +1,40 @@ +#include "Metrics.h" +namespace CPPFImdlp +{ + Metrics::Metrics() + { + } + float Metrics::entropy(std::vector &y, int start, int end, int nClasses) + { + float entropy = 0; + int nElements = end - start; + std::vector + counts(nClasses, 0); + for (auto i = start; i < end; i++) + { + counts[y[i]]++; + } + for (auto i = 0; i < nClasses; i++) + { + if (counts[i] > 0) + { + float p = (float)counts[i] / nElements; + entropy -= p * log2(p); + } + } + return entropy; + } + int Metrics::numClasses(std::vector &y) + { + int nClasses = 1; + int yAnt = y.at(0); + for (auto i = y.begin(); i != y.end(); ++i) + { + if (*i != yAnt) + { + nClasses++; + } + } + return nClasses; + } +} diff --git a/fimdlp/Metrics.h b/fimdlp/Metrics.h new file mode 100644 index 0000000..f4da032 --- /dev/null +++ b/fimdlp/Metrics.h @@ -0,0 +1,16 @@ +#ifndef METRICS_H +#define METRICS_H +#include +#include +#include +namespace CPPFImdlp +{ + class Metrics + { + public: + Metrics(); + static float entropy(std::vector &, int, int, int); + static int numClasses(std::vector &); + }; +} +#endif \ No newline at end of file diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index e960773..cb7a931 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py index 3959271..1e35334 100644 --- a/fimdlp/mdlp.py +++ b/fimdlp/mdlp.py @@ -95,10 +95,8 @@ class FImdlp(TransformerMixin, BaseEstimator): print("Cut points for each feature in Iris dataset:") yz = self.y_.copy() xz = X[:, 0].copy() - xzz = self.discretizer_.sort_vectors(xz, yz) print("Xz: ", xz) print("Yz: ", yz) - print("Xzz: ", xzz) print("Solución:") print("Xz*: ", np.sort(X[:, 0])) print("yz*: ", yz[np.argsort(X[:, 0])]) diff --git a/setup.py b/setup.py index 0c813a3..7851132 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ setup( sources=[ "fimdlp/cfimdlp.pyx", "fimdlp/CPPFImdlp.cpp", + "fimdlp/Metrics.cpp", ], language="c++", include_dirs=["fimdlp"],