diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index af89e5d..c3b650a 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -1,6 +1,7 @@ #include "CPPFImdlp.h" #include #include +#include #include "Metrics.h" namespace CPPFImdlp { @@ -20,7 +21,8 @@ namespace CPPFImdlp std::vector cutPts; std::vector cutIdx; float xPrev, cutPoint; - int yPrev, idxPrev; + int yPrev; + size_t idxPrev; std::vector indices = sortIndices(X); xPrev = X.at(indices[0]); yPrev = y.at(indices[0]); @@ -34,7 +36,7 @@ namespace CPPFImdlp // Definition 2 Cut points are always on boundaries if (y.at(*index) != yPrev && xPrev < X.at(*index)) { - cutPoint = round((X.at(*index) + xPrev) / 2 * divider) / divider; + cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider; if (debug) { std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //"; @@ -57,6 +59,13 @@ namespace CPPFImdlp std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl; // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl; } + std::cout << "+++++++++++++++++++++++" << std::endl; + for (size_t i = 0; i < y.size(); i++) + { + printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]); + } + std::cout << "+++++++++++++++++++++++" << std::endl; + return cutPts; } // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes diff --git a/fimdlp/Metrics.cpp b/fimdlp/Metrics.cpp index 682b8f7..9613bd4 100644 --- a/fimdlp/Metrics.cpp +++ b/fimdlp/Metrics.cpp @@ -4,7 +4,7 @@ namespace CPPFImdlp Metrics::Metrics() { } - int Metrics::numClasses(std::vector &y, std::vector indices, int start, int end) + int Metrics::numClasses(std::vector &y, std::vector indices, size_t start, size_t end) { int nClasses = 1; int yAnt = y.at(start); @@ -18,7 +18,7 @@ namespace CPPFImdlp } return nClasses; } - float Metrics::entropy(std::vector &y, std::vector &indices, int start, int end, int nClasses) + float Metrics::entropy(std::vector &y, std::vector &indices, size_t start, size_t end, int nClasses) { float entropy = 0; int nElements = 0; @@ -38,7 +38,7 @@ namespace CPPFImdlp } return entropy; } - float Metrics::informationGain(std::vector &y, std::vector &indices, int start, int end, int cutPoint, int nClasses) + float Metrics::informationGain(std::vector &y, std::vector &indices, size_t start, size_t end, size_t cutPoint, int nClasses) { float iGain = 0.0; float entropy, entropyLeft, entropyRight; diff --git a/fimdlp/Metrics.h b/fimdlp/Metrics.h index 60ef9a5..96a9f44 100644 --- a/fimdlp/Metrics.h +++ b/fimdlp/Metrics.h @@ -9,9 +9,9 @@ namespace CPPFImdlp { public: Metrics(); - static int numClasses(std::vector &, std::vector, int, int); - static float entropy(std::vector &, std::vector &, int, int, int); - static float informationGain(std::vector &y, std::vector &indices, int start, int end, int cutPoint, int nClasses); + static int numClasses(std::vector &, std::vector, size_t, size_t); + static float entropy(std::vector &, std::vector &, size_t, size_t, int); + static float informationGain(std::vector &, std::vector &, size_t, size_t, size_t, int); }; } #endif \ No newline at end of file diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index 97e36f9..46640fb 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py index 1e35334..8f8399f 100644 --- a/fimdlp/mdlp.py +++ b/fimdlp/mdlp.py @@ -95,13 +95,21 @@ class FImdlp(TransformerMixin, BaseEstimator): print("Cut points for each feature in Iris dataset:") yz = self.y_.copy() xz = X[:, 0].copy() - print("Xz: ", xz) - print("Yz: ", yz) - print("Solución:") - print("Xz*: ", np.sort(X[:, 0])) - print("yz*: ", yz[np.argsort(X[:, 0])]) + xz = xz[np.argsort(X[:, 0])] + yz = yz[np.argsort(X[:, 0])] + cuts = [] + for i in range(1, len(yz)): + if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]: + print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})") + cuts.append((xz[i] + xz[i - 1]) / 2) for i in range(0, 1): # self.n_features_): datax = np.sort(X[:, i]) Xcutpoints = self.discretizer_.cut_points(datax, self.y_) print(f"{self.features_[i]:20s}: {Xcutpoints}") + print("Solución cut_points: ", cuts) + print(xz) + print("***********") + for i in range(0, len(yz)): + print(f"({xz[i]}, {yz[i]})") + print("***********") return X diff --git a/sample.py b/sample.py index 911dee0..0382c30 100644 --- a/sample.py +++ b/sample.py @@ -7,7 +7,9 @@ X = data.data y = data.target features = data.feature_names test = FImdlp() -# Xcutpoints = test.fit(X, y, features=features).transform(X) +Xcutpoints = test.fit(X, y, features=features).transform(X) clf = CFImdlp(debug=True) print("Cut points for feature 0 in Iris dataset:") print(clf.cut_points(X[:, 0], y)) +print("Xcut") +print(Xcutpoints) diff --git a/setup.py b/setup.py index 7851132..b1ce695 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( ], language="c++", include_dirs=["fimdlp"], - extra_compile_args=["-std=c++20"], + extra_compile_args=["-std=c++2a"], ), ] )