diff --git a/debug.cpp b/debug.cpp
new file mode 100644
index 0000000..042092e
--- /dev/null
+++ b/debug.cpp
@@ -0,0 +1,14 @@
+std::cout << "+++++++++++++++++++++++" << std::endl;
+for (size_t i = 0; i < y.size(); i++)
+{
+    printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
+}
+std::cout << "+++++++++++++++++++++++" << std::endl;
+
+std::cout << "Information Gain:" << std::endl;
+auto nc = Metrics::numClasses(y, indices, 0, indices.size());
+for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
+{
+    std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
+    // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
+}
\ No newline at end of file
diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp
index b6d671a..d9a49ec 100644
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -53,20 +53,6 @@ namespace CPPFImdlp
             yPrev = y.at(*index);
             idxPrev = *index;
         }
-        std::cout << "Information Gain:" << std::endl;
-        auto nc = Metrics::numClasses(y, indices, 0, indices.size());
-        for (auto cutPoint = cutIdx.begin(); cutPoint != cutIdx.end(); ++cutPoint)
-        {
-            std::cout << *cutPoint << " -> " << Metrics::informationGain(y, indices, 0, indices.size(), *cutPoint, nc) << std::endl;
-            // << Metrics::informationGain(y, 0, y.size(), *cutPoint, Metrics::numClasses(y, 0, y.size())) << std::endl;
-        }
-        std::cout << "+++++++++++++++++++++++" << std::endl;
-        for (size_t i = 0; i < y.size(); i++)
-        {
-            printf("(%3.1f, %d)\n", X[indices.at(i)], y[indices.at(i)]);
-        }
-        std::cout << "+++++++++++++++++++++++" << std::endl;
-
         return cutPts;
     }
     // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so
index 98de2ce..887b6f8 100755
Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ
diff --git a/fimdlp/mdlp.py b/fimdlp/mdlp.py
index 8f8399f..38e1e6e 100644
--- a/fimdlp/mdlp.py
+++ b/fimdlp/mdlp.py
@@ -62,7 +62,6 @@ class FImdlp(TransformerMixin, BaseEstimator):
         self.X_ = X
         self.y_ = y
         self.discretizer_ = CFImdlp()
-
         return self
 
     def transform(self, X):
@@ -92,7 +91,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
             raise ValueError(
                 "Shape of input is different from what was seen in `fit`"
             )
-        print("Cut points for each feature in Iris dataset:")
+        print("Calculating cut points in python for first feature")
         yz = self.y_.copy()
         xz = X[:, 0].copy()
         xz = xz[np.argsort(X[:, 0])]
@@ -102,14 +101,12 @@ class FImdlp(TransformerMixin, BaseEstimator):
             if yz[i] != yz[i - 1] and xz[i - 1] < xz[i]:
                 print(f"Cut point: ({xz[i-1]}, {xz[i]}) ({yz[i-1]}, {yz[i]})")
                 cuts.append((xz[i] + xz[i - 1]) / 2)
-        for i in range(0, 1):  # self.n_features_):
-            datax = np.sort(X[:, i])
-            Xcutpoints = self.discretizer_.cut_points(datax, self.y_)
+        print("Cut points calculated in python: ", cuts)
+        print("Cut points calculated in C++")
+        print("Cut points for each feature in Iris dataset:")
+        for i in range(0, self.n_features_):
+            datax = self.X_[np.argsort(self.X_[:, i]), i]
+            y_ = self.y_[np.argsort(self.X_[:, i])]
+            Xcutpoints = self.discretizer_.cut_points(datax, y_)
             print(f"{self.features_[i]:20s}: {Xcutpoints}")
-        print("Solución cut_points: ", cuts)
-        print(xz)
-        print("***********")
-        for i in range(0, len(yz)):
-            print(f"({xz[i]}, {yz[i]})")
-        print("***********")
         return X
diff --git a/sample.py b/sample.py
index 0382c30..f95e615 100644
--- a/sample.py
+++ b/sample.py
@@ -7,9 +7,4 @@ X = data.data
 y = data.target
 features = data.feature_names
 test = FImdlp()
-Xcutpoints = test.fit(X, y, features=features).transform(X)
-clf = CFImdlp(debug=True)
-print("Cut points for feature 0 in Iris dataset:")
-print(clf.cut_points(X[:, 0], y))
-print("Xcut")
-print(Xcutpoints)
+test.fit(X, y, features=features).transform(X)
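
For reference, a minimal sketch of the ordering step introduced in FImdlp.transform(): each feature column is argsorted and the labels are reordered with the same index vector before both arrays are handed to the C++ cut_points() call. The snippet below is illustrative only and not part of the patch; it assumes numpy and scikit-learn's Iris loader, which sample.py already uses.

# Illustrative sketch (not part of the patch): pairing each sorted feature
# column with labels reordered by the same permutation, as transform() does
# before calling discretizer_.cut_points().
import numpy as np
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
for i in range(X.shape[1]):
    order = np.argsort(X[:, i])   # permutation that sorts feature i ascending
    datax = X[order, i]           # sorted feature values
    y_ = y[order]                 # labels reordered with the same permutation
    # datax and y_ correspond to the two arguments passed to cut_points()

Sorting the labels together with the feature keeps each (value, class) pair intact, which is what the candidate cut-point check (yz[i] != yz[i - 1] and xz[i - 1] < xz[i]) relies on to locate class boundaries.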