new cutPoints algo complete

2025-08-16 16:05:52 +00:00 · 2022-11-29 17:26:09 +01:00
parent 36c5930c5e
commit c965d392c4
8 changed files with 66 additions and 46 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -21,48 +21,47 @@ namespace CPPFImdlp
    {
        std::vector<float> cutPts;
        std::vector<size_t> cutIdx;
-        float xPrev, cutPoint, curx;
-        int yPrev, cury;
-        size_t idxPrev, idx;
-        bool first = true;
+        float xPrev, xCur, xPivot, cutPoint;
+        int yPrev, yCur, yPivot;
+        size_t idxPrev, idx, numElements;
        std::vector<size_t> indices = sortIndices(X);
-        xPrev = X.at(indices.at(0));
-        yPrev = y.at(indices.at(0));
-        idxPrev = indices.at(0);
+        xCur = xPrev = X.at(indices.at(0));
+        yCur = yPrev = y.at(indices.at(0));
+        numElements = indices.size() - 1;
+        // idxPrev = indices.at(0);
        idx = 0;
-        while (idx < indices.size() - 1)
+        if (debug)
+            printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements);
+        while (idx < numElements)
        {
-            if (first)
-            {
-                first = false;
-                curx = X.at(indices.at(idx));
-                cury = y.at(indices.at(idx));
-            }
+            xPivot = xCur;
+            yPivot = yCur;
            if (debug)
-                printf("<idx=%lu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, yPrev);
+                printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
            // Read the same values and check class changes
-            while (idx < indices.size() - 1 && curx == xPrev)
+            do
            {
                idx++;
-                curx = X.at(indices.at(idx));
-                cury = y.at(indices.at(idx));
-                if (cury != yPrev && curx == xPrev)
+                xCur = X.at(indices.at(idx));
+                yCur = y.at(indices.at(idx));
+                if (yCur != yPivot && xCur == xPivot)
                {
-                    yPrev = -1;
+                    yPivot = -1;
                }
                if (debug)
-                    printf(">idx=%lu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, yPrev);
-            }
-            if (yPrev == -1 || yPrev != cury)
+                    printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
+            } while (idx < numElements && xCur == xPivot);
+            if (yPivot == -1 || yPrev != yCur)
            {
-                cutPoint = (xPrev + curx) / 2;
-                printf("Cutpoint (%3.1f, %d) -> (%3.1f, %d) = %3.1f", xPrev, yPrev, curx, cury, cutPoint);
+                cutPoint = (xPrev + xCur) / 2;
+                if (debug)
+                    printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = %3.1f \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint);
                cutPts.push_back(cutPoint);
-                cutIdx.push_back(idxPrev);
+                // cutIdx.push_back(idxPrev);
            }
-            yPrev = cury;
-            xPrev = curx;
-            idxPrev = indices.at(idx);
+            yPrev = yPivot;
+            xPrev = xPivot;
+            // idxPrev = indices.at(idxPivot);
        }
        return cutPts;
    }
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -1,7 +1,6 @@
 #ifndef CPPFIMDLP_H
 #define CPPFIMDLP_H
 #include <vector>
-#include <Python.h>
 #include <utility>
 namespace CPPFImdlp
 {
--- a/fimdlp/Metrics.h
+++ b/fimdlp/Metrics.h
@@ -1,8 +1,6 @@
 #ifndef METRICS_H
 #define METRICS_H
 #include <vector>
-#include <Python.h>
-#include <utility>
 namespace CPPFImdlp
 {
    class Metrics
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/mdlp.py
+++ b/fimdlp/mdlp.py
@@ -61,7 +61,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
        self.n_features_ = X.shape[1]
        self.X_ = X
        self.y_ = y
-        self.discretizer_ = CFImdlp(debug=True)
+        self.discretizer_ = CFImdlp(debug=False)
        return self

    def transform(self, X):
@@ -108,7 +108,13 @@ class FImdlp(TransformerMixin, BaseEstimator):
            datax = self.X_[np.argsort(self.X_[:, i]), i]
            y_ = self.y_[np.argsort(self.X_[:, i])]
            Xcutpoints = self.discretizer_.cut_points(datax, y_)
-            print(f"New:{self.features_[i]:20s}: {Xcutpoints}")
+            print(
+                f"New ({len(Xcutpoints)}):{self.features_[i]:20s}: "
+                f"{Xcutpoints}"
+            )
            Xcutpoints = self.discretizer_.cut_points_ant(datax, y_)
-            print(f"Ant:{self.features_[i]:20s}: {Xcutpoints}")
+            print(
+                f"Ant ({len(Xcutpoints)}):{self.features_[i]:20s}: "
+                f"{Xcutpoints}"
+            )
        return X
--- a/fimdlp/test
+++ b/fimdlp/test
--- a/fimdlp/test.cpp
+++ b/fimdlp/test.cpp
@@ -0,0 +1,18 @@
+#include "CPPFImdlp.h"
+#include <iostream>
+
+using namespace std;
+// Smoke test: print the cut points computed for a single-class sample.
+int main()
+{
+    // NOTE(review): the `true` flag presumably enables debug tracing — confirm in CPPFImdlp.h.
+    CPPFImdlp::CPPFImdlp fimdlp(true);
+    std::vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+    std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; // every sample has the same label
+    const std::vector<float> cutPts = fimdlp.cutPoints(X, y);
+    for (const float cutPt : cutPts)
+    {
+        std::cout << cutPt << '\n'; // '\n' rather than std::endl: no flush per value
+    }
+    return 0;
+}
--- a/sample.py
+++ b/sample.py
@@ -8,15 +8,15 @@ X = data.data
 y = data.target
 features = data.feature_names
 test = FImdlp()
-# test.fit(X, y, features=features).transform(X)
+test.fit(X, y, features=features).transform(X)

-X = np.array(
-    [
-        [5.1, 3.5, 1.4, 0.2],
-        [5.2, 3.0, 1.4, 0.2],
-        [5.3, 3.2, 1.3, 0.2],
-        [5.3, 3.1, 1.5, 0.2],
-    ]
-)
-y = np.array([0, 0, 0, 1])
-test.fit(X, y).transform(X)
+# X = np.array(
+#     [
+#         [5.1, 3.5, 1.4, 0.2],
+#         [5.2, 3.0, 1.4, 0.2],
+#         [5.3, 3.2, 1.3, 0.2],
+#         [5.3, 3.1, 1.5, 0.2],
+#     ]
+# )
+# y = np.array([0, 0, 0, 1])
+# test.fit(X, y).transform(X)