First try new cutPoints algorithm

2025-08-17 16:35:52 +00:00 · 2022-11-29 01:38:19 +01:00
parent 182c949713
commit 36c5930c5e
7 changed files with 73 additions and 4 deletions
--- a/4
+++ b/4
@@ -24,7 +24,9 @@ buildext:  ## Build extension
 	rm -fr build/*
 	make clean
 	python setup.py build_ext
-	echo "Build extension success"; mv build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so fimdlp;
+	echo "Build extension success"
 	if [ -f build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-12-x86_64-cpython-310/cppfimdlp.cpython-310-darwin.so fimdlp; fi
 	if [ -f build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so ] ; then mv build/lib.macosx-10.9-universal2-3.10/cppfimdlp.cpython-310-darwin.so fimdlp; fi
 audit: ## Audit pip
 	pip-audit
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -18,6 +18,55 @@ namespace CPPFImdlp
    {
    }
    // Compute candidate cut points for one feature.
    //
    // Samples are visited in the order returned by sortIndices(X). Consecutive
    // samples with the same X value are scanned as one run; when the run ends,
    // the midpoint between its value and the next distinct value is recorded if
    // the run contained more than one label, or if its label differs from the
    // label of the first sample of the next run.
    //
    // X: feature values.
    // y: labels; y is read at every index returned by sortIndices(X).
    // Returns the cut points in the order they were found (empty when there
    // are no samples). at() throws std::out_of_range for an index outside X or y.
    //
    // NOTE(review): the upcoming run is represented only by the label of its
    // first sample, so a pure run followed by a mixed run that happens to start
    // with the same label produces no cut at that boundary -- confirm whether
    // this is intended.
    std::vector<float> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
    {
        std::vector<float> cutPts;
        const std::vector<size_t> indices = sortIndices(X);
        // Nothing to scan: avoids at(0) throwing and size() - 1 wrapping around.
        if (indices.empty())
            return cutPts;
        const size_t last = indices.size() - 1;
        float xPrev = X.at(indices.at(0));
        int yPrev = y.at(indices.at(0));
        float curx = xPrev;
        int cury = yPrev;
        // True once the current run of equal X values has shown two labels.
        // A separate flag is used so that a real label of -1 is not mistaken
        // for the "mixed" marker.
        bool mixed = false;
        size_t idx = 0;
        while (idx < last)
        {
            if (debug)
                printf("<idx=%zu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, yPrev);
            // Read the same values and check class changes.
            // do/while: always consume at least one sample, so the scan makes
            // progress even when curx == xPrev is false on entry (e.g. NaN).
            do
            {
                idx++;
                curx = X.at(indices.at(idx));
                cury = y.at(indices.at(idx));
                if (curx == xPrev && cury != yPrev)
                {
                    mixed = true;
                }
                if (debug)
                    printf(">idx=%zu -> (%3.1f, %d) Prev(%3.1f, %d)\n", idx, curx, cury, xPrev, mixed ? -1 : yPrev);
            } while (idx < last && curx == xPrev);
            // A cut only makes sense between two distinct values; without the
            // first test, data ending inside a mixed run would emit the run's
            // own value as a cut point.
            if (curx != xPrev && (mixed || yPrev != cury))
            {
                const float cutPoint = (xPrev + curx) / 2;
                if (debug)
                    printf("Cutpoint (%3.1f, %d) -> (%3.1f, %d) = %3.1f\n", xPrev, mixed ? -1 : yPrev, curx, cury, cutPoint);
                cutPts.push_back(cutPoint);
            }
            // The sample just read becomes the start of the next run.
            yPrev = cury;
            xPrev = curx;
            mixed = false;
        }
        return cutPts;
    }
    std::vector<float> CPPFImdlp::cutPointsAnt(std::vector<float> &X, std::vector<int> &y)
    {
        std::vector<float> cutPts;
        std::vector<int> cutIdx;
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -19,6 +19,7 @@ namespace CPPFImdlp
        CPPFImdlp(int, bool debug = false);
        ~CPPFImdlp();
        std::vector<float> cutPoints(std::vector<float> &, std::vector<int> &);
        std::vector<float> cutPointsAnt(std::vector<float> &, std::vector<int> &);
    };
 }
 #endif
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -8,6 +8,7 @@ cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
        CPPFImdlp() except + 
        CPPFImdlp(int, bool) except + 
        vector[float] cutPoints(vector[float]&, vector[int]&)
        vector[float] cutPointsAnt(vector[float]&, vector[int]&)
 cdef class CFImdlp:
    cdef CPPFImdlp *thisptr
@@ -17,3 +18,5 @@ cdef class CFImdlp:
        del self.thisptr
    def cut_points(self, X, y):
        return self.thisptr.cutPoints(X, y)
    def cut_points_ant(self, X, y):
        return self.thisptr.cutPointsAnt(X, y)
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/mdlp.py
+++ b/fimdlp/mdlp.py
@@ -61,7 +61,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
        self.n_features_ = X.shape[1]
        self.X_ = X
        self.y_ = y
-        self.discretizer_ = CFImdlp()
+        self.discretizer_ = CFImdlp(debug=True)
        return self
    def transform(self, X):
@@ -108,5 +108,7 @@ class FImdlp(TransformerMixin, BaseEstimator):
            datax = self.X_[np.argsort(self.X_[:, i]), i]
            y_ = self.y_[np.argsort(self.X_[:, i])]
            Xcutpoints = self.discretizer_.cut_points(datax, y_)
-            print(f"{self.features_[i]:20s}: {Xcutpoints}")
+            print(f"New:{self.features_[i]:20s}: {Xcutpoints}")
            Xcutpoints = self.discretizer_.cut_points_ant(datax, y_)
            print(f"Ant:{self.features_[i]:20s}: {Xcutpoints}")
        return X
--- a/sample.py
+++ b/sample.py
@@ -1,10 +1,22 @@
 from sklearn.datasets import load_iris
 from fimdlp.mdlp import FImdlp
 from fimdlp.cppfimdlp import CFImdlp
 import numpy as np
 data = load_iris()
 X = data.data
 y = data.target
 features = data.feature_names
 test = FImdlp()
-test.fit(X, y, features=features).transform(X)
+# test.fit(X, y, features=features).transform(X)
 X = np.array(
    [
        [5.1, 3.5, 1.4, 0.2],
        [5.2, 3.0, 1.4, 0.2],
        [5.3, 3.2, 1.3, 0.2],
        [5.3, 3.1, 1.5, 0.2],
    ]
 )
 y = np.array([0, 0, 0, 1])
 test.fit(X, y).transform(X)