Refactor module methods to conform to the fit/transform interface

2025-08-17 16:35:52 +00:00 · 2022-12-01 20:03:50 +01:00
parent c7613ef237
commit 8f98a23505
4 changed files with 61 additions and 16 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -27,6 +27,25 @@ namespace mdlp
    {
        return cutPoints;
    }
+    std::vector<float> CPPFImdlp::getDiscretizedValues() // returns the xDiscretized member by value (a copy)
+    {
+        return xDiscretized;
+    }
+    // Fits the discretizer on one feature: stores copies of X and y, computes the
+    // sort order of X, then runs the cut point pipeline (compute, filter, apply).
+    // X: feature values. y: values read at the same indices as X.
+    // With an empty X the stored cut points are cleared and the pipeline is skipped.
+    void CPPFImdlp::fit(std::vector<float> &X, std::vector<int> &y)
+    {
+        this->X = X;
+        this->y = y;
+        this->indices = sortIndices(X);
+        if (X.empty())
+        {
+            // computeCutPoints() reads X[indices[0]] and evaluates indices.size() - 1,
+            // both invalid without samples; there is nothing to cut, so stop here.
+            cutPoints.clear();
+            return;
+        }
+        computeCutPoints();
+        filterCutPoints();
+        applyCutPoints();
+    }
+    std::vector<float> &CPPFImdlp::transform(std::vector<float> &X) // returns a reference to the xDiscretized member
+    {
+        std::vector<size_t> indices_transform = sortIndices(X); // NOTE(review): computed but never read below
+        applyCutPoints(); // NOTE(review): takes no arguments, so the X passed in is not forwarded - confirm intent
+        return xDiscretized;
+    }
    void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
    {
        std::cout << "+++++++++++++++++++++++" << std::endl;
@@ -37,13 +56,32 @@ namespace mdlp
            printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
        }
        std::cout << "+++++++++++++++++++++++" << std::endl;
-        computeCutPoints(X, y);
+        fit(X, y);
        for (auto item : cutPoints)
        {
            std::cout << item << "  X[" << item.end << "]=" << X[item.end] << std::endl;
        }
    }
-    void CPPFImdlp::computeCutPoints(std::vector<float> &X_, std::vector<int> &y_)
+    void CPPFImdlp::applyCutPoints() // placeholder: the body is empty, so calling it has no effect
+    {
+    }
+    bool CPPFImdlp::evaluateCutPoint(CutPointBody point) // predicate consulted by filterCutPoints()
+    {
+        return true; // accepts every cut point; `point` is not inspected
+    }
+    // Keeps only the cut points accepted by evaluateCutPoint() and replaces the
+    // cutPoints member with the surviving entries, preserving their order.
+    void CPPFImdlp::filterCutPoints()
+    {
+        std::vector<CutPointBody> filtered;
+        // Bind by const reference so a candidate is not copied just to be iterated.
+        for (const auto &item : cutPoints)
+        {
+            if (evaluateCutPoint(item))
+            {
+                filtered.push_back(item);
+            }
+        }
+        // Swap rather than copy-assign: the previous contents are released together
+        // with `filtered` when it goes out of scope.
+        cutPoints.swap(filtered);
+    }
+    void CPPFImdlp::computeCutPoints()
    {

        std::vector<CutPointBody> cutPts;
@@ -52,9 +90,7 @@ namespace mdlp
        float xPrev, xCur, xPivot;
        int yPrev, yCur, yPivot;
        size_t idxPrev, idxPivot, idx, numElements, start;
-        X = X_;
-        y = y_;
-        indices = sortIndices(X);
+
        xCur = xPrev = X[indices[0]];
        yCur = yPrev = y[indices[0]];
        numElements = indices.size() - 1;
@@ -117,14 +153,13 @@ namespace mdlp
        }
        cutPoints = cutPts;
    }
-    std::vector<float> CPPFImdlp::computeCutPointsAnt(std::vector<float> &X, std::vector<int> &y)
+    void CPPFImdlp::computeCutPointsAnt()
    {
        std::vector<float> cutPts;
        std::vector<int> cutIdx;
        float xPrev, cutPoint;
        int yPrev;
        size_t idxPrev;
-        std::vector<size_t> indices = sortIndices(X);
        xPrev = X.at(indices[0]);
        yPrev = y.at(indices[0]);
        idxPrev = indices[0];
@@ -153,7 +188,7 @@ namespace mdlp
            yPrev = y.at(*index);
            idxPrev = *index;
        }
-        return cutPts;
+        // cutPoints = cutPts;
    }
    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
    std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -24,15 +24,21 @@ namespace mdlp

    protected:
        std::vector<size_t> sortIndices(std::vector<float> &);
+        bool evaluateCutPoint(CutPointBody);
+        void filterCutPoints();
+        void computeCutPoints();
+        void applyCutPoints();
+        void computeCutPointsAnt();

    public:
        CPPFImdlp();
        CPPFImdlp(int, bool debug = false);
        ~CPPFImdlp();
        std::vector<CutPointBody> getCutPoints();
-        void computeCutPoints(std::vector<float> &, std::vector<int> &);
-        std::vector<float> computeCutPointsAnt(std::vector<float> &, std::vector<int> &);
+        std::vector<float> getDiscretizedValues();
        void debugPoints(std::vector<float> &, std::vector<int> &);
+        void fit(std::vector<float> &, std::vector<int> &);
+        std::vector<float> &transform(std::vector<float> &);
    };
 }
 #endif
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -11,10 +11,11 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
    cdef cppclass CPPFImdlp:
        CPPFImdlp() except + 
        CPPFImdlp(int, bool) except + 
+        void fit(vector[float]&, vector[int]&)
+        vector[float] transform(vector[float]&)
+        vector[float] getDiscretizedValues()
        vector[CutPointBody] getCutPoints()
-        vector[float] cutPointsAnt(vector[float]&, vector[int]&)
        void debugPoints(vector[float]&, vector[int]&)
-        void computeCutPoints(vector[float]&, vector[int]&)
        

 class PCutPointBody:
@@ -30,11 +31,14 @@ cdef class CFImdlp:
        self.thisptr = new CPPFImdlp(precision, debug)
    def __dealloc__(self):
        del self.thisptr
-    def cut_points(self, X, y):
-        self.thisptr.computeCutPoints(X, y)
+    def fit(self, X, y):  # forwards X and y to fit() on the wrapped C++ object
+        self.thisptr.fit(X, y)
+    def transform(self, X):  # returns whatever transform(X) on the wrapped C++ object yields
+        return self.thisptr.transform(X)
+    def get_discretized_values(self):  # returns the result of getDiscretizedValues() on the wrapped C++ object
+        return self.thisptr.getDiscretizedValues()
+    def get_cut_points(self, X=None, y=None):  # X and y are never read; optional so callers need not pass them
        return  self.thisptr.getCutPoints()
-    def cut_points_ant(self, X, y):
-        return self.get_cut_points(X, y)
    def debug_points(self, X, y):  # forwards X and y to debugPoints() on the wrapped C++ object
        return self.thisptr.debugPoints(X, y)  # debugPoints is declared void in the cdef extern block
 
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so