Refactor computeCutPoints

2025-08-17 08:25:51 +00:00 · 2022-11-30 18:30:08 +01:00
parent 2ca58d139a
commit a982dbd5ca
9 changed files with 67 additions and 27 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -5,7 +5,7 @@
 #include <stdio.h>
 #include <algorithm>
 #include "Metrics.h"
-namespace CPPFImdlp
+namespace mdlp
 {
    std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
    {
@@ -23,6 +23,10 @@ namespace CPPFImdlp
    // Destructor: performs no explicit work; members are destroyed automatically.
    CPPFImdlp::~CPPFImdlp() = default;
    std::vector<CutPointBody> CPPFImdlp::getCutPoints()
    {
        return cutPoints;
    }
    void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
    {
        std::cout << "+++++++++++++++++++++++" << std::endl;
@@ -33,12 +37,13 @@ namespace CPPFImdlp
            printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
        }
        std::cout << "+++++++++++++++++++++++" << std::endl;
-        for (auto item : cutPoints(X, y))
+        computeCutPoints(X, y);
        for (auto item : cutPoints)
        {
            std::cout << item << "  X[" << item.end << "]=" << X[item.end] << std::endl;
        }
    }
-    std::vector<CutPointBody> CPPFImdlp::cutPoints(std::vector<float> &X, std::vector<int> &y)
+    void CPPFImdlp::computeCutPoints(std::vector<float> &X_, std::vector<int> &y_)
    {
        std::vector<CutPointBody> cutPts;
@@ -47,7 +52,9 @@ namespace CPPFImdlp
        float xPrev, xCur, xPivot;
        int yPrev, yCur, yPivot;
        size_t idxPrev, idxPivot, idx, numElements, start;
-        std::vector<size_t> indices = sortIndices(X);
+        X = X_;
        y = y_;
        indices = sortIndices(X);
        xCur = xPrev = X[indices[0]];
        yCur = yPrev = y[indices[0]];
        numElements = indices.size() - 1;
@@ -79,10 +86,11 @@ namespace CPPFImdlp
            if (yPivot == -1 || yPrev != yCur)
            {
                cutPoint.start = start;
-                cutPoint.end = idxPrev;
+                cutPoint.end = idx - 1;
                start = idx;
                cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
                cutPoint.toValue = (xPrev + xCur) / 2;
                cutPoint.classNumber = -1;
                firstCutPoint = false;
                if (debug)
                {
@@ -95,20 +103,21 @@ namespace CPPFImdlp
            xPrev = xPivot;
            idxPrev = indices[idxPivot];
        }
-        if (idxPrev >= numElements)
+        if (idx == numElements)
        {
            cutPoint.start = start;
            cutPoint.end = numElements;
            cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
            cutPoint.toValue = std::numeric_limits<float>::max();
            cutPoint.classNumber = -1;
            if (debug)
                printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
            cutPts.push_back(cutPoint);
            cutIdx.push_back(idxPrev);
        }
-        return cutPts;
+        cutPoints = cutPts;
    }
-    std::vector<float> CPPFImdlp::cutPointsAnt(std::vector<float> &X, std::vector<int> &y)
+    std::vector<float> CPPFImdlp::computeCutPointsAnt(std::vector<float> &X, std::vector<int> &y)
    {
        std::vector<float> cutPts;
        std::vector<int> cutIdx;
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -2,12 +2,13 @@
 #define CPPFIMDLP_H
 #include <iosfwd>
 #include <utility>
 #include <vector>
-namespace CPPFImdlp
+namespace mdlp
 {
    // Describes one cut point: the index range it covers in the sorted data and the
    // value interval (fromValue, toValue] it delimits.
    struct CutPointBody
    {
-        size_t start, end;
+        size_t start, end;        // indices of the sorted vector
-        float fromValue, toValue;
+        int classNumber;          // class assigned to the cut point
        float fromValue, toValue; // Values of the variable
    };
    // Stream insertion for CutPointBody. The definition lives in CPPFImdlp.cpp; declaring it
    // here lets other translation units (e.g. test.cpp) print a CutPointBody with std::cout.
    std::ostream &operator<<(std::ostream &os, const CutPointBody &cut);
    class CPPFImdlp
    {
@@ -15,15 +16,20 @@ namespace CPPFImdlp
        bool debug;
        int precision;
        float divider;
-        std::vector<size_t>
+        std::vector<size_t> indices; // sorted indices to use with X and y
-        sortIndices(std::vector<float> &);
+        std::vector<float> X;
        std::vector<int> y;
        std::vector<float> xDiscretized;
        std::vector<CutPointBody> cutPoints;
        std::vector<size_t> sortIndices(std::vector<float> &);
    public:
        CPPFImdlp();
        CPPFImdlp(int, bool debug = false);
        ~CPPFImdlp();
-        std::vector<CutPointBody> cutPoints(std::vector<float> &, std::vector<int> &);
+        std::vector<CutPointBody> getCutPoints();
-        std::vector<float> cutPointsAnt(std::vector<float> &, std::vector<int> &);
+        void computeCutPoints(std::vector<float> &, std::vector<int> &);
        std::vector<float> computeCutPointsAnt(std::vector<float> &, std::vector<int> &);
        void debugPoints(std::vector<float> &, std::vector<int> &);
    };
 }
--- a/fimdlp/Metrics.cpp
+++ b/fimdlp/Metrics.cpp
@@ -1,5 +1,5 @@
 #include "Metrics.h"
-namespace CPPFImdlp
+namespace mdlp
 {
    Metrics::Metrics()
    {
--- a/fimdlp/Metrics.h
+++ b/fimdlp/Metrics.h
@@ -2,7 +2,7 @@
 #define METRICS_H
 #include <vector>
 #include <cmath>
-namespace CPPFImdlp
+namespace mdlp
 {
    class Metrics
    {
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -3,13 +3,26 @@
 from libcpp.vector cimport vector
 from libcpp cimport bool
-cdef extern from "CPPFImdlp.h" namespace "CPPFImdlp":
+cdef extern from "CPPFImdlp.h" namespace "mdlp":
    # Mirror of the C++ struct mdlp::CutPointBody.
    cdef struct CutPointBody:
        size_t start, end;
        int classNumber;
        float fromValue, toValue;
    # Mirror of the C++ class mdlp::CPPFImdlp; names must match the header exactly.
    cdef cppclass CPPFImdlp:
        CPPFImdlp() except + 
        CPPFImdlp(int, bool) except + 
        vector[CutPointBody] getCutPoints()
        # The header names this method computeCutPointsAnt (formerly cutPointsAnt).
        vector[float] computeCutPointsAnt(vector[float]&, vector[int]&)
        void debugPoints(vector[float]&, vector[int]&)
        void computeCutPoints(vector[float]&, vector[int]&)
 class PCutPointBody:
    """Plain Python counterpart of the C++ CutPointBody struct."""
    def __init__(self, start, end, fromValue, toValue, classNumber=-1):
        self.start = start
        self.end = end
        self.fromValue = fromValue
        self.toValue = toValue
        # Defaults to -1, the value computeCutPoints assigns in CPPFImdlp.cpp.
        self.classNumber = classNumber
 cdef class CFImdlp:
    cdef CPPFImdlp *thisptr
@@ -17,7 +30,11 @@ cdef class CFImdlp:
        self.thisptr = new CPPFImdlp(precision, debug)
    def __dealloc__(self):
        del self.thisptr
    def cut_points(self, X, y):
        """Compute the cut points for (X, y) and return them as stored by the C++ object."""
        self.thisptr.computeCutPoints(X, y)
        return  self.thisptr.getCutPoints()
    def cut_points_ant(self, X, y):
        """Return the cut values produced by the C++ computeCutPointsAnt routine."""
-        return self.thisptr.cutPointsAnt(X, y)
+        return self.thisptr.computeCutPointsAnt(X, y)
    def debug_points(self, X, y):
        """Forward (X, y) to the C++ debugPoints routine."""
        return self.thisptr.debugPoints(X, y)
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/test
+++ b/fimdlp/test
--- a/fimdlp/test.cpp
+++ b/fimdlp/test.cpp
@@ -1,17 +1,17 @@
 #include "CPPFImdlp.h"
 #include <iostream>
-using namespace std;
+using namespace mdlp;
 int main(int argc, char *argv[], char *envp[])
 {
    {
-        CPPFImdlp::CPPFImdlp fimdlp = CPPFImdlp::CPPFImdlp(true);
+        CPPFImdlp fimdlp = CPPFImdlp(true);
-        vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+        std::vector<float> X = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-        vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+        std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
-        vector<float> cutPts = fimdlp.cutPoints(X, y);
+        fimdlp.computeCutPoints(X, y);
-        for (auto &cutPt : cutPts)
+        for (struct CutPointBody cutPt : fimdlp.getCutPoints())
        {
-            cout << cutPt << endl;
+            std::cout << cutPt << std::endl;
        }
        return 0;
    }
--- a/sample.py
+++ b/sample.py
@@ -3,6 +3,7 @@ from fimdlp.mdlp import FImdlp
 from fimdlp.cppfimdlp import CFImdlp
 import numpy as np
 data = load_iris()
 X = data.data
 y = data.target
@@ -16,7 +17,14 @@ test = CFImdlp(debug=False)
 # print(k)
 # k = test.cut_points_ant(X[:, 0], y)
 # print(k)
-test.debug_points(X[:, 0], y)
+# test.debug_points(X[:, 0], y)
 # Compute the cut points for column 0 of X and print one formatted line per cut point.
 result = test.cut_points(X[:, 0], y)
 for item in result:
    line = (
        f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
        f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
    )
    print(line)
 # X = np.array(
 #     [