Fix test apply cutpoints

2025-08-17 16:35:52 +00:00 · 2022-12-03 01:52:00 +01:00
parent b1f5d337fc
commit e99852c5d5
6 changed files with 91 additions and 65 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -63,20 +63,6 @@ namespace mdlp {
        applyCutPoints();
        return xDiscretized;
    }
    /**
     * Debug helper: prints the received samples in ascending order of value,
     * fits the discretizer with them and prints the resulting cut points.
     *
     * @param X_ sample values to inspect and fit
     * @param y_ label of each sample in X_
     */
    void CPPFImdlp::debugPoints(samples& X_, labels& y_)
    {
        std::cout << "+++++++++++++++++++++++" << std::endl;
        // Sort the samples received as argument. The member X is only refreshed
        // by the fit() call below, so sorting it here would produce indices that
        // belong to other data (and may be out of range for X_ / y_).
        indices_t indices_n = sortIndices(X_);
        for (size_t i = 0; i < indices_n.size(); i++) {
            printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices_n[i], X_[indices_n[i]], y_[indices_n[i]]);
        }
        std::cout << "+++++++++++++++++++++++" << std::endl;
        fit(X_, y_);
        // Iterate by const reference: the cut points are only read.
        for (const auto& item : cutPoints) {
            std::cout << item.start << "  X_[" << item.end << "]=";
            // The end of the last interval can be equal to the number of
            // samples, so only dereference it when it is a valid position.
            if (item.end < X_.size()) {
                std::cout << X_[item.end];
            } else {
                std::cout << "n/a";
            }
            std::cout << std::endl;
        }
    }
    void CPPFImdlp::applyCutPoints()
    {
        for (auto cut : cutPoints) {
@@ -128,6 +114,7 @@ namespace mdlp {
                if (debug)
                    std::cout << "Accepted" << std::endl;
                if (lastReject) {
                    //Try to merge rejected intervals
                    if (first) {
                        item.fromValue = std::numeric_limits<float>::lowest();
                        item.start = indices[0];
@@ -141,6 +128,7 @@ namespace mdlp {
                filtered.push_back(item);
                first = false;
                rest.start = item.end;
                lastReject = false;
            } else {
                if (debug)
                    std::cout << "Rejected" << std::endl;
@@ -153,7 +141,6 @@ namespace mdlp {
        } else {
            filtered.push_back(rest);
        }
        cutPoints = filtered;
    }
    void CPPFImdlp::computeCutPointsProposed()
@@ -190,7 +177,7 @@ namespace mdlp {
            while (idx < numElements && xCur == xPivot);
            if (yPivot == -1 || yPrev != yCur) {
                cutPoint.start = start;
-                cutPoint.end = idx - 1;
+                cutPoint.end = idx;
                start = idx;
                cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
                cutPoint.toValue = (xPrev + xCur) / 2;
@@ -214,8 +201,9 @@ namespace mdlp {
                printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
            cutPts.push_back(cutPoint);
        }
        if (debug)
            for (auto cutPt : cutPts)
-            std::cout << "Cut point: " << cutPt;
+                std::cout << "Proposed: Cut point: " << cutPt;
        cutPoints = cutPts;
    }
    void CPPFImdlp::computeCutPointsOriginal()
@@ -260,7 +248,7 @@ namespace mdlp {
        cutPts.back().end = X.size();
        if (debug)
            for (auto cutPt : cutPts)
-                std::cout << "-Cut point: " << cutPt;
+                std::cout << "Original: Cut point: " << cutPt;
        cutPoints = cutPts;
    }
    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
@@ -273,4 +261,12 @@ namespace mdlp {
                { return X_[i1] < X_[i2]; });
        return idx;
    }
    /**
     * Replaces the cut points stored in the discretizer.
     *
     * @param cutPoints_ cut points to store; the member receives a copy of them
     */
    void CPPFImdlp::setCutPoints(cutPoints_t cutPoints_)
    {
        this->cutPoints = cutPoints_;
    }
    /**
     * Accessor for the indices kept by the discretizer.
     *
     * @return a copy of the `indices` member
     */
    indices_t CPPFImdlp::getIndices()
    {
        return this->indices;
    }
 }
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -17,6 +17,7 @@ namespace mdlp {
        cutPoints_t cutPoints;
    protected:
        void setCutPoints(cutPoints_t);
        static indices_t sortIndices(samples&);
        void computeCutPointsOriginal();
        void computeCutPointsProposed();
@@ -29,6 +30,7 @@ namespace mdlp {
        CPPFImdlp(bool, int, bool debug = false);
        ~CPPFImdlp();
        cutPoints_t getCutPoints();
        indices_t getIndices();
        labels getDiscretizedValues();
        void debugPoints(samples&, labels&);
        CPPFImdlp& fit(samples&, labels&);
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -15,7 +15,6 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
        vector[int] transform(vector[float]&)
        vector[int] getDiscretizedValues()
        vector[CutPointBody] getCutPoints()
        void debugPoints(vector[float]&, vector[int]&)
 class PcutPoint_t:
@@ -41,6 +40,4 @@ cdef class CFImdlp:
        return self.thisptr.getDiscretizedValues()
    def get_cut_points(self):
        """Return the cut points reported by the wrapped C++ object (getCutPoints())."""
        cut_points = self.thisptr.getCutPoints()
        return cut_points
    def debug_points(self, X, y):
        # Forwards X and y to debugPoints() of the wrapped C++ object.
        # That method is declared as returning void in the extern block.
        return self.thisptr.debugPoints(X, y)
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/testcpp/FImdlp_unittest.cc
+++ b/fimdlp/testcpp/FImdlp_unittest.cc
@@ -4,7 +4,7 @@
 namespace mdlp {
    class TestMetrics : public CPPFImdlp, public testing::Test {
    public:
-        TestMetrics() : CPPFImdlp(true, 6, false) {}
+        TestMetrics() : CPPFImdlp(true, 6, true) {}
        indices_t indices; // sorted indices to use with X and y
        samples X;
        labels y;
@@ -13,6 +13,8 @@ namespace mdlp {
        float precision_test = 0.000001;
        void SetUp()
        {
            //    5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
            //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
            X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
            y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
            fit(X, y);
@@ -100,8 +102,8 @@ namespace mdlp {
    {
        cutPoints_t computed, expected;
        expected = {
-            { 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 },
+            { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
-            { 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 },
+            { 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
            { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
        };
        computeCutPointsProposed();
@@ -119,7 +121,7 @@ namespace mdlp {
    {
        cutPoints_t computed, expected;
        expected = {
-                { 0, 2, -1, -3.4028234663852886e+38, 1.5 },
+                { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
                { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
        };
        X = { 0, 1, 2, 2 };
@@ -136,4 +138,23 @@ namespace mdlp {
            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
        }
    }
    // Installs a hand-written set of cut points, applies them and checks that,
    // for every interval [start, end), the discretized value found through the
    // fitted indices equals the classNumber of that interval.
    TEST_F(TestMetrics, ApplyCutPoints)
    {
        cutPoints_t expected = {
            { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
            { 6, 8, 59, 5.4, 5.85 },
            { 8, 10, 41, 5.85, 3.4028234663852886e+38 }
        };
        setCutPoints(expected);
        applyCutPoints();
        labels discretized = getDiscretizedValues();
        indices_t indices_x = getIndices();
        // Bound the loop by the real number of cut points: a hard-coded limit
        // larger than expected.size() reads past the end of the vector.
        for (size_t i = 0; i < expected.size(); i++) {
            std::cout << "cutPoint[" << i << "].start = " << expected[i].start << std::endl;
            for (auto j = expected[i].start; j < expected[i].end; j++) {
                std::cout << discretized[j] << expected[i].classNumber << std::endl;
                EXPECT_EQ(discretized[indices_x[j]], expected[i].classNumber);
            }
        }
    }
 }
--- a/sample.py
+++ b/sample.py
@@ -65,31 +65,37 @@ features = data.feature_names
 # test.fit(X, y, features=features)
 # test.transform(X)
 # test.get_cut_points()
-
+for proposed in [True, False]:
-test = CFImdlp(debug=False, proposed=False)
+    X = data.data
-# # k = test.cut_points(X[:, 0], y)
+    y = data.target
-# # print(k)
+    print("*** Proposed: ", proposed)
-# # k = test.cut_points_ant(X[:, 0], y)
+    test = CFImdlp(debug=False, proposed=proposed)
 # # print(k)
 # # test.debug_points(X[:, 0], y)
 # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
 # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
 # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
 # # To check
 # indices2 = np.argsort(X)
 # Xs = np.array(X)[indices2]
 # ys = np.array(y)[indices2]
    test.fit(X[:, 0], y)
 # test.fit(X, y)
    result = test.get_cut_points()
-# for item in result:
+    for item in result:
-#     print(
+        print(
-#         f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
+            f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
-#         f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
+            f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
-#     )
+        )
    print(test.get_discretized_values())
-
+    print("+" * 40)
    X = np.array(
        [
            [5.1, 3.5, 1.4, 0.2],
            [5.2, 3.0, 1.4, 0.2],
            [5.3, 3.2, 1.3, 0.2],
            [5.4, 3.1, 1.5, 0.2],
        ]
    )
    y = np.array([0, 0, 0, 1])
    print(test.fit(X[:, 0], y).transform(X[:, 0]))
    result = test.get_cut_points()
    for item in result:
        print(
            f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
            f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
        )
    print("*" * 40)
 # print(Xs, ys)
 # print("**********************")
 # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
@@ -102,13 +108,17 @@ print(test.get_discretized_values())
 # print(indices)
 # print(np.array(X)[indices])
-X = np.array(
+
-    [
+# # k = test.cut_points(X[:, 0], y)
-        [5.1, 3.5, 1.4, 0.2],
+# # print(k)
-        [5.2, 3.0, 1.4, 0.2],
+# # k = test.cut_points_ant(X[:, 0], y)
-        [5.3, 3.2, 1.3, 0.2],
+# # print(k)
-        [5.3, 3.1, 1.5, 0.2],
+# # test.debug_points(X[:, 0], y)
-    ]
+X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
-)
+indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
-y = np.array([0, 0, 0, 1])
+# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
-print(test.fit(X[:, 0], y).transform(X[:, 0]))
+# # To check
 # indices2 = np.argsort(X)
 Xs = np.array(X)[indices2]
 ys = np.array(y)[indices2]