Refactor tests

2025-08-17 16:35:52 +00:00 · 2022-12-04 01:45:32 +01:00
parent 5cce895177
commit 9ce10131d6
5 changed files with 82 additions and 67 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -175,7 +175,10 @@ namespace mdlp {
                    printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
            }
            while (idx < numElements && xCur == xPivot);
-            if (yPivot == -1 || yPrev != yCur) {
+            // Check that the class changed and the interval holds more than 1 element
            if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
                // Must we add the entropy criteria here?
                // if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
                cutPoint.start = start;
                cutPoint.end = idx;
                start = idx;
@@ -201,9 +204,11 @@ namespace mdlp {
                printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
            cutPts.push_back(cutPoint);
        }
-        if (debug)
+        if (debug) {
            std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl;
            for (auto cutPt : cutPts)
-                std::cout << "Proposed: Cut point: " << cutPt;
+                std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposed: Cut point: " << cutPt;
        }
        cutPoints = cutPts;
    }
    void CPPFImdlp::computeCutPointsOriginal()
@@ -219,8 +224,11 @@ namespace mdlp {
        yPrev = y[idx];
        for (index = 0; index < size_t(indices.size()) - 1; index++) {
            idx = indices[index];
-            //  Definition 2 Cut points are always on boundaries
+            // Definition 2: cut points are always on class boundaries, and
-            if (y[idx] != yPrev && xPrev < X[idx]) {
+            // there is more than 1 item in the interval
            if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
                // Must we add the entropy criteria here?
                // if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
                if (first) {
                    first = false;
                    cutPoint.fromValue = std::numeric_limits<float>::lowest();
@@ -246,9 +254,11 @@ namespace mdlp {
        } else
            cutPts.back().toValue = std::numeric_limits<float>::max();
        cutPts.back().end = X.size();
-        if (debug)
+        if (debug) {
            std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl;
            for (auto cutPt : cutPts)
-                std::cout << "Original: Cut point: " << cutPt;
+                std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
        }
        cutPoints = cutPts;
    }
    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -4,7 +4,7 @@
 #include <utility>
 namespace mdlp {
    class CPPFImdlp {
-    private:
+    protected:
        bool proposed; // proposed algorithm or original algorithm
        int precision;
        bool debug;
@@ -16,7 +16,6 @@ namespace mdlp {
        int numClasses;
        cutPoints_t cutPoints;
    protected:
        void setCutPoints(cutPoints_t);
        static indices_t sortIndices(samples&);
        void computeCutPointsOriginal();
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/testcpp/FImdlp_unittest.cc
+++ b/fimdlp/testcpp/FImdlp_unittest.cc
@@ -2,15 +2,9 @@
 #include "../Metrics.h"
 #include "../CPPFImdlp.h"
 namespace mdlp {
-    class TestMetrics : public CPPFImdlp, public testing::Test {
+    class TestFImdlp : public CPPFImdlp, public testing::Test {
    public:
-        TestMetrics() : CPPFImdlp(true, 6, true) {}
+        TestFImdlp() : CPPFImdlp(true, 6, true) {}
        indices_t indices; // sorted indices to use with X and y
        samples X;
        labels y;
        samples xDiscretized;
        int numClasses;
        float precision_test = 0.000001;
        void SetUp()
        {
            //    5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
@@ -19,7 +13,19 @@ namespace mdlp {
            y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
            fit(X, y);
        }
-        void check_sorted_vector(samples& X_, indices_t indices_)
+        void initCutPoints()
        {
            setCutPoints(cutPoints_t());
        }
        void initIndices()
        {
            indices = indices_t();
        }
        void initDiscretized()
        {
            xDiscretized = labels();
        }
        void checkSortedVector(samples& X_, indices_t indices_)
        {
            X = X_;
            indices = indices_;
@@ -32,113 +38,109 @@ namespace mdlp {
            }
        }
    };
-    // 
+    TEST_F(TestFImdlp, SortIndices)
    TEST_F(TestMetrics, SortIndices)
    {
        X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
-        indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
+        indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
-        check_sorted_vector(X, indices);
+        checkSortedVector(X, indices);
        X = { 5.77, 5.88, 5.99 };
        indices = { 0, 1, 2 };
-        check_sorted_vector(X, indices);
+        checkSortedVector(X, indices);
        X = { 5.33, 5.22, 5.11 };
        indices = { 2, 1, 0 };
-        check_sorted_vector(X, indices);
+        checkSortedVector(X, indices);
    }
-    TEST_F(TestMetrics, EvaluateCutPoint)
+    TEST_F(TestFImdlp, EvaluateCutPoint)
    {
        cutPoint_t rest, candidate;
-        rest.start = 0;
+        rest = { 0, 10, -1, -1, 1000 };
-        rest.end = 10;
+        candidate = { 0, 4, -1, -1, 5.15 };
        rest.classNumber = -1;
        rest.fromValue = -1;
        rest.toValue = 1000;
        candidate.start = 0;
        candidate.end = 4;
        candidate.fromValue = -1;
        candidate.toValue = 5.15;
        candidate.classNumber = -1;
        EXPECT_FALSE(evaluateCutPoint(rest, candidate));
    }
-    TEST_F(TestMetrics, ComputeCutPointsOriginal)
+    TEST_F(TestFImdlp, ComputeCutPointsOriginal)
    {
        cutPoints_t computed, expected;
        int expectedSize = 3;
        expected = {
            { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
-            { 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 }
+            { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
        };
        setCutPoints(cutPoints_t());
        computeCutPointsOriginal();
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 4);
+        EXPECT_EQ(computed.size(), expectedSize);
-        for (auto i = 0; i < 4; i++) {
+        for (auto i = 0; i < expectedSize; i++) {
            EXPECT_EQ(computed[i].start, expected[i].start);
            EXPECT_EQ(computed[i].end, expected[i].end);
            EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
-            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
+            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
-            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
+            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
        }
    }
-    TEST_F(TestMetrics, ComputeCutPointsOriginalGCase)
+    TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
    {
        cutPoints_t computed, expected;
        expected = {
                { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
        };
        int expectedSize = 1;
        X = { 0, 1, 2, 2 };
        y = { 1, 1, 1, 2 };
        fit(X, y);
        computeCutPointsOriginal();
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 1);
+        EXPECT_EQ(computed.size(), expectedSize);
-        for (auto i = 0; i < 1; i++) {
+        for (auto i = 0; i < expectedSize; i++) {
            EXPECT_EQ(computed[i].start, expected[i].start);
            EXPECT_EQ(computed[i].end, expected[i].end);
            EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
-            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
+            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
-            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
+            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
        }
    }
-    TEST_F(TestMetrics, ComputeCutPointsProposed)
+    TEST_F(TestFImdlp, ComputeCutPointsProposed)
    {
        cutPoints_t computed, expected;
        expected = {
-            { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
+            { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
-            { 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
+            { 6, 9, -1, 5.4, 5.85 },
            { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
        };
        int expectedSize = 4;
        computeCutPointsProposed();
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 5);
+        EXPECT_EQ(computed.size(), expectedSize);
-        for (auto i = 0; i < 5; i++) {
+        for (auto i = 0; i < expectedSize; i++) {
            EXPECT_EQ(computed[i].start, expected[i].start);
            EXPECT_EQ(computed[i].end, expected[i].end);
            EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
-            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
+            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
-            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
+            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
        }
    }
-    TEST_F(TestMetrics, ComputeCutPointsProposedGCase)
+    TEST_F(TestFImdlp, ComputeCutPointsProposedGCase)
    {
        cutPoints_t computed, expected;
        expected = {
                { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
                { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
        };
        int expectedSize = 2;
        X = { 0, 1, 2, 2 };
        y = { 1, 1, 1, 2 };
        fit(X, y);
        computeCutPointsProposed();
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 2);
+        EXPECT_EQ(computed.size(), expectedSize);
-        for (auto i = 0; i < 1; i++) {
+        for (auto i = 0; i < expectedSize; i++) {
            EXPECT_EQ(computed[i].start, expected[i].start);
            EXPECT_EQ(computed[i].end, expected[i].end);
            EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
-            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
+            EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
-            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
+            EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
        }
    }
-    TEST_F(TestMetrics, ApplyCutPoints)
+    TEST_F(TestFImdlp, ApplyCutPoints)
    {
        cutPoints_t expected = {
            { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
--- a/sample.py
+++ b/sample.py
@@ -69,13 +69,14 @@ for proposed in [True, False]:
    X = data.data
    y = data.target
    print("*** Proposed: ", proposed)
-    test = CFImdlp(debug=False, proposed=proposed)
+    test = CFImdlp(debug=True, proposed=proposed)
    test.fit(X[:, 0], y)
    result = test.get_cut_points()
    for item in result:
        print(
-            f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
+            f"Class={item['classNumber']} - ({item['start']:3d}, "
-            f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
+            f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
            f"{item['toValue']:3.1f}]"
        )
    print(test.get_discretized_values())
    print("+" * 40)
@@ -114,11 +115,14 @@ for proposed in [True, False]:
 # # k = test.cut_points_ant(X[:, 0], y)
 # # print(k)
 # # test.debug_points(X[:, 0], y)
-X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
+# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
-indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
+# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
 # indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
 # clf = CFImdlp(debug=True, proposed=False)
 # clf.fit(X, y)
 # print(clf.get_cut_points())
 # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
 # # To check
 # indices2 = np.argsort(X)
- Xs = np.array(X)[indices2]
+# Xs = np.array(X)[indices2]
- ys = np.array(y)[indices2]
+# ys = np.array(y)[indices2]