diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index 5bd7829..3e21d38 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -63,20 +63,6 @@ namespace mdlp { applyCutPoints(); return xDiscretized; } - void CPPFImdlp::debugPoints(samples& X_, labels& y_) - { - std::cout << "+++++++++++++++++++++++" << std::endl; - // for (auto i : sortIndices(X)) - indices_t indices_n = sortIndices(X); - for (size_t i = 0; i < indices_n.size(); i++) { - printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices_n[i], X_[indices_n[i]], y_[indices_n[i]]); - } - std::cout << "+++++++++++++++++++++++" << std::endl; - fit(X_, y_); - for (auto item : cutPoints) { - std::cout << item.start << " X_[" << item.end << "]=" << X_[item.end] << std::endl; - } - } void CPPFImdlp::applyCutPoints() { for (auto cut : cutPoints) { @@ -128,6 +114,7 @@ namespace mdlp { if (debug) std::cout << "Accepted" << std::endl; if (lastReject) { + //Try to merge rejected intervals if (first) { item.fromValue = std::numeric_limits::lowest(); item.start = indices[0]; @@ -141,6 +128,7 @@ namespace mdlp { filtered.push_back(item); first = false; rest.start = item.end; + lastReject = false; } else { if (debug) std::cout << "Rejected" << std::endl; @@ -153,7 +141,6 @@ namespace mdlp { } else { filtered.push_back(rest); } - cutPoints = filtered; } void CPPFImdlp::computeCutPointsProposed() @@ -190,7 +177,7 @@ namespace mdlp { while (idx < numElements && xCur == xPivot); if (yPivot == -1 || yPrev != yCur) { cutPoint.start = start; - cutPoint.end = idx - 1; + cutPoint.end = idx; start = idx; cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; cutPoint.toValue = (xPrev + xCur) / 2; @@ -214,8 +201,9 @@ namespace mdlp { printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); cutPts.push_back(cutPoint); } - for (auto cutPt : cutPts) - std::cout << "Cut point: " << cutPt; + if (debug) + for (auto cutPt : cutPts) + std::cout << "Proposed: Cut point: " << cutPt; cutPoints = cutPts; } void CPPFImdlp::computeCutPointsOriginal() @@ -260,7 +248,7 @@ namespace mdlp { cutPts.back().end = X.size(); if (debug) for (auto cutPt : cutPts) - std::cout << "-Cut point: " << cutPt; + std::cout << "Original: Cut point: " << cutPt; cutPoints = cutPts; } // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes @@ -273,4 +261,12 @@ namespace mdlp { { return X_[i1] < X_[i2]; }); return idx; } + void CPPFImdlp::setCutPoints(cutPoints_t cutPoints_) + { + cutPoints = cutPoints_; + } + indices_t CPPFImdlp::getIndices() + { + return indices; + } } diff --git a/fimdlp/CPPFImdlp.h b/fimdlp/CPPFImdlp.h index 08ac0ff..97d7c35 100644 --- a/fimdlp/CPPFImdlp.h +++ b/fimdlp/CPPFImdlp.h @@ -17,6 +17,7 @@ namespace mdlp { cutPoints_t cutPoints; protected: + void setCutPoints(cutPoints_t); static indices_t sortIndices(samples&); void computeCutPointsOriginal(); void computeCutPointsProposed(); @@ -29,6 +30,7 @@ namespace mdlp { CPPFImdlp(bool, int, bool debug = false); ~CPPFImdlp(); cutPoints_t getCutPoints(); + indices_t getIndices(); labels getDiscretizedValues(); void debugPoints(samples&, labels&); CPPFImdlp& fit(samples&, labels&); diff --git a/fimdlp/cfimdlp.pyx b/fimdlp/cfimdlp.pyx index b4d553b..5093f96 100644 --- a/fimdlp/cfimdlp.pyx +++ b/fimdlp/cfimdlp.pyx @@ -15,7 +15,6 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp": vector[int] transform(vector[float]&) vector[int] getDiscretizedValues() vector[CutPointBody] getCutPoints() - void debugPoints(vector[float]&, vector[int]&) class PcutPoint_t: @@ -41,6 +40,4 @@ cdef class CFImdlp: return self.thisptr.getDiscretizedValues() def get_cut_points(self): return self.thisptr.getCutPoints() - def debug_points(self, X, y): - return self.thisptr.debugPoints(X, y) \ No newline at end of file diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index 4cef9e9..2ec680b 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/testcpp/FImdlp_unittest.cc b/fimdlp/testcpp/FImdlp_unittest.cc index 3be0b78..67744d7 100644 --- a/fimdlp/testcpp/FImdlp_unittest.cc +++ b/fimdlp/testcpp/FImdlp_unittest.cc @@ -4,7 +4,7 @@ namespace mdlp { class TestMetrics : public CPPFImdlp, public testing::Test { public: - TestMetrics() : CPPFImdlp(true, 6, false) {} + TestMetrics() : CPPFImdlp(true, 6, true) {} indices_t indices; // sorted indices to use with X and y samples X; labels y; @@ -13,6 +13,8 @@ namespace mdlp { float precision_test = 0.000001; void SetUp() { + // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0] + //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2) X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; fit(X, y); @@ -82,8 +84,8 @@ namespace mdlp { expected = { { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, }; - X = {0, 1, 2, 2}; - y = {1, 1, 1, 2}; + X = { 0, 1, 2, 2 }; + y = { 1, 1, 1, 2 }; fit(X, y); computeCutPointsOriginal(); computed = getCutPoints(); @@ -100,8 +102,8 @@ namespace mdlp { { cutPoints_t computed, expected; expected = { - { 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 }, - { 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 }, + { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 }, + { 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 }, { 9, 10, -1, 5.85, 3.4028234663852886e+38 } }; computeCutPointsProposed(); @@ -119,11 +121,11 @@ namespace mdlp { { cutPoints_t computed, expected; expected = { - { 0, 2, -1, -3.4028234663852886e+38, 1.5 }, + { 0, 3, -1, -3.4028234663852886e+38, 1.5 }, { 3, 4, -1, 1.5, 3.4028234663852886e+38 } }; - X = {0, 1, 2, 2}; - y = {1, 1, 1, 2}; + X = { 0, 1, 2, 2 }; + y = { 1, 1, 1, 2 }; fit(X, y); computeCutPointsProposed(); computed = getCutPoints(); @@ -136,4 +138,23 @@ namespace mdlp { EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); } } + TEST_F(TestMetrics, ApplyCutPoints) + { + cutPoints_t expected = { + { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 }, + { 6, 8, 59, 5.4, 5.85 }, + { 8, 10, 41, 5.85, 3.4028234663852886e+38 } + }; + setCutPoints(expected); + applyCutPoints(); + labels expected_x = getDiscretizedValues(); + indices_t indices_x = getIndices(); + for (auto i = 0; i < 5; i++) { + std::cout << "cutPoint[" << i << "].start = " << expected[i].start << std::endl; + for (auto j = expected[i].start; j < expected[i].end; j++) { + std::cout << expected_x[j] << expected[i].classNumber << std::endl; + EXPECT_EQ(expected_x[indices_x[j]], expected[i].classNumber); + } + } + } } \ No newline at end of file diff --git a/sample.py b/sample.py index ce2db6c..ffb8f01 100644 --- a/sample.py +++ b/sample.py @@ -65,31 +65,37 @@ features = data.feature_names # test.fit(X, y, features=features) # test.transform(X) # test.get_cut_points() - -test = CFImdlp(debug=False, proposed=False) -# # k = test.cut_points(X[:, 0], y) -# # print(k) -# # k = test.cut_points_ant(X[:, 0], y) -# # print(k) -# # test.debug_points(X[:, 0], y) -# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] -# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] -# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] -# # To check -# indices2 = np.argsort(X) -# Xs = np.array(X)[indices2] -# ys = np.array(y)[indices2] - -test.fit(X[:, 0], y) -# test.fit(X, y) -result = test.get_cut_points() -# for item in result: -# print( -# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" -# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" -# ) -print(test.get_discretized_values()) - +for proposed in [True, False]: + X = data.data + y = data.target + print("*** Proposed: ", proposed) + test = CFImdlp(debug=False, proposed=proposed) + test.fit(X[:, 0], y) + result = test.get_cut_points() + for item in result: + print( + f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" + f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" + ) + print(test.get_discretized_values()) + print("+" * 40) + X = np.array( + [ + [5.1, 3.5, 1.4, 0.2], + [5.2, 3.0, 1.4, 0.2], + [5.3, 3.2, 1.3, 0.2], + [5.4, 3.1, 1.5, 0.2], + ] + ) + y = np.array([0, 0, 0, 1]) + print(test.fit(X[:, 0], y).transform(X[:, 0])) + result = test.get_cut_points() + for item in result: + print( + f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" + f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" + ) + print("*" * 40) # print(Xs, ys) # print("**********************") # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)] @@ -102,13 +108,17 @@ print(test.get_discretized_values()) # print(indices) # print(np.array(X)[indices]) -X = np.array( - [ - [5.1, 3.5, 1.4, 0.2], - [5.2, 3.0, 1.4, 0.2], - [5.3, 3.2, 1.3, 0.2], - [5.3, 3.1, 1.5, 0.2], - ] -) -y = np.array([0, 0, 0, 1]) -print(test.fit(X[:, 0], y).transform(X[:, 0])) + +# # k = test.cut_points(X[:, 0], y) +# # print(k) +# # k = test.cut_points_ant(X[:, 0], y) +# # print(k) +# # test.debug_points(X[:, 0], y) +X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] +indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] +# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] +# # To check +# indices2 = np.argsort(X) + Xs = np.array(X)[indices2] + ys = np.array(y)[indices2] +