Fix test apply cutpoints

This commit is contained in:
2022-12-03 01:52:00 +01:00
parent b1f5d337fc
commit e99852c5d5
6 changed files with 91 additions and 65 deletions

View File

@@ -63,20 +63,6 @@ namespace mdlp {
applyCutPoints(); applyCutPoints();
return xDiscretized; return xDiscretized;
} }
/**
 * Debug helper: dumps the samples in ascending order of X_, fits the
 * discretizer on (X_, y_) and then dumps every resulting cut point.
 *
 * @param X_ sample values to inspect and fit on
 * @param y_ labels; indexed with the same positions as X_
 */
void CPPFImdlp::debugPoints(samples& X_, labels& y_)
{
    std::cout << "+++++++++++++++++++++++" << std::endl;
    // Sort the samples received as argument: the member X is only assigned by
    // fit(), which runs further down, so sorting it here would yield indices
    // that do not correspond to X_ / y_.
    indices_t indices_n = sortIndices(X_);
    for (size_t i = 0; i < indices_n.size(); i++) {
        const size_t pos = static_cast<size_t>(indices_n[i]);
        // %zu is the portable conversion for size_t (%lu is wrong on LLP64).
        printf("(%3zu, %3zu) -> (%3.1f, %d)\n", i, pos, X_[pos], y_[pos]);
    }
    std::cout << "+++++++++++++++++++++++" << std::endl;
    fit(X_, y_);
    for (const auto& item : cutPoints) {
        std::cout << item.start << " X_[" << item.end << "]=";
        // The end of the last cut point can be one past the last sample,
        // so only dereference it when it is a valid position.
        if (static_cast<size_t>(item.end) < X_.size()) {
            std::cout << X_[item.end];
        } else {
            std::cout << "<out of range>";
        }
        std::cout << std::endl;
    }
}
void CPPFImdlp::applyCutPoints() void CPPFImdlp::applyCutPoints()
{ {
for (auto cut : cutPoints) { for (auto cut : cutPoints) {
@@ -128,6 +114,7 @@ namespace mdlp {
if (debug) if (debug)
std::cout << "Accepted" << std::endl; std::cout << "Accepted" << std::endl;
if (lastReject) { if (lastReject) {
//Try to merge rejected intervals
if (first) { if (first) {
item.fromValue = std::numeric_limits<float>::lowest(); item.fromValue = std::numeric_limits<float>::lowest();
item.start = indices[0]; item.start = indices[0];
@@ -141,6 +128,7 @@ namespace mdlp {
filtered.push_back(item); filtered.push_back(item);
first = false; first = false;
rest.start = item.end; rest.start = item.end;
lastReject = false;
} else { } else {
if (debug) if (debug)
std::cout << "Rejected" << std::endl; std::cout << "Rejected" << std::endl;
@@ -153,7 +141,6 @@ namespace mdlp {
} else { } else {
filtered.push_back(rest); filtered.push_back(rest);
} }
cutPoints = filtered; cutPoints = filtered;
} }
void CPPFImdlp::computeCutPointsProposed() void CPPFImdlp::computeCutPointsProposed()
@@ -190,7 +177,7 @@ namespace mdlp {
while (idx < numElements && xCur == xPivot); while (idx < numElements && xCur == xPivot);
if (yPivot == -1 || yPrev != yCur) { if (yPivot == -1 || yPrev != yCur) {
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = idx - 1; cutPoint.end = idx;
start = idx; start = idx;
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue; cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.toValue = (xPrev + xCur) / 2;
@@ -214,8 +201,9 @@ namespace mdlp {
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
} }
if (debug)
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "Cut point: " << cutPt; std::cout << "Proposed: Cut point: " << cutPt;
cutPoints = cutPts; cutPoints = cutPts;
} }
void CPPFImdlp::computeCutPointsOriginal() void CPPFImdlp::computeCutPointsOriginal()
@@ -260,7 +248,7 @@ namespace mdlp {
cutPts.back().end = X.size(); cutPts.back().end = X.size();
if (debug) if (debug)
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "-Cut point: " << cutPt; std::cout << "Original: Cut point: " << cutPt;
cutPoints = cutPts; cutPoints = cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
@@ -273,4 +261,12 @@ namespace mdlp {
{ return X_[i1] < X_[i2]; }); { return X_[i1] < X_[i2]; });
return idx; return idx;
} }
/**
 * Replaces the cut points held by this object.
 *
 * @param cutPoints_ cut points to store; they are copied into the member
 */
void CPPFImdlp::setCutPoints(cutPoints_t cutPoints_)
{
    this->cutPoints = cutPoints_;
}
/**
 * Accessor for the index list held by this object.
 * NOTE(review): presumably the argsort of X produced during fit() - confirm.
 *
 * @return a copy of the `indices` member
 */
indices_t CPPFImdlp::getIndices()
{
    return this->indices;
}
} }

View File

@@ -17,6 +17,7 @@ namespace mdlp {
cutPoints_t cutPoints; cutPoints_t cutPoints;
protected: protected:
void setCutPoints(cutPoints_t);
static indices_t sortIndices(samples&); static indices_t sortIndices(samples&);
void computeCutPointsOriginal(); void computeCutPointsOriginal();
void computeCutPointsProposed(); void computeCutPointsProposed();
@@ -29,6 +30,7 @@ namespace mdlp {
CPPFImdlp(bool, int, bool debug = false); CPPFImdlp(bool, int, bool debug = false);
~CPPFImdlp(); ~CPPFImdlp();
cutPoints_t getCutPoints(); cutPoints_t getCutPoints();
indices_t getIndices();
labels getDiscretizedValues(); labels getDiscretizedValues();
void debugPoints(samples&, labels&); void debugPoints(samples&, labels&);
CPPFImdlp& fit(samples&, labels&); CPPFImdlp& fit(samples&, labels&);

View File

@@ -15,7 +15,6 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
vector[int] transform(vector[float]&) vector[int] transform(vector[float]&)
vector[int] getDiscretizedValues() vector[int] getDiscretizedValues()
vector[CutPointBody] getCutPoints() vector[CutPointBody] getCutPoints()
void debugPoints(vector[float]&, vector[int]&)
class PcutPoint_t: class PcutPoint_t:
@@ -41,6 +40,4 @@ cdef class CFImdlp:
return self.thisptr.getDiscretizedValues() return self.thisptr.getDiscretizedValues()
def get_cut_points(self): def get_cut_points(self):
return self.thisptr.getCutPoints() return self.thisptr.getCutPoints()
def debug_points(self, X, y):
return self.thisptr.debugPoints(X, y)

View File

@@ -4,7 +4,7 @@
namespace mdlp { namespace mdlp {
class TestMetrics : public CPPFImdlp, public testing::Test { class TestMetrics : public CPPFImdlp, public testing::Test {
public: public:
TestMetrics() : CPPFImdlp(true, 6, false) {} TestMetrics() : CPPFImdlp(true, 6, true) {}
indices_t indices; // sorted indices to use with X and y indices_t indices; // sorted indices to use with X and y
samples X; samples X;
labels y; labels y;
@@ -13,6 +13,8 @@ namespace mdlp {
float precision_test = 0.000001; float precision_test = 0.000001;
void SetUp() void SetUp()
{ {
// X sorted:      [5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
// (X, y) sorted: (5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
fit(X, y); fit(X, y);
@@ -100,8 +102,8 @@ namespace mdlp {
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
expected = { expected = {
{ 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 }, { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
{ 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 }, { 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 } { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
}; };
computeCutPointsProposed(); computeCutPointsProposed();
@@ -119,7 +121,7 @@ namespace mdlp {
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
expected = { expected = {
{ 0, 2, -1, -3.4028234663852886e+38, 1.5 }, { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 } { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
}; };
X = { 0, 1, 2, 2 }; X = { 0, 1, 2, 2 };
@@ -136,4 +138,23 @@ namespace mdlp {
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
} }
} }
// Installs a hand-written set of cut points, applies them, and checks that
// every position covered by a cut point (looked up through the indices
// returned by getIndices()) was assigned that cut point's class number.
TEST_F(TestMetrics, ApplyCutPoints)
{
    cutPoints_t expected = {
        { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
        { 6, 8, 59, 5.4, 5.85 },
        { 8, 10, 41, 5.85, 3.4028234663852886e+38 }
    };
    setCutPoints(expected);
    applyCutPoints();
    labels expected_x = getDiscretizedValues();
    indices_t indices_x = getIndices();
    // Iterate over the cut points that actually exist; a hard-coded bound
    // larger than the list size reads past the end of `expected`.
    for (size_t i = 0; i < expected.size(); i++) {
        std::cout << "cutPoint[" << i << "].start = " << expected[i].start << std::endl;
        for (auto j = expected[i].start; j < expected[i].end; j++) {
            // Print the very value that is asserted on the next line.
            std::cout << expected_x[indices_x[j]] << expected[i].classNumber << std::endl;
            EXPECT_EQ(expected_x[indices_x[j]], expected[i].classNumber);
        }
    }
}
} }

View File

@@ -65,31 +65,37 @@ features = data.feature_names
# test.fit(X, y, features=features) # test.fit(X, y, features=features)
# test.transform(X) # test.transform(X)
# test.get_cut_points() # test.get_cut_points()
for proposed in [True, False]:
test = CFImdlp(debug=False, proposed=False) X = data.data
# # k = test.cut_points(X[:, 0], y) y = data.target
# # print(k) print("*** Proposed: ", proposed)
# # k = test.cut_points_ant(X[:, 0], y) test = CFImdlp(debug=False, proposed=proposed)
# # print(k)
# # test.debug_points(X[:, 0], y)
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# # To check
# indices2 = np.argsort(X)
# Xs = np.array(X)[indices2]
# ys = np.array(y)[indices2]
test.fit(X[:, 0], y) test.fit(X[:, 0], y)
# test.fit(X, y)
result = test.get_cut_points() result = test.get_cut_points()
# for item in result: for item in result:
# print( print(
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
# ) )
print(test.get_discretized_values()) print(test.get_discretized_values())
print("+" * 40)
X = np.array(
[
[5.1, 3.5, 1.4, 0.2],
[5.2, 3.0, 1.4, 0.2],
[5.3, 3.2, 1.3, 0.2],
[5.4, 3.1, 1.5, 0.2],
]
)
y = np.array([0, 0, 0, 1])
print(test.fit(X[:, 0], y).transform(X[:, 0]))
result = test.get_cut_points()
for item in result:
print(
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
)
print("*" * 40)
# print(Xs, ys) # print(Xs, ys)
# print("**********************") # print("**********************")
# test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)] # test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
@@ -102,13 +108,17 @@ print(test.get_discretized_values())
# print(indices) # print(indices)
# print(np.array(X)[indices]) # print(np.array(X)[indices])
X = np.array(
[ # # k = test.cut_points(X[:, 0], y)
[5.1, 3.5, 1.4, 0.2], # # print(k)
[5.2, 3.0, 1.4, 0.2], # # k = test.cut_points_ant(X[:, 0], y)
[5.3, 3.2, 1.3, 0.2], # # print(k)
[5.3, 3.1, 1.5, 0.2], # # test.debug_points(X[:, 0], y)
] X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
) indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
y = np.array([0, 0, 0, 1]) # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
print(test.fit(X[:, 0], y).transform(X[:, 0])) # # To check
# indices2 = np.argsort(X)
Xs = np.array(X)[indices2]
ys = np.array(y)[indices2]