mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Fix test apply cutpoints
This commit is contained in:
@@ -63,20 +63,6 @@ namespace mdlp {
|
||||
applyCutPoints();
|
||||
return xDiscretized;
|
||||
}
|
||||
/**
 * Debugging aid: prints the samples in ascending order of value, fits the
 * discretizer on them and then prints the cut points left after fitting.
 *
 * @param X_ sample values to inspect and fit
 * @param y_ class label of each sample, addressed with the same positions as X_
 */
void CPPFImdlp::debugPoints(samples& X_, labels& y_)
{
    std::cout << "+++++++++++++++++++++++" << std::endl;
    // Sort the argument that is printed below. The member X may not hold this
    // data yet, because fit(X_, y_) is only called further down.
    indices_t indices_n = sortIndices(X_);
    for (size_t i = 0; i < indices_n.size(); i++) {
        // Explicit casts keep the arguments in line with the %lu conversions
        // on platforms where size_t is not unsigned long.
        printf("(%3lu, %3lu) -> (%3.1f, %d)\n", static_cast<unsigned long>(i),
            static_cast<unsigned long>(indices_n[i]), X_[indices_n[i]], y_[indices_n[i]]);
    }
    std::cout << "+++++++++++++++++++++++" << std::endl;
    fit(X_, y_);
    for (const auto& item : cutPoints) {
        std::cout << item.start << " X_[" << item.end << "]=";
        // The end of an interval may equal the number of samples, so it is
        // not always a valid position in X_; never read past the vector.
        if (static_cast<size_t>(item.end) < X_.size()) {
            std::cout << X_[item.end];
        } else {
            std::cout << "<end>";
        }
        std::cout << std::endl;
    }
}
|
||||
void CPPFImdlp::applyCutPoints()
|
||||
{
|
||||
for (auto cut : cutPoints) {
|
||||
@@ -128,6 +114,7 @@ namespace mdlp {
|
||||
if (debug)
|
||||
std::cout << "Accepted" << std::endl;
|
||||
if (lastReject) {
|
||||
//Try to merge rejected intervals
|
||||
if (first) {
|
||||
item.fromValue = std::numeric_limits<float>::lowest();
|
||||
item.start = indices[0];
|
||||
@@ -141,6 +128,7 @@ namespace mdlp {
|
||||
filtered.push_back(item);
|
||||
first = false;
|
||||
rest.start = item.end;
|
||||
lastReject = false;
|
||||
} else {
|
||||
if (debug)
|
||||
std::cout << "Rejected" << std::endl;
|
||||
@@ -153,7 +141,6 @@ namespace mdlp {
|
||||
} else {
|
||||
filtered.push_back(rest);
|
||||
}
|
||||
|
||||
cutPoints = filtered;
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsProposed()
|
||||
@@ -190,7 +177,7 @@ namespace mdlp {
|
||||
while (idx < numElements && xCur == xPivot);
|
||||
if (yPivot == -1 || yPrev != yCur) {
|
||||
cutPoint.start = start;
|
||||
cutPoint.end = idx - 1;
|
||||
cutPoint.end = idx;
|
||||
start = idx;
|
||||
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
||||
cutPoint.toValue = (xPrev + xCur) / 2;
|
||||
@@ -214,8 +201,9 @@ namespace mdlp {
|
||||
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||
cutPts.push_back(cutPoint);
|
||||
}
|
||||
for (auto cutPt : cutPts)
|
||||
std::cout << "Cut point: " << cutPt;
|
||||
if (debug)
|
||||
for (auto cutPt : cutPts)
|
||||
std::cout << "Proposed: Cut point: " << cutPt;
|
||||
cutPoints = cutPts;
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsOriginal()
|
||||
@@ -260,7 +248,7 @@ namespace mdlp {
|
||||
cutPts.back().end = X.size();
|
||||
if (debug)
|
||||
for (auto cutPt : cutPts)
|
||||
std::cout << "-Cut point: " << cutPt;
|
||||
std::cout << "Original: Cut point: " << cutPt;
|
||||
cutPoints = cutPts;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
@@ -273,4 +261,12 @@ namespace mdlp {
|
||||
{ return X_[i1] < X_[i2]; });
|
||||
return idx;
|
||||
}
|
||||
/**
 * Replaces the stored cut points with the supplied collection.
 *
 * @param cutPoints_ cut points to store in the `cutPoints` member
 */
void CPPFImdlp::setCutPoints(cutPoints_t cutPoints_)
{
    this->cutPoints = cutPoints_;
}
|
||||
/**
 * Accessor for the `indices` member.
 *
 * @return a copy of the indices currently stored in the object
 */
indices_t CPPFImdlp::getIndices()
{
    return this->indices;
}
|
||||
}
|
||||
|
@@ -17,6 +17,7 @@ namespace mdlp {
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
protected:
|
||||
void setCutPoints(cutPoints_t);
|
||||
static indices_t sortIndices(samples&);
|
||||
void computeCutPointsOriginal();
|
||||
void computeCutPointsProposed();
|
||||
@@ -29,6 +30,7 @@ namespace mdlp {
|
||||
CPPFImdlp(bool, int, bool debug = false);
|
||||
~CPPFImdlp();
|
||||
cutPoints_t getCutPoints();
|
||||
indices_t getIndices();
|
||||
labels getDiscretizedValues();
|
||||
void debugPoints(samples&, labels&);
|
||||
CPPFImdlp& fit(samples&, labels&);
|
||||
|
@@ -15,7 +15,6 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
|
||||
vector[int] transform(vector[float]&)
|
||||
vector[int] getDiscretizedValues()
|
||||
vector[CutPointBody] getCutPoints()
|
||||
void debugPoints(vector[float]&, vector[int]&)
|
||||
|
||||
|
||||
class PcutPoint_t:
|
||||
@@ -41,6 +40,4 @@ cdef class CFImdlp:
|
||||
return self.thisptr.getDiscretizedValues()
|
||||
def get_cut_points(self):
    """Return the result of getCutPoints() on the wrapped C++ object."""
    cut_points = self.thisptr.getCutPoints()
    return cut_points
|
||||
def debug_points(self, X, y):
|
||||
return self.thisptr.debugPoints(X, y)
|
||||
|
Binary file not shown.
@@ -4,7 +4,7 @@
|
||||
namespace mdlp {
|
||||
class TestMetrics : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
TestMetrics() : CPPFImdlp(true, 6, false) {}
|
||||
TestMetrics() : CPPFImdlp(true, 6, true) {}
|
||||
indices_t indices; // sorted indices to use with X and y
|
||||
samples X;
|
||||
labels y;
|
||||
@@ -13,6 +13,8 @@ namespace mdlp {
|
||||
float precision_test = 0.000001;
|
||||
void SetUp()
|
||||
{
|
||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
fit(X, y);
|
||||
@@ -82,8 +84,8 @@ namespace mdlp {
|
||||
expected = {
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||
};
|
||||
X = {0, 1, 2, 2};
|
||||
y = {1, 1, 1, 2};
|
||||
X = { 0, 1, 2, 2 };
|
||||
y = { 1, 1, 1, 2 };
|
||||
fit(X, y);
|
||||
computeCutPointsOriginal();
|
||||
computed = getCutPoints();
|
||||
@@ -100,8 +102,8 @@ namespace mdlp {
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
expected = {
|
||||
{ 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 },
|
||||
{ 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 },
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
|
||||
{ 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
|
||||
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||
};
|
||||
computeCutPointsProposed();
|
||||
@@ -119,11 +121,11 @@ namespace mdlp {
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
expected = {
|
||||
{ 0, 2, -1, -3.4028234663852886e+38, 1.5 },
|
||||
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||
};
|
||||
X = {0, 1, 2, 2};
|
||||
y = {1, 1, 1, 2};
|
||||
X = { 0, 1, 2, 2 };
|
||||
y = { 1, 1, 1, 2 };
|
||||
fit(X, y);
|
||||
computeCutPointsProposed();
|
||||
computed = getCutPoints();
|
||||
@@ -136,4 +138,23 @@ namespace mdlp {
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||
}
|
||||
}
|
||||
// Checks that applyCutPoints() labels every sample covered by a cut point
// with that cut point's classNumber.
TEST_F(TestMetrics, ApplyCutPoints)
{
    cutPoints_t expected = {
        { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
        { 6, 8, 59, 5.4, 5.85 },
        { 8, 10, 41, 5.85, 3.4028234663852886e+38 }
    };
    setCutPoints(expected);
    applyCutPoints();
    labels computed = getDiscretizedValues();
    indices_t indices_x = getIndices();
    // Bound the loop by the real number of cut points; a hard-coded count
    // larger than the container reads past its end.
    for (size_t i = 0; i < expected.size(); i++) {
        std::cout << "cutPoint[" << i << "].start = " << expected[i].start << std::endl;
        // [start, end) are positions that are mapped through indices_x
        // before looking up the discretized value.
        for (auto j = expected[i].start; j < expected[i].end; j++) {
            EXPECT_EQ(computed[indices_x[j]], expected[i].classNumber);
        }
    }
}
|
||||
}
|
80
sample.py
80
sample.py
@@ -65,31 +65,37 @@ features = data.feature_names
|
||||
# test.fit(X, y, features=features)
|
||||
# test.transform(X)
|
||||
# test.get_cut_points()
|
||||
|
||||
test = CFImdlp(debug=False, proposed=False)
|
||||
# # k = test.cut_points(X[:, 0], y)
|
||||
# # print(k)
|
||||
# # k = test.cut_points_ant(X[:, 0], y)
|
||||
# # print(k)
|
||||
# # test.debug_points(X[:, 0], y)
|
||||
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# # To check
|
||||
# indices2 = np.argsort(X)
|
||||
# Xs = np.array(X)[indices2]
|
||||
# ys = np.array(y)[indices2]
|
||||
|
||||
test.fit(X[:, 0], y)
|
||||
# test.fit(X, y)
|
||||
result = test.get_cut_points()
|
||||
# for item in result:
|
||||
# print(
|
||||
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||
# )
|
||||
print(test.get_discretized_values())
|
||||
|
||||
for proposed in [True, False]:
|
||||
X = data.data
|
||||
y = data.target
|
||||
print("*** Proposed: ", proposed)
|
||||
test = CFImdlp(debug=False, proposed=proposed)
|
||||
test.fit(X[:, 0], y)
|
||||
result = test.get_cut_points()
|
||||
for item in result:
|
||||
print(
|
||||
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||
)
|
||||
print(test.get_discretized_values())
|
||||
print("+" * 40)
|
||||
X = np.array(
|
||||
[
|
||||
[5.1, 3.5, 1.4, 0.2],
|
||||
[5.2, 3.0, 1.4, 0.2],
|
||||
[5.3, 3.2, 1.3, 0.2],
|
||||
[5.4, 3.1, 1.5, 0.2],
|
||||
]
|
||||
)
|
||||
y = np.array([0, 0, 0, 1])
|
||||
print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
||||
result = test.get_cut_points()
|
||||
for item in result:
|
||||
print(
|
||||
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||
)
|
||||
print("*" * 40)
|
||||
# print(Xs, ys)
|
||||
# print("**********************")
|
||||
# test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
||||
@@ -102,13 +108,17 @@ print(test.get_discretized_values())
|
||||
# print(indices)
|
||||
# print(np.array(X)[indices])
|
||||
|
||||
X = np.array(
|
||||
[
|
||||
[5.1, 3.5, 1.4, 0.2],
|
||||
[5.2, 3.0, 1.4, 0.2],
|
||||
[5.3, 3.2, 1.3, 0.2],
|
||||
[5.3, 3.1, 1.5, 0.2],
|
||||
]
|
||||
)
|
||||
y = np.array([0, 0, 0, 1])
|
||||
print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
||||
|
||||
# # k = test.cut_points(X[:, 0], y)
|
||||
# # print(k)
|
||||
# # k = test.cut_points_ant(X[:, 0], y)
|
||||
# # print(k)
|
||||
# # test.debug_points(X[:, 0], y)
|
||||
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# # To check
|
||||
# indices2 = np.argsort(X)
|
||||
Xs = np.array(X)[indices2]
|
||||
ys = np.array(y)[indices2]
|
||||
|
||||
|
Reference in New Issue
Block a user