mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Fix test apply cutpoints
This commit is contained in:
@@ -63,20 +63,6 @@ namespace mdlp {
|
|||||||
applyCutPoints();
|
applyCutPoints();
|
||||||
return xDiscretized;
|
return xDiscretized;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::debugPoints(samples& X_, labels& y_)
|
|
||||||
{
|
|
||||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
|
||||||
// for (auto i : sortIndices(X))
|
|
||||||
indices_t indices_n = sortIndices(X);
|
|
||||||
for (size_t i = 0; i < indices_n.size(); i++) {
|
|
||||||
printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices_n[i], X_[indices_n[i]], y_[indices_n[i]]);
|
|
||||||
}
|
|
||||||
std::cout << "+++++++++++++++++++++++" << std::endl;
|
|
||||||
fit(X_, y_);
|
|
||||||
for (auto item : cutPoints) {
|
|
||||||
std::cout << item.start << " X_[" << item.end << "]=" << X_[item.end] << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void CPPFImdlp::applyCutPoints()
|
void CPPFImdlp::applyCutPoints()
|
||||||
{
|
{
|
||||||
for (auto cut : cutPoints) {
|
for (auto cut : cutPoints) {
|
||||||
@@ -128,6 +114,7 @@ namespace mdlp {
|
|||||||
if (debug)
|
if (debug)
|
||||||
std::cout << "Accepted" << std::endl;
|
std::cout << "Accepted" << std::endl;
|
||||||
if (lastReject) {
|
if (lastReject) {
|
||||||
|
//Try to merge rejected intervals
|
||||||
if (first) {
|
if (first) {
|
||||||
item.fromValue = std::numeric_limits<float>::lowest();
|
item.fromValue = std::numeric_limits<float>::lowest();
|
||||||
item.start = indices[0];
|
item.start = indices[0];
|
||||||
@@ -141,6 +128,7 @@ namespace mdlp {
|
|||||||
filtered.push_back(item);
|
filtered.push_back(item);
|
||||||
first = false;
|
first = false;
|
||||||
rest.start = item.end;
|
rest.start = item.end;
|
||||||
|
lastReject = false;
|
||||||
} else {
|
} else {
|
||||||
if (debug)
|
if (debug)
|
||||||
std::cout << "Rejected" << std::endl;
|
std::cout << "Rejected" << std::endl;
|
||||||
@@ -153,7 +141,6 @@ namespace mdlp {
|
|||||||
} else {
|
} else {
|
||||||
filtered.push_back(rest);
|
filtered.push_back(rest);
|
||||||
}
|
}
|
||||||
|
|
||||||
cutPoints = filtered;
|
cutPoints = filtered;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsProposed()
|
void CPPFImdlp::computeCutPointsProposed()
|
||||||
@@ -190,7 +177,7 @@ namespace mdlp {
|
|||||||
while (idx < numElements && xCur == xPivot);
|
while (idx < numElements && xCur == xPivot);
|
||||||
if (yPivot == -1 || yPrev != yCur) {
|
if (yPivot == -1 || yPrev != yCur) {
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = idx - 1;
|
cutPoint.end = idx;
|
||||||
start = idx;
|
start = idx;
|
||||||
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
|
||||||
cutPoint.toValue = (xPrev + xCur) / 2;
|
cutPoint.toValue = (xPrev + xCur) / 2;
|
||||||
@@ -214,8 +201,9 @@ namespace mdlp {
|
|||||||
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
}
|
}
|
||||||
for (auto cutPt : cutPts)
|
if (debug)
|
||||||
std::cout << "Cut point: " << cutPt;
|
for (auto cutPt : cutPts)
|
||||||
|
std::cout << "Proposed: Cut point: " << cutPt;
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsOriginal()
|
void CPPFImdlp::computeCutPointsOriginal()
|
||||||
@@ -260,7 +248,7 @@ namespace mdlp {
|
|||||||
cutPts.back().end = X.size();
|
cutPts.back().end = X.size();
|
||||||
if (debug)
|
if (debug)
|
||||||
for (auto cutPt : cutPts)
|
for (auto cutPt : cutPts)
|
||||||
std::cout << "-Cut point: " << cutPt;
|
std::cout << "Original: Cut point: " << cutPt;
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
@@ -273,4 +261,12 @@ namespace mdlp {
|
|||||||
{ return X_[i1] < X_[i2]; });
|
{ return X_[i1] < X_[i2]; });
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
// Replaces the stored cut points with the ones supplied by the caller.
// The argument is taken by value and copied into the member.
void CPPFImdlp::setCutPoints(cutPoints_t cutPoints_)
{
    this->cutPoints = cutPoints_;
}
|
||||||
|
// Returns a copy of the `indices` member held by this object.
indices_t CPPFImdlp::getIndices()
{
    return this->indices;
}
|
||||||
}
|
}
|
||||||
|
@@ -17,6 +17,7 @@ namespace mdlp {
|
|||||||
cutPoints_t cutPoints;
|
cutPoints_t cutPoints;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
void setCutPoints(cutPoints_t);
|
||||||
static indices_t sortIndices(samples&);
|
static indices_t sortIndices(samples&);
|
||||||
void computeCutPointsOriginal();
|
void computeCutPointsOriginal();
|
||||||
void computeCutPointsProposed();
|
void computeCutPointsProposed();
|
||||||
@@ -29,6 +30,7 @@ namespace mdlp {
|
|||||||
CPPFImdlp(bool, int, bool debug = false);
|
CPPFImdlp(bool, int, bool debug = false);
|
||||||
~CPPFImdlp();
|
~CPPFImdlp();
|
||||||
cutPoints_t getCutPoints();
|
cutPoints_t getCutPoints();
|
||||||
|
indices_t getIndices();
|
||||||
labels getDiscretizedValues();
|
labels getDiscretizedValues();
|
||||||
void debugPoints(samples&, labels&);
|
void debugPoints(samples&, labels&);
|
||||||
CPPFImdlp& fit(samples&, labels&);
|
CPPFImdlp& fit(samples&, labels&);
|
||||||
|
@@ -15,7 +15,6 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
|
|||||||
vector[int] transform(vector[float]&)
|
vector[int] transform(vector[float]&)
|
||||||
vector[int] getDiscretizedValues()
|
vector[int] getDiscretizedValues()
|
||||||
vector[CutPointBody] getCutPoints()
|
vector[CutPointBody] getCutPoints()
|
||||||
void debugPoints(vector[float]&, vector[int]&)
|
|
||||||
|
|
||||||
|
|
||||||
class PcutPoint_t:
|
class PcutPoint_t:
|
||||||
@@ -41,6 +40,4 @@ cdef class CFImdlp:
|
|||||||
return self.thisptr.getDiscretizedValues()
|
return self.thisptr.getDiscretizedValues()
|
||||||
def get_cut_points(self):
|
def get_cut_points(self):
|
||||||
return self.thisptr.getCutPoints()
|
return self.thisptr.getCutPoints()
|
||||||
def debug_points(self, X, y):
|
|
||||||
return self.thisptr.debugPoints(X, y)
|
|
||||||
|
|
Binary file not shown.
@@ -4,7 +4,7 @@
|
|||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class TestMetrics : public CPPFImdlp, public testing::Test {
|
class TestMetrics : public CPPFImdlp, public testing::Test {
|
||||||
public:
|
public:
|
||||||
TestMetrics() : CPPFImdlp(true, 6, false) {}
|
TestMetrics() : CPPFImdlp(true, 6, true) {}
|
||||||
indices_t indices; // sorted indices to use with X and y
|
indices_t indices; // sorted indices to use with X and y
|
||||||
samples X;
|
samples X;
|
||||||
labels y;
|
labels y;
|
||||||
@@ -13,6 +13,8 @@ namespace mdlp {
|
|||||||
float precision_test = 0.000001;
|
float precision_test = 0.000001;
|
||||||
void SetUp()
|
void SetUp()
|
||||||
{
|
{
|
||||||
|
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||||
|
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
@@ -82,8 +84,8 @@ namespace mdlp {
|
|||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||||
};
|
};
|
||||||
X = {0, 1, 2, 2};
|
X = { 0, 1, 2, 2 };
|
||||||
y = {1, 1, 1, 2};
|
y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsOriginal();
|
computeCutPointsOriginal();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
@@ -100,8 +102,8 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 3, -1, -3.4028234663852886e+38, 5.1 }, { 4, 4, -1, 5.1, 5.2 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
|
||||||
{ 5, 5, -1, 5.2, 5.4 }, { 6, 8, -1, 5.4, 5.85 },
|
{ 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
|
||||||
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||||
};
|
};
|
||||||
computeCutPointsProposed();
|
computeCutPointsProposed();
|
||||||
@@ -119,11 +121,11 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 2, -1, -3.4028234663852886e+38, 1.5 },
|
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||||
};
|
};
|
||||||
X = {0, 1, 2, 2};
|
X = { 0, 1, 2, 2 };
|
||||||
y = {1, 1, 1, 2};
|
y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsProposed();
|
computeCutPointsProposed();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
@@ -136,4 +138,23 @@ namespace mdlp {
|
|||||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Installs a hand-written set of cut points, applies them, and checks that
// every position covered by a cut point's [start, end) range received that
// cut point's class number in the discretized output.
TEST_F(TestMetrics, ApplyCutPoints)
{
    cutPoints_t expected = {
        { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
        { 6, 8, 59, 5.4, 5.85 },
        { 8, 10, 41, 5.85, 3.4028234663852886e+38 }
    };
    setCutPoints(expected);
    applyCutPoints();
    labels computed_x = getDiscretizedValues();
    indices_t indices_x = getIndices();
    // Iterate over the cut points that actually exist. The previous bound was
    // a hard-coded 5, which read expected[4] past the end of the 4-element
    // container.
    for (size_t i = 0; i < expected.size(); i++) {
        const auto& cut = expected[i];
        std::cout << "cutPoint[" << i << "].start = " << cut.start << std::endl;
        for (auto j = cut.start; j < cut.end; j++) {
            std::cout << computed_x[j] << cut.classNumber << std::endl;
            // j is a position in the [start, end) range; indices_x maps it to
            // the slot of the discretized vector that is checked.
            EXPECT_EQ(computed_x[indices_x[j]], cut.classNumber);
        }
    }
}
|
||||||
}
|
}
|
80
sample.py
80
sample.py
@@ -65,31 +65,37 @@ features = data.feature_names
|
|||||||
# test.fit(X, y, features=features)
|
# test.fit(X, y, features=features)
|
||||||
# test.transform(X)
|
# test.transform(X)
|
||||||
# test.get_cut_points()
|
# test.get_cut_points()
|
||||||
|
for proposed in [True, False]:
|
||||||
test = CFImdlp(debug=False, proposed=False)
|
X = data.data
|
||||||
# # k = test.cut_points(X[:, 0], y)
|
y = data.target
|
||||||
# # print(k)
|
print("*** Proposed: ", proposed)
|
||||||
# # k = test.cut_points_ant(X[:, 0], y)
|
test = CFImdlp(debug=False, proposed=proposed)
|
||||||
# # print(k)
|
test.fit(X[:, 0], y)
|
||||||
# # test.debug_points(X[:, 0], y)
|
result = test.get_cut_points()
|
||||||
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
for item in result:
|
||||||
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
print(
|
||||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||||
# # To check
|
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||||
# indices2 = np.argsort(X)
|
)
|
||||||
# Xs = np.array(X)[indices2]
|
print(test.get_discretized_values())
|
||||||
# ys = np.array(y)[indices2]
|
print("+" * 40)
|
||||||
|
X = np.array(
|
||||||
test.fit(X[:, 0], y)
|
[
|
||||||
# test.fit(X, y)
|
[5.1, 3.5, 1.4, 0.2],
|
||||||
result = test.get_cut_points()
|
[5.2, 3.0, 1.4, 0.2],
|
||||||
# for item in result:
|
[5.3, 3.2, 1.3, 0.2],
|
||||||
# print(
|
[5.4, 3.1, 1.5, 0.2],
|
||||||
# f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
]
|
||||||
# f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
)
|
||||||
# )
|
y = np.array([0, 0, 0, 1])
|
||||||
print(test.get_discretized_values())
|
print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
||||||
|
result = test.get_cut_points()
|
||||||
|
for item in result:
|
||||||
|
print(
|
||||||
|
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||||
|
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||||
|
)
|
||||||
|
print("*" * 40)
|
||||||
# print(Xs, ys)
|
# print(Xs, ys)
|
||||||
# print("**********************")
|
# print("**********************")
|
||||||
# test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
# test = [(0, 3), (4, 4), (5, 5), (6, 8), (9, 9)]
|
||||||
@@ -102,13 +108,17 @@ print(test.get_discretized_values())
|
|||||||
# print(indices)
|
# print(indices)
|
||||||
# print(np.array(X)[indices])
|
# print(np.array(X)[indices])
|
||||||
|
|
||||||
X = np.array(
|
|
||||||
[
|
# # k = test.cut_points(X[:, 0], y)
|
||||||
[5.1, 3.5, 1.4, 0.2],
|
# # print(k)
|
||||||
[5.2, 3.0, 1.4, 0.2],
|
# # k = test.cut_points_ant(X[:, 0], y)
|
||||||
[5.3, 3.2, 1.3, 0.2],
|
# # print(k)
|
||||||
[5.3, 3.1, 1.5, 0.2],
|
# # test.debug_points(X[:, 0], y)
|
||||||
]
|
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||||
)
|
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||||
y = np.array([0, 0, 0, 1])
|
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||||
print(test.fit(X[:, 0], y).transform(X[:, 0]))
|
# # To check
|
||||||
|
# indices2 = np.argsort(X)
|
||||||
|
Xs = np.array(X)[indices2]
|
||||||
|
ys = np.array(y)[indices2]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user