Refactor tests

This commit is contained in:
2022-12-04 01:45:32 +01:00
parent 5cce895177
commit 9ce10131d6
5 changed files with 82 additions and 67 deletions

View File

@@ -175,7 +175,10 @@ namespace mdlp {
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur); printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
} }
while (idx < numElements && xCur == xPivot); while (idx < numElements && xCur == xPivot);
if (yPivot == -1 || yPrev != yCur) { // Check that the class changed and that the interval holds more than 1 element
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
// Should the entropy criterion also be applied here?
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
cutPoint.start = start; cutPoint.start = start;
cutPoint.end = idx; cutPoint.end = idx;
start = idx; start = idx;
@@ -201,9 +204,11 @@ namespace mdlp {
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
cutPts.push_back(cutPoint); cutPts.push_back(cutPoint);
} }
if (debug) if (debug) {
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl;
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "Proposed: Cut point: " << cutPt; std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposed: Cut point: " << cutPt;
}
cutPoints = cutPts; cutPoints = cutPts;
} }
void CPPFImdlp::computeCutPointsOriginal() void CPPFImdlp::computeCutPointsOriginal()
@@ -219,8 +224,11 @@ namespace mdlp {
yPrev = y[idx]; yPrev = y[idx];
for (index = 0; index < size_t(indices.size()) - 1; index++) { for (index = 0; index < size_t(indices.size()) - 1; index++) {
idx = indices[index]; idx = indices[index];
// Definition 2 Cut points are always on boundaries // Definition 2 Cut points are always on class boundaries &&
if (y[idx] != yPrev && xPrev < X[idx]) { // there is more than 1 item in the interval
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
// Should the entropy criterion also be applied here?
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
if (first) { if (first) {
first = false; first = false;
cutPoint.fromValue = std::numeric_limits<float>::lowest(); cutPoint.fromValue = std::numeric_limits<float>::lowest();
@@ -246,9 +254,11 @@ namespace mdlp {
} else } else
cutPts.back().toValue = std::numeric_limits<float>::max(); cutPts.back().toValue = std::numeric_limits<float>::max();
cutPts.back().end = X.size(); cutPts.back().end = X.size();
if (debug) if (debug) {
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl;
for (auto cutPt : cutPts) for (auto cutPt : cutPts)
std::cout << "Original: Cut point: " << cutPt; std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
}
cutPoints = cutPts; cutPoints = cutPts;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes

View File

@@ -4,7 +4,7 @@
#include <utility> #include <utility>
namespace mdlp { namespace mdlp {
class CPPFImdlp { class CPPFImdlp {
private: protected:
bool proposed; // proposed algorithm or original algorithm bool proposed; // proposed algorithm or original algorithm
int precision; int precision;
bool debug; bool debug;
@@ -16,7 +16,6 @@ namespace mdlp {
int numClasses; int numClasses;
cutPoints_t cutPoints; cutPoints_t cutPoints;
protected:
void setCutPoints(cutPoints_t); void setCutPoints(cutPoints_t);
static indices_t sortIndices(samples&); static indices_t sortIndices(samples&);
void computeCutPointsOriginal(); void computeCutPointsOriginal();

View File

@@ -2,15 +2,9 @@
#include "../Metrics.h" #include "../Metrics.h"
#include "../CPPFImdlp.h" #include "../CPPFImdlp.h"
namespace mdlp { namespace mdlp {
class TestMetrics : public CPPFImdlp, public testing::Test { class TestFImdlp : public CPPFImdlp, public testing::Test {
public: public:
TestMetrics() : CPPFImdlp(true, 6, true) {} TestFImdlp() : CPPFImdlp(true, 6, true) {}
indices_t indices; // sorted indices to use with X and y
samples X;
labels y;
samples xDiscretized;
int numClasses;
float precision_test = 0.000001;
void SetUp() void SetUp()
{ {
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0] // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
@@ -19,7 +13,19 @@ namespace mdlp {
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
fit(X, y); fit(X, y);
} }
void check_sorted_vector(samples& X_, indices_t indices_) void initCutPoints()
{
setCutPoints(cutPoints_t());
}
void initIndices()
{
indices = indices_t();
}
void initDiscretized()
{
xDiscretized = labels();
}
void checkSortedVector(samples& X_, indices_t indices_)
{ {
X = X_; X = X_;
indices = indices_; indices = indices_;
@@ -32,113 +38,109 @@ namespace mdlp {
} }
} }
}; };
// TEST_F(TestFImdlp, SortIndices)
TEST_F(TestMetrics, SortIndices)
{ {
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
check_sorted_vector(X, indices); checkSortedVector(X, indices);
X = { 5.77, 5.88, 5.99 }; X = { 5.77, 5.88, 5.99 };
indices = { 0, 1, 2 }; indices = { 0, 1, 2 };
check_sorted_vector(X, indices); checkSortedVector(X, indices);
X = { 5.33, 5.22, 5.11 }; X = { 5.33, 5.22, 5.11 };
indices = { 2, 1, 0 }; indices = { 2, 1, 0 };
check_sorted_vector(X, indices); checkSortedVector(X, indices);
} }
TEST_F(TestMetrics, EvaluateCutPoint) TEST_F(TestFImdlp, EvaluateCutPoint)
{ {
cutPoint_t rest, candidate; cutPoint_t rest, candidate;
rest.start = 0; rest = { 0, 10, -1, -1, 1000 };
rest.end = 10; candidate = { 0, 4, -1, -1, 5.15 };
rest.classNumber = -1;
rest.fromValue = -1;
rest.toValue = 1000;
candidate.start = 0;
candidate.end = 4;
candidate.fromValue = -1;
candidate.toValue = 5.15;
candidate.classNumber = -1;
EXPECT_FALSE(evaluateCutPoint(rest, candidate)); EXPECT_FALSE(evaluateCutPoint(rest, candidate));
} }
TEST_F(TestMetrics, ComputeCutPointsOriginal) TEST_F(TestFImdlp, ComputeCutPointsOriginal)
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
int expectedSize = 3;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
{ 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 } { 6, 10, -1, 5.45, 3.4028234663852886e+38 }
}; };
setCutPoints(cutPoints_t());
computeCutPointsOriginal(); computeCutPointsOriginal();
computed = getCutPoints(); computed = getCutPoints();
EXPECT_EQ(computed.size(), 4); EXPECT_EQ(computed.size(), expectedSize);
for (auto i = 0; i < 4; i++) { for (auto i = 0; i < expectedSize; i++) {
EXPECT_EQ(computed[i].start, expected[i].start); EXPECT_EQ(computed[i].start, expected[i].start);
EXPECT_EQ(computed[i].end, expected[i].end); EXPECT_EQ(computed[i].end, expected[i].end);
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
} }
} }
TEST_F(TestMetrics, ComputeCutPointsOriginalGCase) TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
}; };
int expectedSize = 1;
X = { 0, 1, 2, 2 }; X = { 0, 1, 2, 2 };
y = { 1, 1, 1, 2 }; y = { 1, 1, 1, 2 };
fit(X, y); fit(X, y);
computeCutPointsOriginal(); computeCutPointsOriginal();
computed = getCutPoints(); computed = getCutPoints();
EXPECT_EQ(computed.size(), 1); EXPECT_EQ(computed.size(), expectedSize);
for (auto i = 0; i < 1; i++) { for (auto i = 0; i < expectedSize; i++) {
EXPECT_EQ(computed[i].start, expected[i].start); EXPECT_EQ(computed[i].start, expected[i].start);
EXPECT_EQ(computed[i].end, expected[i].end); EXPECT_EQ(computed[i].end, expected[i].end);
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
} }
} }
TEST_F(TestMetrics, ComputeCutPointsProposed) TEST_F(TestFImdlp, ComputeCutPointsProposed)
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
expected = { expected = {
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 }, { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
{ 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 }, { 6, 9, -1, 5.4, 5.85 },
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 } { 9, 10, -1, 5.85, 3.4028234663852886e+38 }
}; };
int expectedSize = 4;
computeCutPointsProposed(); computeCutPointsProposed();
computed = getCutPoints(); computed = getCutPoints();
EXPECT_EQ(computed.size(), 5); EXPECT_EQ(computed.size(), expectedSize);
for (auto i = 0; i < 5; i++) { for (auto i = 0; i < expectedSize; i++) {
EXPECT_EQ(computed[i].start, expected[i].start); EXPECT_EQ(computed[i].start, expected[i].start);
EXPECT_EQ(computed[i].end, expected[i].end); EXPECT_EQ(computed[i].end, expected[i].end);
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
} }
} }
TEST_F(TestMetrics, ComputeCutPointsProposedGCase) TEST_F(TestFImdlp, ComputeCutPointsProposedGCase)
{ {
cutPoints_t computed, expected; cutPoints_t computed, expected;
expected = { expected = {
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 }, { 0, 3, -1, -3.4028234663852886e+38, 1.5 },
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 } { 3, 4, -1, 1.5, 3.4028234663852886e+38 }
}; };
int expectedSize = 2;
X = { 0, 1, 2, 2 }; X = { 0, 1, 2, 2 };
y = { 1, 1, 1, 2 }; y = { 1, 1, 1, 2 };
fit(X, y); fit(X, y);
computeCutPointsProposed(); computeCutPointsProposed();
computed = getCutPoints(); computed = getCutPoints();
EXPECT_EQ(computed.size(), 2); EXPECT_EQ(computed.size(), expectedSize);
for (auto i = 0; i < 1; i++) { for (auto i = 0; i < expectedSize; i++) {
EXPECT_EQ(computed[i].start, expected[i].start); EXPECT_EQ(computed[i].start, expected[i].start);
EXPECT_EQ(computed[i].end, expected[i].end); EXPECT_EQ(computed[i].end, expected[i].end);
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber); EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test); EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test); EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
} }
} }
TEST_F(TestMetrics, ApplyCutPoints) TEST_F(TestFImdlp, ApplyCutPoints)
{ {
cutPoints_t expected = { cutPoints_t expected = {
{ 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 }, { 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },

View File

@@ -69,13 +69,14 @@ for proposed in [True, False]:
X = data.data X = data.data
y = data.target y = data.target
print("*** Proposed: ", proposed) print("*** Proposed: ", proposed)
test = CFImdlp(debug=False, proposed=proposed) test = CFImdlp(debug=True, proposed=proposed)
test.fit(X[:, 0], y) test.fit(X[:, 0], y)
result = test.get_cut_points() result = test.get_cut_points()
for item in result: for item in result:
print( print(
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})" f"Class={item['classNumber']} - ({item['start']:3d}, "
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]" f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
f"{item['toValue']:3.1f}]"
) )
print(test.get_discretized_values()) print(test.get_discretized_values())
print("+" * 40) print("+" * 40)
@@ -114,11 +115,14 @@ for proposed in [True, False]:
# # k = test.cut_points_ant(X[:, 0], y) # # k = test.cut_points_ant(X[:, 0], y)
# # print(k) # # print(k)
# # test.debug_points(X[:, 0], y) # # test.debug_points(X[:, 0], y)
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9] # X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7] # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
# clf = CFImdlp(debug=True, proposed=False)
# clf.fit(X, y)
# print(clf.get_cut_points())
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] # y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
# # To check # # To check
# indices2 = np.argsort(X) # indices2 = np.argsort(X)
Xs = np.array(X)[indices2] # Xs = np.array(X)[indices2]
ys = np.array(y)[indices2] # ys = np.array(y)[indices2]