mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-17 16:35:52 +00:00
Refactor tests
This commit is contained in:
@@ -175,7 +175,10 @@ namespace mdlp {
|
|||||||
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
||||||
}
|
}
|
||||||
while (idx < numElements && xCur == xPivot);
|
while (idx < numElements && xCur == xPivot);
|
||||||
if (yPivot == -1 || yPrev != yCur) {
|
// Check if the class changed and there are more than 1 element
|
||||||
|
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
|
||||||
|
// Must we add the entropy criteria here?
|
||||||
|
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||||
cutPoint.start = start;
|
cutPoint.start = start;
|
||||||
cutPoint.end = idx;
|
cutPoint.end = idx;
|
||||||
start = idx;
|
start = idx;
|
||||||
@@ -201,9 +204,11 @@ namespace mdlp {
|
|||||||
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||||
cutPts.push_back(cutPoint);
|
cutPts.push_back(cutPoint);
|
||||||
}
|
}
|
||||||
if (debug)
|
if (debug) {
|
||||||
|
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl;
|
||||||
for (auto cutPt : cutPts)
|
for (auto cutPt : cutPts)
|
||||||
std::cout << "Proposed: Cut point: " << cutPt;
|
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposed: Cut point: " << cutPt;
|
||||||
|
}
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
void CPPFImdlp::computeCutPointsOriginal()
|
void CPPFImdlp::computeCutPointsOriginal()
|
||||||
@@ -219,8 +224,11 @@ namespace mdlp {
|
|||||||
yPrev = y[idx];
|
yPrev = y[idx];
|
||||||
for (index = 0; index < size_t(indices.size()) - 1; index++) {
|
for (index = 0; index < size_t(indices.size()) - 1; index++) {
|
||||||
idx = indices[index];
|
idx = indices[index];
|
||||||
// Definition 2 Cut points are always on boundaries
|
// Definition 2 Cut points are always on class boundaries &&
|
||||||
if (y[idx] != yPrev && xPrev < X[idx]) {
|
// there are more than 1 items in the interval
|
||||||
|
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
|
||||||
|
// Must we add the entropy criteria here?
|
||||||
|
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||||
if (first) {
|
if (first) {
|
||||||
first = false;
|
first = false;
|
||||||
cutPoint.fromValue = std::numeric_limits<float>::lowest();
|
cutPoint.fromValue = std::numeric_limits<float>::lowest();
|
||||||
@@ -246,9 +254,11 @@ namespace mdlp {
|
|||||||
} else
|
} else
|
||||||
cutPts.back().toValue = std::numeric_limits<float>::max();
|
cutPts.back().toValue = std::numeric_limits<float>::max();
|
||||||
cutPts.back().end = X.size();
|
cutPts.back().end = X.size();
|
||||||
if (debug)
|
if (debug) {
|
||||||
|
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl;
|
||||||
for (auto cutPt : cutPts)
|
for (auto cutPt : cutPts)
|
||||||
std::cout << "Original: Cut point: " << cutPt;
|
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
|
||||||
|
}
|
||||||
cutPoints = cutPts;
|
cutPoints = cutPts;
|
||||||
}
|
}
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class CPPFImdlp {
|
class CPPFImdlp {
|
||||||
private:
|
protected:
|
||||||
bool proposed; // proposed algorithm or original algorithm
|
bool proposed; // proposed algorithm or original algorithm
|
||||||
int precision;
|
int precision;
|
||||||
bool debug;
|
bool debug;
|
||||||
@@ -16,7 +16,6 @@ namespace mdlp {
|
|||||||
int numClasses;
|
int numClasses;
|
||||||
cutPoints_t cutPoints;
|
cutPoints_t cutPoints;
|
||||||
|
|
||||||
protected:
|
|
||||||
void setCutPoints(cutPoints_t);
|
void setCutPoints(cutPoints_t);
|
||||||
static indices_t sortIndices(samples&);
|
static indices_t sortIndices(samples&);
|
||||||
void computeCutPointsOriginal();
|
void computeCutPointsOriginal();
|
||||||
|
Binary file not shown.
@@ -2,15 +2,9 @@
|
|||||||
#include "../Metrics.h"
|
#include "../Metrics.h"
|
||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
class TestMetrics : public CPPFImdlp, public testing::Test {
|
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||||
public:
|
public:
|
||||||
TestMetrics() : CPPFImdlp(true, 6, true) {}
|
TestFImdlp() : CPPFImdlp(true, 6, true) {}
|
||||||
indices_t indices; // sorted indices to use with X and y
|
|
||||||
samples X;
|
|
||||||
labels y;
|
|
||||||
samples xDiscretized;
|
|
||||||
int numClasses;
|
|
||||||
float precision_test = 0.000001;
|
|
||||||
void SetUp()
|
void SetUp()
|
||||||
{
|
{
|
||||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||||
@@ -19,7 +13,19 @@ namespace mdlp {
|
|||||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
}
|
}
|
||||||
void check_sorted_vector(samples& X_, indices_t indices_)
|
void initCutPoints()
|
||||||
|
{
|
||||||
|
setCutPoints(cutPoints_t());
|
||||||
|
}
|
||||||
|
void initIndices()
|
||||||
|
{
|
||||||
|
indices = indices_t();
|
||||||
|
}
|
||||||
|
void initDiscretized()
|
||||||
|
{
|
||||||
|
xDiscretized = labels();
|
||||||
|
}
|
||||||
|
void checkSortedVector(samples& X_, indices_t indices_)
|
||||||
{
|
{
|
||||||
X = X_;
|
X = X_;
|
||||||
indices = indices_;
|
indices = indices_;
|
||||||
@@ -32,113 +38,109 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
//
|
TEST_F(TestFImdlp, SortIndices)
|
||||||
TEST_F(TestMetrics, SortIndices)
|
|
||||||
{
|
{
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||||
check_sorted_vector(X, indices);
|
checkSortedVector(X, indices);
|
||||||
X = { 5.77, 5.88, 5.99 };
|
X = { 5.77, 5.88, 5.99 };
|
||||||
indices = { 0, 1, 2 };
|
indices = { 0, 1, 2 };
|
||||||
check_sorted_vector(X, indices);
|
checkSortedVector(X, indices);
|
||||||
X = { 5.33, 5.22, 5.11 };
|
X = { 5.33, 5.22, 5.11 };
|
||||||
indices = { 2, 1, 0 };
|
indices = { 2, 1, 0 };
|
||||||
check_sorted_vector(X, indices);
|
checkSortedVector(X, indices);
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, EvaluateCutPoint)
|
TEST_F(TestFImdlp, EvaluateCutPoint)
|
||||||
{
|
{
|
||||||
cutPoint_t rest, candidate;
|
cutPoint_t rest, candidate;
|
||||||
rest.start = 0;
|
rest = { 0, 10, -1, -1, 1000 };
|
||||||
rest.end = 10;
|
candidate = { 0, 4, -1, -1, 5.15 };
|
||||||
rest.classNumber = -1;
|
|
||||||
rest.fromValue = -1;
|
|
||||||
rest.toValue = 1000;
|
|
||||||
candidate.start = 0;
|
|
||||||
candidate.end = 4;
|
|
||||||
candidate.fromValue = -1;
|
|
||||||
candidate.toValue = 5.15;
|
|
||||||
candidate.classNumber = -1;
|
|
||||||
EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, ComputeCutPointsOriginal)
|
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
|
int expectedSize = 3;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||||
{ 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 }
|
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||||
};
|
};
|
||||||
|
setCutPoints(cutPoints_t());
|
||||||
computeCutPointsOriginal();
|
computeCutPointsOriginal();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 4);
|
EXPECT_EQ(computed.size(), expectedSize);
|
||||||
for (auto i = 0; i < 4; i++) {
|
for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, ComputeCutPointsOriginalGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||||
};
|
};
|
||||||
|
int expectedSize = 1;
|
||||||
X = { 0, 1, 2, 2 };
|
X = { 0, 1, 2, 2 };
|
||||||
y = { 1, 1, 1, 2 };
|
y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsOriginal();
|
computeCutPointsOriginal();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 1);
|
EXPECT_EQ(computed.size(), expectedSize);
|
||||||
for (auto i = 0; i < 1; i++) {
|
for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, ComputeCutPointsProposed)
|
TEST_F(TestFImdlp, ComputeCutPointsProposed)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
|
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||||
{ 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
|
{ 6, 9, -1, 5.4, 5.85 },
|
||||||
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||||
};
|
};
|
||||||
|
int expectedSize = 4;
|
||||||
computeCutPointsProposed();
|
computeCutPointsProposed();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 5);
|
EXPECT_EQ(computed.size(), expectedSize);
|
||||||
for (auto i = 0; i < 5; i++) {
|
for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, ComputeCutPointsProposedGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsProposedGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t computed, expected;
|
cutPoints_t computed, expected;
|
||||||
expected = {
|
expected = {
|
||||||
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||||
};
|
};
|
||||||
|
int expectedSize = 2;
|
||||||
X = { 0, 1, 2, 2 };
|
X = { 0, 1, 2, 2 };
|
||||||
y = { 1, 1, 1, 2 };
|
y = { 1, 1, 1, 2 };
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPointsProposed();
|
computeCutPointsProposed();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
EXPECT_EQ(computed.size(), 2);
|
EXPECT_EQ(computed.size(), expectedSize);
|
||||||
for (auto i = 0; i < 1; i++) {
|
for (auto i = 0; i < expectedSize; i++) {
|
||||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestMetrics, ApplyCutPoints)
|
TEST_F(TestFImdlp, ApplyCutPoints)
|
||||||
{
|
{
|
||||||
cutPoints_t expected = {
|
cutPoints_t expected = {
|
||||||
{ 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
|
{ 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
|
||||||
|
20
sample.py
20
sample.py
@@ -69,13 +69,14 @@ for proposed in [True, False]:
|
|||||||
X = data.data
|
X = data.data
|
||||||
y = data.target
|
y = data.target
|
||||||
print("*** Proposed: ", proposed)
|
print("*** Proposed: ", proposed)
|
||||||
test = CFImdlp(debug=False, proposed=proposed)
|
test = CFImdlp(debug=True, proposed=proposed)
|
||||||
test.fit(X[:, 0], y)
|
test.fit(X[:, 0], y)
|
||||||
result = test.get_cut_points()
|
result = test.get_cut_points()
|
||||||
for item in result:
|
for item in result:
|
||||||
print(
|
print(
|
||||||
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
f"Class={item['classNumber']} - ({item['start']:3d}, "
|
||||||
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
|
||||||
|
f"{item['toValue']:3.1f}]"
|
||||||
)
|
)
|
||||||
print(test.get_discretized_values())
|
print(test.get_discretized_values())
|
||||||
print("+" * 40)
|
print("+" * 40)
|
||||||
@@ -114,11 +115,14 @@ for proposed in [True, False]:
|
|||||||
# # k = test.cut_points_ant(X[:, 0], y)
|
# # k = test.cut_points_ant(X[:, 0], y)
|
||||||
# # print(k)
|
# # print(k)
|
||||||
# # test.debug_points(X[:, 0], y)
|
# # test.debug_points(X[:, 0], y)
|
||||||
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||||
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||||
|
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||||
|
# clf = CFImdlp(debug=True, proposed=False)
|
||||||
|
# clf.fit(X, y)
|
||||||
|
# print(clf.get_cut_points())
|
||||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||||
# # To check
|
# # To check
|
||||||
# indices2 = np.argsort(X)
|
# indices2 = np.argsort(X)
|
||||||
Xs = np.array(X)[indices2]
|
# Xs = np.array(X)[indices2]
|
||||||
ys = np.array(y)[indices2]
|
# ys = np.array(y)[indices2]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user