mirror of
https://github.com/Doctorado-ML/FImdlp.git
synced 2025-08-16 16:05:52 +00:00
Merge remote-tracking branch 'origin/main' into main
This commit is contained in:
@@ -174,7 +174,10 @@ namespace mdlp {
|
||||
printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
|
||||
}
|
||||
while (idx < numElements && xCur == xPivot);
|
||||
if (yPivot == -1 || yPrev != yCur) {
|
||||
// Check if the class changed and there are more than 1 element
|
||||
if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur)) {
|
||||
// Must we add the entropy criteria here?
|
||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||
cutPoint.start = start;
|
||||
cutPoint.end = idx;
|
||||
start = idx;
|
||||
@@ -200,9 +203,11 @@ namespace mdlp {
|
||||
printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
|
||||
cutPts.push_back(cutPoint);
|
||||
}
|
||||
if (debug)
|
||||
if (debug) {
|
||||
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl;
|
||||
for (auto cutPt : cutPts)
|
||||
std::cout << "Proposed: Cut point: " << cutPt;
|
||||
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposed: Cut point: " << cutPt;
|
||||
}
|
||||
cutPoints = cutPts;
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsOriginal()
|
||||
@@ -218,8 +223,11 @@ namespace mdlp {
|
||||
yPrev = y[idx];
|
||||
for (index = 0; index < size_t(indices.size()) - 1; index++) {
|
||||
idx = indices[index];
|
||||
// Definition 2 Cut points are always on boundaries
|
||||
if (y[idx] != yPrev && xPrev < X[idx]) {
|
||||
// Definition 2 Cut points are always on class boundaries &&
|
||||
// there are more than 1 items in the interval
|
||||
if (y[idx] != yPrev && xPrev < X[idx] && idxPrev != index - 1) {
|
||||
// Must we add the entropy criteria here?
|
||||
// if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point }
|
||||
if (first) {
|
||||
first = false;
|
||||
cutPoint.fromValue = std::numeric_limits<float>::lowest();
|
||||
@@ -245,9 +253,11 @@ namespace mdlp {
|
||||
} else
|
||||
cutPts.back().toValue = std::numeric_limits<float>::max();
|
||||
cutPts.back().end = X.size();
|
||||
if (debug)
|
||||
if (debug) {
|
||||
std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl;
|
||||
for (auto cutPt : cutPts)
|
||||
std::cout << "Original: Cut point: " << cutPt;
|
||||
std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt;
|
||||
}
|
||||
cutPoints = cutPts;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
|
@@ -4,7 +4,7 @@
|
||||
#include <utility>
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
private:
|
||||
protected:
|
||||
bool proposed; // proposed algorithm or original algorithm
|
||||
int precision;
|
||||
bool debug;
|
||||
@@ -16,7 +16,6 @@ namespace mdlp {
|
||||
int numClasses;
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
protected:
|
||||
void setCutPoints(cutPoints_t);
|
||||
static indices_t sortIndices(samples&);
|
||||
void computeCutPointsOriginal();
|
||||
|
@@ -2,15 +2,9 @@
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
namespace mdlp {
|
||||
class TestMetrics : public CPPFImdlp, public testing::Test {
|
||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
TestMetrics() : CPPFImdlp(true, 6, true) {}
|
||||
indices_t indices; // sorted indices to use with X and y
|
||||
samples X;
|
||||
labels y;
|
||||
samples xDiscretized;
|
||||
int numClasses;
|
||||
float precision_test = 0.000001;
|
||||
TestFImdlp() : CPPFImdlp(true, 6, true) {}
|
||||
void SetUp()
|
||||
{
|
||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
||||
@@ -19,7 +13,19 @@ namespace mdlp {
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
fit(X, y);
|
||||
}
|
||||
void check_sorted_vector(samples& X_, indices_t indices_)
|
||||
void initCutPoints()
|
||||
{
|
||||
setCutPoints(cutPoints_t());
|
||||
}
|
||||
void initIndices()
|
||||
{
|
||||
indices = indices_t();
|
||||
}
|
||||
void initDiscretized()
|
||||
{
|
||||
xDiscretized = labels();
|
||||
}
|
||||
void checkSortedVector(samples& X_, indices_t indices_)
|
||||
{
|
||||
X = X_;
|
||||
indices = indices_;
|
||||
@@ -32,113 +38,109 @@ namespace mdlp {
|
||||
}
|
||||
}
|
||||
};
|
||||
//
|
||||
TEST_F(TestMetrics, SortIndices)
|
||||
TEST_F(TestFImdlp, SortIndices)
|
||||
{
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
check_sorted_vector(X, indices);
|
||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
checkSortedVector(X, indices);
|
||||
X = { 5.77, 5.88, 5.99 };
|
||||
indices = { 0, 1, 2 };
|
||||
check_sorted_vector(X, indices);
|
||||
checkSortedVector(X, indices);
|
||||
X = { 5.33, 5.22, 5.11 };
|
||||
indices = { 2, 1, 0 };
|
||||
check_sorted_vector(X, indices);
|
||||
checkSortedVector(X, indices);
|
||||
}
|
||||
TEST_F(TestMetrics, EvaluateCutPoint)
|
||||
TEST_F(TestFImdlp, EvaluateCutPoint)
|
||||
{
|
||||
cutPoint_t rest, candidate;
|
||||
rest.start = 0;
|
||||
rest.end = 10;
|
||||
rest.classNumber = -1;
|
||||
rest.fromValue = -1;
|
||||
rest.toValue = 1000;
|
||||
candidate.start = 0;
|
||||
candidate.end = 4;
|
||||
candidate.fromValue = -1;
|
||||
candidate.toValue = 5.15;
|
||||
candidate.classNumber = -1;
|
||||
rest = { 0, 10, -1, -1, 1000 };
|
||||
candidate = { 0, 4, -1, -1, 5.15 };
|
||||
EXPECT_FALSE(evaluateCutPoint(rest, candidate));
|
||||
}
|
||||
TEST_F(TestMetrics, ComputeCutPointsOriginal)
|
||||
TEST_F(TestFImdlp, ComputeCutPointsOriginal)
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
int expectedSize = 3;
|
||||
expected = {
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 },
|
||||
{ 6, 7, -1, 5.45, 5.65 }, { 7, 10, -1, 5.65, 3.4028234663852886e+38 }
|
||||
{ 6, 10, -1, 5.45, 3.4028234663852886e+38 }
|
||||
};
|
||||
setCutPoints(cutPoints_t());
|
||||
computeCutPointsOriginal();
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 4);
|
||||
for (auto i = 0; i < 4; i++) {
|
||||
EXPECT_EQ(computed.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestMetrics, ComputeCutPointsOriginalGCase)
|
||||
TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase)
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
expected = {
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 },
|
||||
};
|
||||
int expectedSize = 1;
|
||||
X = { 0, 1, 2, 2 };
|
||||
y = { 1, 1, 1, 2 };
|
||||
fit(X, y);
|
||||
computeCutPointsOriginal();
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 1);
|
||||
for (auto i = 0; i < 1; i++) {
|
||||
EXPECT_EQ(computed.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestMetrics, ComputeCutPointsProposed)
|
||||
TEST_F(TestFImdlp, ComputeCutPointsProposed)
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
expected = {
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 5, -1, 5.1, 5.2 },
|
||||
{ 5, 6, -1, 5.2, 5.4 }, { 6, 9, -1, 5.4, 5.85 },
|
||||
{ 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 },
|
||||
{ 6, 9, -1, 5.4, 5.85 },
|
||||
{ 9, 10, -1, 5.85, 3.4028234663852886e+38 }
|
||||
};
|
||||
int expectedSize = 4;
|
||||
computeCutPointsProposed();
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 5);
|
||||
for (auto i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(computed.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestMetrics, ComputeCutPointsProposedGCase)
|
||||
TEST_F(TestFImdlp, ComputeCutPointsProposedGCase)
|
||||
{
|
||||
cutPoints_t computed, expected;
|
||||
expected = {
|
||||
{ 0, 3, -1, -3.4028234663852886e+38, 1.5 },
|
||||
{ 3, 4, -1, 1.5, 3.4028234663852886e+38 }
|
||||
};
|
||||
int expectedSize = 2;
|
||||
X = { 0, 1, 2, 2 };
|
||||
y = { 1, 1, 1, 2 };
|
||||
fit(X, y);
|
||||
computeCutPointsProposed();
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
for (auto i = 0; i < 1; i++) {
|
||||
EXPECT_EQ(computed.size(), expectedSize);
|
||||
for (auto i = 0; i < expectedSize; i++) {
|
||||
EXPECT_EQ(computed[i].start, expected[i].start);
|
||||
EXPECT_EQ(computed[i].end, expected[i].end);
|
||||
EXPECT_EQ(computed[i].classNumber, expected[i].classNumber);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision_test);
|
||||
EXPECT_NEAR(computed[i].fromValue, expected[i].fromValue, precision);
|
||||
EXPECT_NEAR(computed[i].toValue, expected[i].toValue, precision);
|
||||
}
|
||||
}
|
||||
TEST_F(TestMetrics, ApplyCutPoints)
|
||||
TEST_F(TestFImdlp, ApplyCutPoints)
|
||||
{
|
||||
cutPoints_t expected = {
|
||||
{ 0, 4, 17, -3.4028234663852886e+38, 5.1 }, { 4, 6, 31, 5.1, 5.4 },
|
||||
|
20
sample.py
20
sample.py
@@ -69,13 +69,14 @@ for proposed in [True, False]:
|
||||
X = data.data
|
||||
y = data.target
|
||||
print("*** Proposed: ", proposed)
|
||||
test = CFImdlp(debug=False, proposed=proposed)
|
||||
test = CFImdlp(debug=True, proposed=proposed)
|
||||
test.fit(X[:, 0], y)
|
||||
result = test.get_cut_points()
|
||||
for item in result:
|
||||
print(
|
||||
f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
|
||||
f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
|
||||
f"Class={item['classNumber']} - ({item['start']:3d}, "
|
||||
f"{item['end']:3d}) -> ({item['fromValue']:3.1f}, "
|
||||
f"{item['toValue']:3.1f}]"
|
||||
)
|
||||
print(test.get_discretized_values())
|
||||
print("+" * 40)
|
||||
@@ -114,11 +115,14 @@ for proposed in [True, False]:
|
||||
# # k = test.cut_points_ant(X[:, 0], y)
|
||||
# # print(k)
|
||||
# # test.debug_points(X[:, 0], y)
|
||||
X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||
indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||
# X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
|
||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
|
||||
# clf = CFImdlp(debug=True, proposed=False)
|
||||
# clf.fit(X, y)
|
||||
# print(clf.get_cut_points())
|
||||
# y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
|
||||
# # To check
|
||||
# indices2 = np.argsort(X)
|
||||
Xs = np.array(X)[indices2]
|
||||
ys = np.array(y)[indices2]
|
||||
|
||||
# Xs = np.array(X)[indices2]
|
||||
# ys = np.array(y)[indices2]
|
||||
|
Reference in New Issue
Block a user