diff --git a/Makefile b/Makefile index a8549f7..b7d92df 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ test: cd fimdlp/testcpp && ./test coverage: + make test cd fimdlp/testcpp && ./cover lint: ## Lint and static-check diff --git a/fimdlp/CPPFImdlp.cpp b/fimdlp/CPPFImdlp.cpp index d203542..27621d3 100644 --- a/fimdlp/CPPFImdlp.cpp +++ b/fimdlp/CPPFImdlp.cpp @@ -3,12 +3,13 @@ #include #include #include "Metrics.h" + namespace mdlp { - std::ostream& operator << (std::ostream& os, const cutPoint_t& cut) + ostream& operator << (ostream& os, const cutPoint_t& cut) { os << cut.classNumber << " -> (" << cut.start << ", " << cut.end << ") - (" << cut.fromValue << ", " << cut.toValue << ") " - << std::endl; + << endl; return os; } @@ -27,7 +28,7 @@ namespace mdlp { samples CPPFImdlp::getCutPoints() { samples output(cutPoints.size()); - std::transform(cutPoints.begin(), cutPoints.end(), output.begin(), + ::transform(cutPoints.begin(), cutPoints.end(), output.begin(), [](cutPoint_t cut) { return cut.toValue; }); return output; } @@ -40,11 +41,11 @@ namespace mdlp { X = X_; y = y_; if (X.size() != y.size()) { - std::cerr << "X and y must have the same size" << std::endl; + cerr << "X and y must have the same size" << endl; return *this; } if (X.size() == 0) { - std::cerr << "X and y must have at least one element" << std::endl; + cerr << "X and y must have at least one element" << endl; return *this; } this->indices = sortIndices(X_); @@ -84,10 +85,10 @@ namespace mdlp { delta = log2(pow(3, float(k)) - 2) - (float(k) * ent - float(k1) * ent1 - float(k2) * ent2); float term = 1 / N * (log2(N - 1) + delta); if (debug) { - std::cout << "Rest: " << rest; - std::cout << "Candidate: " << candidate; - std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl; - std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl; + cout << "Rest: " << rest; + cout << "Candidate: " << candidate; + cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << endl; + cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << endl; } return (ig > term); } @@ -99,15 +100,15 @@ namespace mdlp { rest.start = 0; rest.end = X.size(); - rest.fromValue = std::numeric_limits::lowest(); - rest.toValue = std::numeric_limits::max(); + rest.fromValue = numeric_limits::lowest(); + rest.toValue = numeric_limits::max(); rest.classNumber = classNumber; bool first = true; for (size_t index = 0; index < size_t(cutPoints.size()); index++) { item = cutPoints[index]; if (evaluateCutPoint(rest, item)) { if (debug) - std::cout << "Accepted: " << item << std::endl; + cout << "Accepted: " << item << endl; //Assign class number to the interval (cutpoint) item.classNumber = classNumber++; filtered.push_back(item); @@ -115,11 +116,11 @@ namespace mdlp { rest.start = item.end; } else { if (debug) - std::cout << "Rejected: " << item << std::endl; + cout << "Rejected: " << item << endl; if (index != size_t(cutPoints.size()) - 1) { // Try to merge the rejected cutpoint with the next one if (first) { - cutPoints[index + 1].fromValue = std::numeric_limits::lowest(); + cutPoints[index + 1].fromValue = numeric_limits::lowest(); cutPoints[index + 1].start = indices[0]; } else { cutPoints[index + 1].fromValue = item.fromValue; @@ -129,7 +130,7 @@ namespace mdlp { } } if (!first) { - filtered.back().toValue = std::numeric_limits::max(); + filtered.back().toValue = numeric_limits::max(); filtered.back().end = X.size() - 1; } else { filtered.push_back(rest); @@ -175,7 +176,7 @@ namespace mdlp { cutPoint.start = start; cutPoint.end = idx; start = idx; - cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; + cutPoint.fromValue = firstCutPoint ? numeric_limits::lowest() : cutPts.back().toValue; cutPoint.toValue = (xPrev + xCur) / 2; cutPoint.classNumber = -1; firstCutPoint = false; @@ -190,17 +191,17 @@ namespace mdlp { if (idx == numElements) { cutPoint.start = start; cutPoint.end = numElements + 1; - cutPoint.fromValue = firstCutPoint ? std::numeric_limits::lowest() : cutPts.back().toValue; - cutPoint.toValue = std::numeric_limits::max(); + cutPoint.fromValue = firstCutPoint ? numeric_limits::lowest() : cutPts.back().toValue; + cutPoint.toValue = numeric_limits::max(); cutPoint.classNumber = -1; if (debug) printf("Final Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue); cutPts.push_back(cutPoint); } if (debug) { - std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << std::endl; + cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, numElements + 1, numClasses) << endl; for (auto cutPt : cutPts) - std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt; + cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << " :Proposal: Cut point: " << cutPt; } cutPoints = cutPts; } @@ -224,7 +225,7 @@ namespace mdlp { // if (totalEntropy - (entropyLeft + entropyRight) < 0) { Accept cut point } if (first) { first = false; - cutPoint.fromValue = std::numeric_limits::lowest(); + cutPoint.fromValue = numeric_limits::lowest(); } else { cutPoint.fromValue = cutPts.back().toValue; } @@ -241,16 +242,16 @@ namespace mdlp { if (first) { cutPoint.start = 0; cutPoint.classNumber = -1; - cutPoint.fromValue = std::numeric_limits::lowest(); - cutPoint.toValue = std::numeric_limits::max(); + cutPoint.fromValue = numeric_limits::lowest(); + cutPoint.toValue = numeric_limits::max(); cutPts.push_back(cutPoint); } else - cutPts.back().toValue = std::numeric_limits::max(); + cutPts.back().toValue = numeric_limits::max(); cutPts.back().end = X.size(); if (debug) { - std::cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << std::endl; + cout << "Entropy of the dataset: " << Metrics::entropy(y, indices, 0, indices.size(), numClasses) << endl; for (auto cutPt : cutPts) - std::cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt; + cout << "Entropy: " << Metrics::entropy(y, indices, cutPt.start, cutPt.end, numClasses) << ": Original: Cut point: " << cutPt; } cutPoints = cutPts; } @@ -258,8 +259,8 @@ namespace mdlp { indices_t CPPFImdlp::sortIndices(samples& X_) { indices_t idx(X_.size()); - std::iota(idx.begin(), idx.end(), 0); - for (std::size_t i = 0; i < X_.size(); i++) + iota(idx.begin(), idx.end(), 0); + for (size_t i = 0; i < X_.size(); i++) stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2) { return X_[i1] < X_[i2]; }); return idx; diff --git a/fimdlp/cppfimdlp.cpython-310-darwin.so b/fimdlp/cppfimdlp.cpython-310-darwin.so index a367999..285ee7f 100755 Binary files a/fimdlp/cppfimdlp.cpython-310-darwin.so and b/fimdlp/cppfimdlp.cpython-310-darwin.so differ diff --git a/fimdlp/testcpp/FImdlp_unittest.cc b/fimdlp/testcpp/FImdlp_unittest.cc index b3d504c..ced121f 100644 --- a/fimdlp/testcpp/FImdlp_unittest.cc +++ b/fimdlp/testcpp/FImdlp_unittest.cc @@ -49,6 +49,14 @@ namespace mdlp { EXPECT_NEAR(cutPoints[i].toValue, expected[i].toValue, precision); } } + template + void checkVectors(std::vector const& expected, std::vector const& computed) + { + EXPECT_EQ(expected.size(), computed.size()); + for (auto i = 0; i < expected.size(); i++) { + EXPECT_EQ(expected[i], computed[i]); + } + } }; TEST_F(TestFImdlp, SortIndices) @@ -72,7 +80,7 @@ namespace mdlp { } TEST_F(TestFImdlp, ComputeCutPointsOriginal) { - cutPoints_t computed, expected; + cutPoints_t expected; expected = { { 0, 4, -1, -3.4028234663852886e+38, 5.15 }, { 4, 6, -1, 5.15, 5.45 }, { 6, 10, -1, 5.45, 3.4028234663852886e+38 } @@ -83,7 +91,7 @@ namespace mdlp { } TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) { - cutPoints_t computed, expected; + cutPoints_t expected; expected = { { 0, 4, -1, -3.4028234663852886e+38, 3.4028234663852886e+38 }, }; @@ -95,7 +103,7 @@ namespace mdlp { } TEST_F(TestFImdlp, ComputeCutPointsProposal) { - cutPoints_t computed, expected; + cutPoints_t expected; expected = { { 0, 4, -1, -3.4028234663852886e+38, 5.1 }, { 4, 6, -1, 5.1, 5.4 }, { 6, 9, -1, 5.4, 5.85 }, @@ -106,7 +114,7 @@ namespace mdlp { } TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) { - cutPoints_t computed, expected; + cutPoints_t expected; expected = { { 0, 3, -1, -3.4028234663852886e+38, 1.5 }, { 3, 4, -1, 1.5, 3.4028234663852886e+38 } @@ -117,4 +125,17 @@ namespace mdlp { computeCutPointsProposal(); checkCutPoints(expected); } + TEST_F(TestFImdlp, DiscretizedValues) + { + labels computed, expected = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + computed = getDiscretizedValues(); + checkVectors(expected, computed); + } + TEST_F(TestFImdlp, GetCutPoints) + { + samples computed, expected = { 5.15, 5.45, 3.4028234663852886e+38 }; + computeCutPointsOriginal(); + computed = getCutPoints(); + checkVectors(expected, computed); + } } \ No newline at end of file diff --git a/fimdlp/typesFImdlp.h b/fimdlp/typesFImdlp.h index 2a0e72c..cc2fb9e 100644 --- a/fimdlp/typesFImdlp.h +++ b/fimdlp/typesFImdlp.h @@ -1,6 +1,8 @@ #ifndef TYPES_H #define TYPES_H #include + +using namespace std; namespace mdlp { struct CutPointBody { size_t start, end; // indices of the sorted vector @@ -8,9 +10,9 @@ namespace mdlp { float fromValue, toValue; }; typedef CutPointBody cutPoint_t; - typedef std::vector samples; - typedef std::vector labels; - typedef std::vector indices_t; - typedef std::vector cutPoints_t; + typedef vector samples; + typedef vector labels; + typedef vector indices_t; + typedef vector cutPoints_t; } #endif \ No newline at end of file