diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index 1990f95..acab4ea 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -8,7 +8,11 @@ namespace mdlp { - CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), y(labels_t()), + CPPFImdlp::CPPFImdlp():depth(0), max_depth(numeric_limits::max()), min_length(3), indices(indices_t()), X(samples_t()), y(labels_t()), + metrics(Metrics(y, indices)) + { + } + CPPFImdlp::CPPFImdlp(int min_length_, int max_depth_): depth(0), max_depth(max_depth_), min_length(min_length_), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices)) { } @@ -25,9 +29,15 @@ namespace mdlp { if (X.empty() || y.empty()) { throw invalid_argument("X and y must have at least one element"); } + if (min_length < 3) { + throw invalid_argument("min_length must be greater than 2"); + } + if (max_depth < 1) { + throw invalid_argument("max_depth must be greater than 0"); + } indices = sortIndices(X_, y_); metrics.setData(y, indices); - computeCutPoints(0, X.size()); + computeCutPoints(0, X.size(), 1); return *this; } @@ -60,12 +70,14 @@ namespace mdlp { return { (actual + previous) / 2, cut }; } - void CPPFImdlp::computeCutPoints(size_t start, size_t end) + void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) { size_t cut; pair result; - if (end - start < 3) + // Check if the interval length and the depth are Ok + if (end - start < min_length || depth_ > max_depth) return; + depth = depth_ > depth ? 
depth_ : depth; cut = getCandidate(start, end); if (cut == numeric_limits::max()) return; @@ -73,8 +85,8 @@ namespace mdlp { result = valueCutPoint(start, cut, end); cut = result.second; cutPoints.push_back(result.first); - computeCutPoints(start, cut); - computeCutPoints(cut, end); + computeCutPoints(start, cut, depth_ + 1); + computeCutPoints(cut, end, depth_ + 1); } } @@ -158,4 +170,8 @@ namespace mdlp { sort(output.begin(), output.end()); return output; } + int CPPFImdlp::get_depth() + { + return depth; + } } diff --git a/CPPFImdlp.h b/CPPFImdlp.h index 6ff9d7e..d8a23db 100644 --- a/CPPFImdlp.h +++ b/CPPFImdlp.h @@ -10,19 +10,23 @@ namespace mdlp { indices_t indices; samples_t X; labels_t y; + int depth, max_depth; + size_t min_length; Metrics metrics; cutPoints_t cutPoints; static indices_t sortIndices(samples_t&, labels_t&); - void computeCutPoints(size_t, size_t); + void computeCutPoints(size_t, size_t, int); bool mdlp(size_t, size_t, size_t); size_t getCandidate(size_t, size_t); pair valueCutPoint(size_t, size_t, size_t); public: CPPFImdlp(); + CPPFImdlp(int, int); ~CPPFImdlp(); CPPFImdlp& fit(samples_t&, labels_t&); - samples_t getCutPoints(); + cutPoints_t getCutPoints(); + int get_depth(); inline string version() { return "1.1.1"; }; }; } diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index 32a9a50..f45d814 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -8,6 +8,7 @@ namespace mdlp { class TestFImdlp: public CPPFImdlp, public testing::Test { public: precision_t precision = 0.000001; + //precision_t precision = 0.000000000001; TestFImdlp(): CPPFImdlp() {} void SetUp() { @@ -25,18 +26,16 @@ namespace mdlp { prev = X[testSortedIndices[i]]; } } - void checkCutPoints(cutPoints_t& expected) + void checkCutPoints(cutPoints_t& computed, cutPoints_t& expected) { - int expectedSize = expected.size(); - EXPECT_EQ(cutPoints.size(), expectedSize); - for (unsigned long i = 0; i < cutPoints.size(); i++) { - 
EXPECT_NEAR(cutPoints[i], expected[i], precision); + EXPECT_EQ(computed.size(), expected.size()); + for (unsigned long i = 0; i < computed.size(); i++) { + EXPECT_NEAR(computed[i], expected[i], precision); } } template void checkVectors(std::vector const& expected, std::vector const& computed) { - EXPECT_EQ(expected.size(), computed.size()); ASSERT_EQ(expected.size(), computed.size()); for (auto i = 0; i < expected.size(); i++) { EXPECT_NEAR(expected[i], computed[i], precision); @@ -55,6 +54,20 @@ namespace mdlp { EXPECT_EQ(result.second, limit); return true; } + void test_dataset(CPPFImdlp& test, string filename, vector& expected, int depths[]) + { + ArffFiles file; + file.load("../datasets/" + filename, true); + vector& X = file.getX(); + labels_t& y = file.getY(); + auto attributes = file.getAttributes(); + for (auto feature = 0; feature < attributes.size(); feature++) { + test.fit(X[feature], y); + EXPECT_EQ(test.get_depth(), depths[feature]); + auto computed = test.getCutPoints(); + checkCutPoints(computed, expected[feature]); + } + } }; TEST_F(TestFImdlp, FitErrorEmptyDataset) { @@ -68,6 +81,15 @@ namespace mdlp { y = { 1, 2 }; EXPECT_THROW(fit(X, y), std::invalid_argument); } + TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth) + { + auto testLength = CPPFImdlp(2, 10); + auto testDepth = CPPFImdlp(3, 0); + X = { 1, 2, 3 }; + y = { 1, 2, 3 }; + EXPECT_THROW(testLength.fit(X, y), invalid_argument); + EXPECT_THROW(testDepth.fit(X, y), invalid_argument); + } TEST_F(TestFImdlp, SortIndices) { X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; @@ -114,7 +136,7 @@ namespace mdlp { TEST_F(TestFImdlp, TestArtificialDataset) { fit(X, y); - computeCutPoints(0, 20); + computeCutPoints(0, 20, 1); cutPoints_t expected = { 5.05 }; vector computed = getCutPoints(); computed = getCutPoints(); @@ -126,28 +148,15 @@ namespace mdlp { } TEST_F(TestFImdlp, TestIris) { - ArffFiles file; - string path = "../datasets/"; - - file.load(path + "iris.arff", true); - int items = 
file.getSize(); - vector& X = file.getX(); vector expected = { - { 5.4499998092651367, 5.75 }, + { 5.45, 5.75 }, { 2.75, 2.85, 2.95, 3.05, 3.35 }, - { 2.4500000476837158, 4.75, 5.0500001907348633 }, - { 0.80000001192092896, 1.75 } + { 2.45, 4.75, 5.05 }, + { 0.8, 1.75 } }; - labels_t& y = file.getY(); - auto attributes = file.getAttributes(); - for (auto feature = 0; feature < attributes.size(); feature++) { - fit(X[feature], y); - vector computed = getCutPoints(); - EXPECT_EQ(computed.size(), expected[feature].size()); - for (auto i = 0; i < computed.size(); i++) { - EXPECT_NEAR(computed[i], expected[feature][i], precision); - } - } + int depths[] = { 3, 5, 5, 5 }; + auto test = CPPFImdlp(); + test_dataset(test, "iris.arff", expected, depths); } TEST_F(TestFImdlp, ComputeCutPointsGCase) { @@ -156,7 +165,8 @@ namespace mdlp { samples_t X_ = { 0, 1, 2, 2, 2 }; labels_t y_ = { 1, 1, 1, 2, 2 }; fit(X_, y_); - checkCutPoints(expected); + auto computed = getCutPoints(); + checkCutPoints(computed, expected); } TEST_F(TestFImdlp, ValueCutPoint) { @@ -178,4 +188,43 @@ namespace mdlp { samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 }; test_result(X4c, 4, 6.9 / 2, 2, "4c"); } + TEST_F(TestFImdlp, MaxDepth) + { + // Set max_depth to 1 + auto test = CPPFImdlp(3, 1); + vector expected = { + { 5.45 }, + { 3.35 }, + { 2.45 }, + {0.8 } + }; + int depths[] = { 1, 1, 1, 1 }; + test_dataset(test, "iris.arff", expected, depths); + } + TEST_F(TestFImdlp, MinLength) + { + // Set min_length to 75 + auto test = CPPFImdlp(75, 100); + vector expected = { + { 5.45, 5.75 }, + { 2.85, 3.35 }, + { 2.45, 4.75 }, + { 0.8, 1.75 } + }; + int depths[] = { 3, 3, 3, 3 }; + test_dataset(test, "iris.arff", expected, depths); + } + TEST_F(TestFImdlp, MinLengthMaxDepth) + { + // Set min_length to 75 and max_depth to 2 + auto test = CPPFImdlp(75, 2); + vector expected = { + { 5.45, 5.75 }, + { 2.85, 3.35 }, + { 2.45, 4.75 }, + { 0.8, 1.75 } + }; + int depths[] = { 2, 2, 2, 2 }; + test_dataset(test, 
"iris.arff", expected, depths); + } } diff --git a/tests/test b/tests/test index e27cdde..5bacf1f 100755 --- a/tests/test +++ b/tests/test @@ -9,4 +9,4 @@ if test $? -ne 0; then exit 1 fi cd build -ctest --output-on-failure +ctest --output-on-failure|grep -v profiling