From 5925dbd66634ab9bd7f0e2ce941d8c494a6e3bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Wed, 21 Dec 2022 16:42:37 +0100 Subject: [PATCH] test: :zap: Refactor tests to new version --- CPPFImdlp.h | 2 +- README.md | 12 +++- sample/CMakeLists.txt | 2 +- sample/sample.cpp | 2 +- {sample => tests}/ArffFiles.cpp | 0 {sample => tests}/ArffFiles.h | 0 tests/CMakeLists.txt | 2 +- tests/FImdlp_unittest.cpp | 121 ++++++++++++++++++++++---------- 8 files changed, 100 insertions(+), 41 deletions(-) rename {sample => tests}/ArffFiles.cpp (100%) rename {sample => tests}/ArffFiles.h (100%) diff --git a/CPPFImdlp.h b/CPPFImdlp.h index 56e9006..8e79646 100644 --- a/CPPFImdlp.h +++ b/CPPFImdlp.h @@ -27,7 +27,7 @@ namespace mdlp { ~CPPFImdlp(); CPPFImdlp& fit(samples_t&, labels_t&); samples_t getCutPoints(); - inline string version() { return "0.9.7"; }; + inline string version() { return "1.0.0"; }; }; } #endif \ No newline at end of file diff --git a/README.md b/README.md index 892d922..3d09810 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # mdlp + Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf) The implementation tries to mitigate the problem of different label values with the same value of the variable: @@ -19,4 +20,13 @@ cd build cmake .. make ./sample iris -``` \ No newline at end of file +``` + +## Test + +To run the tests, execute the following commands: + +```bash +cd tests +./test +``` diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index d4d1f55..6fea95c 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -3,4 +3,4 @@ project(main) set(CMAKE_CXX_STANDARD 14) -add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) +add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) diff --git a/sample/sample.cpp b/sample/sample.cpp index 6c65255..db784c8 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -1,8 +1,8 @@ -#include "ArffFiles.h" #include #include #include #include "../CPPFImdlp.h" +#include "../tests/ArffFiles.h" using namespace std; using namespace mdlp; diff --git a/sample/ArffFiles.cpp b/tests/ArffFiles.cpp similarity index 100% rename from sample/ArffFiles.cpp rename to tests/ArffFiles.cpp diff --git a/sample/ArffFiles.h b/tests/ArffFiles.h similarity index 100% rename from sample/ArffFiles.h rename to tests/ArffFiles.h diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c27aba4..58b1e86 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -18,7 +18,7 @@ FetchContent_MakeAvailable(googletest) enable_testing() add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp) -add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cpp) +add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp) target_link_libraries(Metrics_unittest GTest::gtest_main) target_link_libraries(FImdlp_unittest GTest::gtest_main) target_compile_options(Metrics_unittest PRIVATE --coverage) diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index 06d3d52..e86a156 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "../Metrics.h" #include "../CPPFImdlp.h" +#include "ArffFiles.h" #include namespace mdlp { @@ -10,10 +11,8 @@ namespace mdlp { TestFImdlp(): CPPFImdlp() {} void SetUp() { - // 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0] - //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2) - X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; - y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; + X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; + y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; algorithm = false; fit(X, y); } @@ -55,6 +54,11 @@ namespace mdlp { y = labels_t(); EXPECT_THROW(fit(X, y), std::invalid_argument); } + TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm) + { + algorithm = 2; + EXPECT_THROW(fit(X, y), std::invalid_argument); + } TEST_F(TestFImdlp, FitErrorDifferentSize) { X = { 1, 2, 3 }; @@ -64,56 +68,111 @@ namespace mdlp { TEST_F(TestFImdlp, SortIndices) { X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; + y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 }; checkSortedVector(); X = { 5.77, 5.88, 5.99 }; + y = { 1, 2, 1 }; indices = { 0, 1, 2 }; checkSortedVector(); X = { 5.33, 5.22, 5.11 }; + y = { 1, 2, 1 }; indices = { 2, 1, 0 }; checkSortedVector(); + X = { 5.33, 5.22, 5.33 }; + y = { 2, 2, 1 }; + indices = { 1, 2, 0 }; } - - TEST_F(TestFImdlp, TestDataset) + TEST_F(TestFImdlp, TestArtificialDatasetAlternative) { - algorithm = 0; + algorithm = 1; fit(X, y); - computeCutPoints(0, 10); - cutPoints_t expected = { 5.6499996185302734 }; + computeCutPoints(0, 20); + cutPoints_t expected = { 5.0500001907348633 }; vector computed = getCutPoints(); computed = getCutPoints(); int expectedSize = expected.size(); EXPECT_EQ(computed.size(), expected.size()); - for (auto i = 0; i < expectedSize; i++) { + for (auto i = 0; i < computed.size(); i++) { EXPECT_NEAR(computed[i], expected[i], precision); } } - TEST_F(TestFImdlp, ComputeCutPoints) + TEST_F(TestFImdlp, TestArtificialDataset) { - cutPoints_t expected = { 5.65 }; - algorithm = false; - computeCutPoints(0, 10); - checkCutPoints(expected); + algorithm = 0; + fit(X, y); + computeCutPoints(0, 20); + cutPoints_t expected = { 5.0500001907348633 }; + vector computed = getCutPoints(); + computed = getCutPoints(); + int expectedSize = expected.size(); + EXPECT_EQ(computed.size(), expected.size()); + for (auto i = 0; i < computed.size(); i++) { + EXPECT_NEAR(computed[i], expected[i], precision); + } + } + TEST_F(TestFImdlp, TestIris) + { + ArffFiles file; + string path = "../datasets/"; + + file.load(path + "iris.arff", true); + int items = file.getSize(); + vector& X = file.getX(); + vector expected = { + { 5.4499998092651367, 6.25 }, + { 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 }, + { 2.4500000476837158, 4.75, 5.0500001907348633 }, + { 0.80000001192092896, 1.4500000476837158, 1.75 } + }; + labels_t& y = file.getY(); + auto attributes = file.getAttributes(); + algorithm = 0; + for (auto feature = 0; feature < attributes.size(); feature++) { + fit(X[feature], y); + vector computed = getCutPoints(); + EXPECT_EQ(computed.size(), expected[feature].size()); + for (auto i = 0; i < computed.size(); i++) { + EXPECT_NEAR(computed[i], expected[feature][i], precision); + } + } + } + TEST_F(TestFImdlp, TestIrisAlternative) + { + ArffFiles file; + string path = "../datasets/"; + + file.load(path + "iris.arff", true); + int items = file.getSize(); + vector& X = file.getX(); + vector expected = { + { 5.4499998092651367, 5.75 }, + { 2.8499999046325684, 3.3499999046325684 }, + { 2.4500000476837158, 4.75 }, + { 0.80000001192092896, 1.75 } + }; + labels_t& y = file.getY(); + auto attributes = file.getAttributes(); + algorithm = 1; + for (auto feature = 0; feature < attributes.size(); feature++) { + fit(X[feature], y); + vector computed = getCutPoints(); + EXPECT_EQ(computed.size(), expected[feature].size()); + for (auto i = 0; i < computed.size(); i++) { + EXPECT_NEAR(computed[i], expected[feature][i], precision); + } + } } TEST_F(TestFImdlp, ComputeCutPointsGCase) { cutPoints_t expected; - algorithm = false; - expected = { 2 }; + algorithm = 0; + expected = { 1.5 }; samples_t X_ = { 0, 1, 2, 2 }; labels_t y_ = { 1, 1, 1, 2 }; fit(X_, y_); checkCutPoints(expected); } - TEST_F(TestFImdlp, ComputeCutPointsalAlternative) - { - algorithm = true; - cutPoints_t expected; - expected = {}; - fit(X, y); - computeCutPointsAlternative(0, 10); - checkCutPoints(expected); - } TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase) { cutPoints_t expected; @@ -124,14 +183,4 @@ namespace mdlp { fit(X_, y_); checkCutPoints(expected); } - TEST_F(TestFImdlp, GetCutPoints) - { - samples_t computed, expected = { 5.65 }; - algorithm = false; - computeCutPoints(0, 10); - computed = getCutPoints(); - for (auto item : cutPoints) - cout << setprecision(6) << item << endl; - checkVectors(expected, computed); - } }