mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 15:35:55 +00:00
test: ⚡ Refactor tests to new version
This commit is contained in:
@@ -27,7 +27,7 @@ namespace mdlp {
|
|||||||
~CPPFImdlp();
|
~CPPFImdlp();
|
||||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||||
samples_t getCutPoints();
|
samples_t getCutPoints();
|
||||||
inline string version() { return "0.9.7"; };
|
inline string version() { return "1.0.0"; };
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
12
README.md
12
README.md
@@ -1,4 +1,5 @@
|
|||||||
# mdlp
|
# mdlp
|
||||||
|
|
||||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||||
|
|
||||||
The implementation tries to mitigate the problem of different label values with the same value of the variable:
|
The implementation tries to mitigate the problem of different label values with the same value of the variable:
|
||||||
@@ -19,4 +20,13 @@ cd build
|
|||||||
cmake ..
|
cmake ..
|
||||||
make
|
make
|
||||||
./sample iris
|
./sample iris
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Test
|
||||||
|
|
||||||
|
To run the tests, execute the following commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd tests
|
||||||
|
./test
|
||||||
|
```
|
||||||
|
@@ -3,4 +3,4 @@ project(main)
|
|||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
|
|
||||||
add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||||
|
@@ -1,8 +1,8 @@
|
|||||||
#include "ArffFiles.h"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
|
#include "../tests/ArffFiles.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace mdlp;
|
using namespace mdlp;
|
||||||
|
@@ -18,7 +18,7 @@ FetchContent_MakeAvailable(googletest)
|
|||||||
enable_testing()
|
enable_testing()
|
||||||
|
|
||||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "../Metrics.h"
|
#include "../Metrics.h"
|
||||||
#include "../CPPFImdlp.h"
|
#include "../CPPFImdlp.h"
|
||||||
|
#include "ArffFiles.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
@@ -10,10 +11,8 @@ namespace mdlp {
|
|||||||
TestFImdlp(): CPPFImdlp() {}
|
TestFImdlp(): CPPFImdlp() {}
|
||||||
void SetUp()
|
void SetUp()
|
||||||
{
|
{
|
||||||
// 5.0, 5.1, 5.1, 5.1, 5.2, 5.3, 5.6, 5.7, 5.9, 6.0]
|
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
//(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
|
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
|
||||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
|
||||||
algorithm = false;
|
algorithm = false;
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
}
|
}
|
||||||
@@ -55,6 +54,11 @@ namespace mdlp {
|
|||||||
y = labels_t();
|
y = labels_t();
|
||||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||||
}
|
}
|
||||||
|
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
|
||||||
|
{
|
||||||
|
algorithm = 2;
|
||||||
|
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||||
|
}
|
||||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||||
{
|
{
|
||||||
X = { 1, 2, 3 };
|
X = { 1, 2, 3 };
|
||||||
@@ -64,56 +68,111 @@ namespace mdlp {
|
|||||||
TEST_F(TestFImdlp, SortIndices)
|
TEST_F(TestFImdlp, SortIndices)
|
||||||
{
|
{
|
||||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||||
|
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||||
checkSortedVector();
|
checkSortedVector();
|
||||||
X = { 5.77, 5.88, 5.99 };
|
X = { 5.77, 5.88, 5.99 };
|
||||||
|
y = { 1, 2, 1 };
|
||||||
indices = { 0, 1, 2 };
|
indices = { 0, 1, 2 };
|
||||||
checkSortedVector();
|
checkSortedVector();
|
||||||
X = { 5.33, 5.22, 5.11 };
|
X = { 5.33, 5.22, 5.11 };
|
||||||
|
y = { 1, 2, 1 };
|
||||||
indices = { 2, 1, 0 };
|
indices = { 2, 1, 0 };
|
||||||
checkSortedVector();
|
checkSortedVector();
|
||||||
|
X = { 5.33, 5.22, 5.33 };
|
||||||
|
y = { 2, 2, 1 };
|
||||||
|
indices = { 1, 2, 0 };
|
||||||
}
|
}
|
||||||
|
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
|
||||||
TEST_F(TestFImdlp, TestDataset)
|
|
||||||
{
|
{
|
||||||
algorithm = 0;
|
algorithm = 1;
|
||||||
fit(X, y);
|
fit(X, y);
|
||||||
computeCutPoints(0, 10);
|
computeCutPoints(0, 20);
|
||||||
cutPoints_t expected = { 5.6499996185302734 };
|
cutPoints_t expected = { 5.0500001907348633 };
|
||||||
vector<precision_t> computed = getCutPoints();
|
vector<precision_t> computed = getCutPoints();
|
||||||
computed = getCutPoints();
|
computed = getCutPoints();
|
||||||
int expectedSize = expected.size();
|
int expectedSize = expected.size();
|
||||||
EXPECT_EQ(computed.size(), expected.size());
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
for (auto i = 0; i < expectedSize; i++) {
|
for (auto i = 0; i < computed.size(); i++) {
|
||||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPoints)
|
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||||
{
|
{
|
||||||
cutPoints_t expected = { 5.65 };
|
algorithm = 0;
|
||||||
algorithm = false;
|
fit(X, y);
|
||||||
computeCutPoints(0, 10);
|
computeCutPoints(0, 20);
|
||||||
checkCutPoints(expected);
|
cutPoints_t expected = { 5.0500001907348633 };
|
||||||
|
vector<precision_t> computed = getCutPoints();
|
||||||
|
computed = getCutPoints();
|
||||||
|
int expectedSize = expected.size();
|
||||||
|
EXPECT_EQ(computed.size(), expected.size());
|
||||||
|
for (auto i = 0; i < computed.size(); i++) {
|
||||||
|
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_F(TestFImdlp, TestIris)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
string path = "../datasets/";
|
||||||
|
|
||||||
|
file.load(path + "iris.arff", true);
|
||||||
|
int items = file.getSize();
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
vector<cutPoints_t> expected = {
|
||||||
|
{ 5.4499998092651367, 6.25 },
|
||||||
|
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
|
||||||
|
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||||
|
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
|
||||||
|
};
|
||||||
|
labels_t& y = file.getY();
|
||||||
|
auto attributes = file.getAttributes();
|
||||||
|
algorithm = 0;
|
||||||
|
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||||
|
fit(X[feature], y);
|
||||||
|
vector<precision_t> computed = getCutPoints();
|
||||||
|
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||||
|
for (auto i = 0; i < computed.size(); i++) {
|
||||||
|
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_F(TestFImdlp, TestIrisAlternative)
|
||||||
|
{
|
||||||
|
ArffFiles file;
|
||||||
|
string path = "../datasets/";
|
||||||
|
|
||||||
|
file.load(path + "iris.arff", true);
|
||||||
|
int items = file.getSize();
|
||||||
|
vector<samples_t>& X = file.getX();
|
||||||
|
vector<cutPoints_t> expected = {
|
||||||
|
{ 5.4499998092651367, 5.75 },
|
||||||
|
{ 2.8499999046325684, 3.3499999046325684 },
|
||||||
|
{ 2.4500000476837158, 4.75 },
|
||||||
|
{ 0.80000001192092896, 1.75 }
|
||||||
|
};
|
||||||
|
labels_t& y = file.getY();
|
||||||
|
auto attributes = file.getAttributes();
|
||||||
|
algorithm = 1;
|
||||||
|
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||||
|
fit(X[feature], y);
|
||||||
|
vector<precision_t> computed = getCutPoints();
|
||||||
|
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||||
|
for (auto i = 0; i < computed.size(); i++) {
|
||||||
|
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
algorithm = false;
|
algorithm = 0;
|
||||||
expected = { 2 };
|
expected = { 1.5 };
|
||||||
samples_t X_ = { 0, 1, 2, 2 };
|
samples_t X_ = { 0, 1, 2, 2 };
|
||||||
labels_t y_ = { 1, 1, 1, 2 };
|
labels_t y_ = { 1, 1, 1, 2 };
|
||||||
fit(X_, y_);
|
fit(X_, y_);
|
||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsalAlternative)
|
|
||||||
{
|
|
||||||
algorithm = true;
|
|
||||||
cutPoints_t expected;
|
|
||||||
expected = {};
|
|
||||||
fit(X, y);
|
|
||||||
computeCutPointsAlternative(0, 10);
|
|
||||||
checkCutPoints(expected);
|
|
||||||
}
|
|
||||||
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
||||||
{
|
{
|
||||||
cutPoints_t expected;
|
cutPoints_t expected;
|
||||||
@@ -124,14 +183,4 @@ namespace mdlp {
|
|||||||
fit(X_, y_);
|
fit(X_, y_);
|
||||||
checkCutPoints(expected);
|
checkCutPoints(expected);
|
||||||
}
|
}
|
||||||
TEST_F(TestFImdlp, GetCutPoints)
|
|
||||||
{
|
|
||||||
samples_t computed, expected = { 5.65 };
|
|
||||||
algorithm = false;
|
|
||||||
computeCutPoints(0, 10);
|
|
||||||
computed = getCutPoints();
|
|
||||||
for (auto item : cutPoints)
|
|
||||||
cout << setprecision(6) << item << endl;
|
|
||||||
checkVectors(expected, computed);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user