mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 23:45:57 +00:00
Add max_cutpoints Hyperparameter
This commit is contained in:
@@ -40,11 +40,10 @@ vector<int>& ArffFiles::getY()
|
||||
void ArffFiles::load(string fileName, bool classLast)
|
||||
{
|
||||
ifstream file(fileName);
|
||||
string keyword, attribute, type;
|
||||
if (file.is_open()) {
|
||||
string line;
|
||||
string line, keyword, attribute, type;
|
||||
while (getline(file, line)) {
|
||||
if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
|
||||
@@ -79,7 +78,7 @@ void ArffFiles::generateDataset(bool classLast)
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
|
@@ -86,13 +86,22 @@ namespace mdlp {
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth)
|
||||
{
|
||||
auto testLength = CPPFImdlp(2, 10);
|
||||
auto testDepth = CPPFImdlp(3, 0);
|
||||
auto testLength = CPPFImdlp(2, 10, 0);
|
||||
auto testDepth = CPPFImdlp(3, 0, 0);
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2, 3 };
|
||||
EXPECT_THROW(testLength.fit(X, y), invalid_argument);
|
||||
EXPECT_THROW(testDepth.fit(X, y), invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorMaxCutPoints)
|
||||
{
|
||||
auto testmin = CPPFImdlp(2, 10, -1);
|
||||
auto testmax = CPPFImdlp(3, 0, 200);
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2, 3 };
|
||||
EXPECT_THROW(testmin.fit(X, y), invalid_argument);
|
||||
EXPECT_THROW(testmax.fit(X, y), invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, SortIndices)
|
||||
{
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
@@ -139,10 +148,8 @@ namespace mdlp {
|
||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||
{
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20, 1);
|
||||
cutPoints_t expected = { 5.05 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
@@ -194,7 +201,7 @@ namespace mdlp {
|
||||
TEST_F(TestFImdlp, MaxDepth)
|
||||
{
|
||||
// Set max_depth to 1
|
||||
auto test = CPPFImdlp(3, 1);
|
||||
auto test = CPPFImdlp(3, 1, 0);
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.45 },
|
||||
{ 3.35 },
|
||||
@@ -206,7 +213,7 @@ namespace mdlp {
|
||||
}
|
||||
TEST_F(TestFImdlp, MinLength)
|
||||
{
|
||||
auto test = CPPFImdlp(75, 100);
|
||||
auto test = CPPFImdlp(75, 100, 0);
|
||||
// Set min_length to 75
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.45, 5.75 },
|
||||
@@ -220,7 +227,33 @@ namespace mdlp {
|
||||
TEST_F(TestFImdlp, MinLengthMaxDepth)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2);
|
||||
auto test = CPPFImdlp(75, 2, 0);
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.45, 5.75 },
|
||||
{ 2.85, 3.35 },
|
||||
{ 2.45, 4.75 },
|
||||
{ 0.8, 1.75 }
|
||||
};
|
||||
int depths[] = { 2, 2, 2, 2 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
TEST_F(TestFImdlp, MaxCutPointsInteger)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2, 1);
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.45 },
|
||||
{ 3.35 },
|
||||
{ 2.45 },
|
||||
{ 0.8}
|
||||
};
|
||||
int depths[] = { 1, 1, 1, 1 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
TEST_F(TestFImdlp, MaxCutPointsFloat)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2, 0.2);
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.45, 5.75 },
|
||||
{ 2.85, 3.35 },
|
||||
|
@@ -36,6 +36,7 @@ namespace mdlp {
|
||||
TEST_F(TestMetrics, InformationGain)
|
||||
{
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
|
||||
|
@@ -13,4 +13,5 @@ rm -fr gcovr-report/* 2>/dev/null
|
||||
#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
|
||||
#lcov --list lcoverage/main_coverage.info
|
||||
gcovr --root .. --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --txt --sonarqube=gcovr-report/coverage.xml
|
||||
cd ..
|
||||
gcovr --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml
|
||||
|
Reference in New Issue
Block a user