Remove alternative and Classic

Refactor ValueCutPoint
Refactor sameValues in getCandidate
This commit is contained in:
2023-02-20 18:23:05 +01:00
parent 04c1772019
commit dec1295933
14 changed files with 198 additions and 203 deletions

View File

@@ -17,7 +17,7 @@ unsigned long int ArffFiles::getSize()
{
return lines.size();
}
vector<tuple<string, string>> ArffFiles::getAttributes()
vector<pair<string, string>> ArffFiles::getAttributes()
{
return attributes;
}
@@ -50,7 +50,7 @@ void ArffFiles::load(string fileName, bool classLast)
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
ss >> keyword >> attribute >> type;
attributes.push_back(make_tuple(attribute, type));
attributes.push_back({ attribute, type });
continue;
}
if (line[0] == '@') {

View File

@@ -2,12 +2,11 @@
#define ARFFFILES_H
#include <string>
#include <vector>
#include <tuple>
using namespace std;
class ArffFiles {
private:
vector<string> lines;
vector<tuple<string, string>> attributes;
vector<pair<string, string>> attributes;
string className, classType;
vector<vector<float>> X;
vector<int> y;
@@ -22,7 +21,7 @@ public:
string trim(const string&);
vector<vector<float>>& getX();
vector<int>& getY();
vector<tuple<string, string>> getAttributes();
vector<pair<string, string>> getAttributes();
vector<int> factorize(const vector<string>& labels_t);
};
#endif

View File

@@ -13,18 +13,13 @@ namespace mdlp {
{
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
algorithm = false;
fit(X, y);
}
void setalgorithm(bool value)
{
algorithm = value;
}
void checkSortedVector()
{
indices_t testSortedIndices = sortIndices(X, y);
precision_t prev = X[testSortedIndices[0]];
for (auto i = 0; i < X.size(); ++i) {
for (unsigned long i = 0; i < X.size(); ++i) {
EXPECT_EQ(testSortedIndices[i], indices[i]);
EXPECT_LE(prev, X[testSortedIndices[i]]);
prev = X[testSortedIndices[i]];
@@ -34,7 +29,7 @@ namespace mdlp {
{
int expectedSize = expected.size();
EXPECT_EQ(cutPoints.size(), expectedSize);
for (auto i = 0; i < cutPoints.size(); i++) {
for (unsigned long i = 0; i < cutPoints.size(); i++) {
EXPECT_NEAR(cutPoints[i], expected[i], precision);
}
}
@@ -47,6 +42,19 @@ namespace mdlp {
EXPECT_NEAR(expected[i], computed[i], precision);
}
}
// Fixture helper for the valueCutPoint test cases: loads X_ together with ten
// distinct labels (0..9) into the fixture, rebuilds the sorted indices and checks
// the pair returned by valueCutPoint(0, cut, 10).
//   X_       - samples copied into the fixture member X
//   cut      - forwarded as the second argument of valueCutPoint
//   midPoint - value result.first has to match within `precision`
//   limit    - value result.second has to equal
//   title    - case label echoed to stdout before the checks run
// Always returns true; a failed check is reported only through the EXPECT_* macros.
bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title)
{
pair<precision_t, size_t> result;
// One distinct label per sample, so X_ is expected to hold 10 values.
labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
X = X_;
y = y_;
// Rebuild indices from the replaced X and y before calling valueCutPoint
// (presumably valueCutPoint reads `indices` - confirm against its definition).
indices = sortIndices(X, y);
cout << "* " << title << endl;
// NOTE(review): 0 and 10 look like the start/end bounds of the inspected range - confirm.
result = valueCutPoint(0, cut, 10);
EXPECT_NEAR(result.first, midPoint, precision);
EXPECT_EQ(result.second, limit);
return true;
}
};
TEST_F(TestFImdlp, FitErrorEmptyDataset)
{
@@ -54,11 +62,6 @@ namespace mdlp {
y = labels_t();
EXPECT_THROW(fit(X, y), std::invalid_argument);
}
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
{
algorithm = 2;
EXPECT_THROW(fit(X, y), std::invalid_argument);
}
TEST_F(TestFImdlp, FitErrorDifferentSize)
{
X = { 1, 2, 3 };
@@ -83,31 +86,41 @@ namespace mdlp {
y = { 2, 2, 1 };
indices = { 1, 2, 0 };
}
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
TEST_F(TestFImdlp, TestShortDatasets)
{
algorithm = 1;
vector<precision_t> computed;
X = { 1 };
y = { 1 };
fit(X, y);
computeCutPoints(0, 20);
cutPoints_t expected = { 5.0500001907348633 };
vector<precision_t> computed = getCutPoints();
computed = getCutPoints();
int expectedSize = expected.size();
EXPECT_EQ(computed.size(), expected.size());
for (auto i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[i], precision);
}
EXPECT_EQ(computed.size(), 0);
X = { 1, 3 };
y = { 1, 2 };
fit(X, y);
computed = getCutPoints();
EXPECT_EQ(computed.size(), 0);
X = { 2, 4 };
y = { 1, 2 };
fit(X, y);
computed = getCutPoints();
EXPECT_EQ(computed.size(), 0);
X = { 1, 2, 3 };
y = { 1, 2, 2 };
fit(X, y);
computed = getCutPoints();
EXPECT_EQ(computed.size(), 1);
EXPECT_NEAR(computed[0], 1.5, precision);
}
TEST_F(TestFImdlp, TestArtificialDataset)
{
algorithm = 0;
fit(X, y);
computeCutPoints(0, 20);
cutPoints_t expected = { 5.0500001907348633 };
cutPoints_t expected = { 5.05 };
vector<precision_t> computed = getCutPoints();
computed = getCutPoints();
int expectedSize = expected.size();
EXPECT_EQ(computed.size(), expected.size());
for (auto i = 0; i < computed.size(); i++) {
for (unsigned long i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[i], precision);
}
}
@@ -116,44 +129,17 @@ namespace mdlp {
ArffFiles file;
string path = "../datasets/";
file.load(path + "iris.arff", true);
int items = file.getSize();
vector<samples_t>& X = file.getX();
vector<cutPoints_t> expected = {
{ 5.4499998092651367, 6.25 },
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
};
labels_t& y = file.getY();
auto attributes = file.getAttributes();
algorithm = 0;
for (auto feature = 0; feature < attributes.size(); feature++) {
fit(X[feature], y);
vector<precision_t> computed = getCutPoints();
EXPECT_EQ(computed.size(), expected[feature].size());
for (auto i = 0; i < computed.size(); i++) {
EXPECT_NEAR(computed[i], expected[feature][i], precision);
}
}
}
TEST_F(TestFImdlp, TestIrisAlternative)
{
ArffFiles file;
string path = "../datasets/";
file.load(path + "iris.arff", true);
int items = file.getSize();
vector<samples_t>& X = file.getX();
vector<cutPoints_t> expected = {
{ 5.4499998092651367, 5.75 },
{ 2.8499999046325684, 3.3499999046325684 },
{ 2.4500000476837158, 4.75 },
{ 2.75, 2.85, 2.95, 3.05, 3.35 },
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
{ 0.80000001192092896, 1.75 }
};
labels_t& y = file.getY();
auto attributes = file.getAttributes();
algorithm = 1;
for (auto feature = 0; feature < attributes.size(); feature++) {
fit(X[feature], y);
vector<precision_t> computed = getCutPoints();
@@ -166,21 +152,30 @@ namespace mdlp {
TEST_F(TestFImdlp, ComputeCutPointsGCase)
{
cutPoints_t expected;
algorithm = 0;
expected = { 1.5 };
samples_t X_ = { 0, 1, 2, 2 };
labels_t y_ = { 1, 1, 1, 2 };
samples_t X_ = { 0, 1, 2, 2, 2 };
labels_t y_ = { 1, 1, 1, 2, 2 };
fit(X_, y_);
checkCutPoints(expected);
}
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
TEST_F(TestFImdlp, CompleteValueCutPoint)
{
cutPoints_t expected;
expected = { 1.5 };
algorithm = true;
samples_t X_ = { 0, 1, 2, 2 };
labels_t y_ = { 1, 1, 1, 2 };
fit(X_, y_);
checkCutPoints(expected);
// Case titles as stated in the doc
samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 };
test_result(X1a, 6, 7.3 / 2, 6, "1a");
samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X2a, 6, 7.1 / 2, 4, "2a");
samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X2b, 6, 7.5 / 2, 7, "2b");
samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X3a, 4, 7.1 / 2, 4, "3a");
samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
test_result(X3b, 4, 7.1 / 2, 4, "3b");
samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 };
test_result(X4a, 4, 6.9 / 2, 2, "4a");
samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
test_result(X4b, 4, 7.5 / 2, 7, "4b");
samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
test_result(X4c, 4, 6.9 / 2, 2, "4c");
}
}