From dec12959332d11a780d894cc386aca7510b3298b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 20 Feb 2023 18:23:05 +0100 Subject: [PATCH] Remove alternative and Classic Refactor ValueCutPoint Refactor sameValues in getCandidate --- .github/workflows/build.yml | 26 ++++++ .gitignore | 1 + .vscode/launch.json | 22 +++++ .vscode/settings.json | 5 ++ CMakeLists.txt | 2 +- CPPFImdlp.cpp | 169 +++++++++++++----------------------- CPPFImdlp.h | 12 +-- Metrics.cpp | 6 +- sample/CMakeLists.txt | 2 +- sample/sample.cpp | 20 +++-- tests/ArffFiles.cpp | 4 +- tests/ArffFiles.h | 5 +- tests/FImdlp_unittest.cpp | 125 +++++++++++++------------- typesFImdlp.h | 2 +- 14 files changed, 198 insertions(+), 203 deletions(-) create mode 100644 .github/workflows/build.yml create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..4625a01 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,26 @@ +name: Build + +on: + push: + branches: + - main + + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + - uses: sonarsource/sonarqube-scan-action@master + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }} + # If you wish to fail your job when the Quality Gate is red, uncomment the + # following lines. This would typically be used to fail a deployment. 
+ # - uses: sonarsource/sonarqube-quality-gate-action@master + # timeout-minutes: 5 + # env: + # SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/.gitignore b/.gitignore index be772d2..23b7ce1 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ .idea cmake-* **/CMakeFiles +sonar-project.properties diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..4d023ae --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(lldb) Launch", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceRoot}/sample/build/sample", + "args": [ + "test" + ], + "stopAtEntry": false, + "cwd": "${workspaceRoot}/sample/build/", + "environment": [], + "externalConsole": false, + "MIMode": "lldb" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..09b14a2 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "sonarlint.connectedMode.project": { + "projectKey": "rmontanana_mdlp_AYZkjILJHyjW-meBaElG" + } +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 382ca27..ff48211 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(mdlp) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 11) add_library(mdlp CPPFImdlp.cpp Metrics.cpp) diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index c70efaf..59b9b4b 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -6,13 +6,15 @@ #include "Metrics.h" namespace mdlp { - CPPFImdlp::CPPFImdlp(int algorithm) : algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), - metrics(Metrics(y, indices)) { - } + CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), 
y(labels_t()), + metrics(Metrics(y, indices)) + { + } CPPFImdlp::~CPPFImdlp() = default; - CPPFImdlp &CPPFImdlp::fit(samples_t &X_, labels_t &y_) { + CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_) + { X = X_; y = y_; cutPoints.clear(); @@ -24,117 +26,75 @@ namespace mdlp { } indices = sortIndices(X_, y_); metrics.setData(y, indices); - switch (algorithm) { - case 0: - computeCutPoints(0, X.size()); - break; - case 1: - computeCutPointsAlternative(0, X.size()); - break; - case 2: - indices = sortIndices1(X_); - metrics.setData(y, indices); - computeCutPointsClassic(0, X.size()); - break; - default: - throw invalid_argument("algorithm must be 0, 1 or 2"); - } + computeCutPoints(0, X.size()); return *this; } - precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx) { - size_t idxPrev = idx - 1; - precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]]; - // definition 2 of the paper => X[t-1] < X[t] - while (idxPrev-- > start && actual == previous) { - previous = X[indices[idxPrev]]; - } - return (previous + actual) / 2; - } - - tuple CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end) { - size_t idxPrev = cut - 1; - bool fforward = false; - precision_t previous, actual; + pair CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) + { + size_t n, m, idxPrev = cut - 1 <= start ? cut - 1 : cut; + size_t idxNext = cut + 1 < end ? 
cut + 1 : cut; + bool backWall; // true if duplicates reach the beginning of the interval + precision_t previous, actual, next; previous = X[indices[idxPrev]]; actual = X[indices[cut]]; + next = X[indices[idxNext]]; // definition 2 of the paper => X[t-1] < X[t] - while (idxPrev-- > start && actual == previous) { - previous = X[indices[idxPrev]]; + // get the first equal value of X in the interval + while (idxPrev > start && actual == previous) { + previous = X[indices[--idxPrev]]; } + backWall = idxPrev == start && actual == previous; // get the last equal value of X in the interval - while (actual == X[indices[cut]] && cut + 1 < end) { - cut++; - fforward = true; + while (idxNext < end - 1 && actual == next) { + next = X[indices[++idxNext]]; } - if (fforward) - cut--; - // try to get the next value if it can't be found backwards - if (previous == actual && cut + 1 < end) - actual = X[indices[cut + 1]]; - return make_tuple((previous + actual) / 2, cut); + // # of duplicates before cutpoint + n = cut - 1 - idxPrev; + // # of duplicates after cutpoint + m = idxNext - cut - 1; + // Decide which values to use + cut = cut + (backWall ? 
m + 1 : -n); + actual = X[indices[cut]]; + return { (actual + previous) / 2, cut }; } - void CPPFImdlp::computeCutPoints(size_t start, size_t end) { + void CPPFImdlp::computeCutPoints(size_t start, size_t end) + { size_t cut; - tuple result; - if (end - start < 2) + pair result; + if (end - start < 3) return; cut = getCandidate(start, end); if (cut == numeric_limits::max()) return; if (mdlp(start, cut, end)) { - result = completeValueCutPoint(start, cut, end); - cut = get<1>(result); - cutPoints.push_back(get<0>(result)); + result = valueCutPoint(start, cut, end); + cut = result.second; + cutPoints.push_back(result.first); computeCutPoints(start, cut); computeCutPoints(cut, end); } } - void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end) { - size_t cut; - if (end - start < 2) - return; - cut = getCandidate(start, end); - if (cut == numeric_limits::max()) - return; - if (mdlp(start, cut, end)) { - cutPoints.push_back(halfWayValueCutPoint(start, cut)); - computeCutPointsAlternative(start, cut); - computeCutPointsAlternative(cut, end); - } - } - - void CPPFImdlp::computeCutPointsClassic(size_t start, size_t end) { - size_t cut; - cut = getCandidate(start, end); - if (cut == numeric_limits::max() || !mdlp(start, cut, end)) { - // cut.value == -1 means that there is no candidate in the interval - // No boundary found, so we add both ends of the interval as cutpoints - // because they were selected by the algorithm before - if (start == end) - return; - if (start != 0) - cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2); - if (end != X.size()) - cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2); - return; - } - computeCutPoints(start, cut); - computeCutPoints(cut, end); - } - - size_t CPPFImdlp::getCandidate(size_t start, size_t end) { + size_t CPPFImdlp::getCandidate(size_t start, size_t end) + { /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which E(A, TA; S) is minimal 
amongst all the candidate cut points. */ size_t candidate = numeric_limits::max(), elements = end - start; - bool same_values = true; + bool sameValues = true; precision_t entropy_left, entropy_right, minEntropy; + // Check if all the values of the variable in the interval are the same + for (size_t idx = start + 1; idx < end; idx++) { + if (X[indices[idx]] != X[indices[start]]) { + sameValues = false; + break; + } + } + if (sameValues) + return candidate; minEntropy = metrics.entropy(start, end); - for (auto idx = start + 1; idx < end; idx++) { - if (X[indices[idx]] != X[indices[idx - 1]]) - same_values = false; + for (size_t idx = start + 1; idx < end; idx++) { // Cutpoints are always on boundaries (definition 2) if (y[indices[idx]] == y[indices[idx - 1]]) continue; @@ -145,13 +105,11 @@ namespace mdlp { candidate = idx; } } - // If all the values of the variable in the interval are the same, it doesn't consider the cut point - if (same_values) - candidate = numeric_limits::max(); return candidate; } - bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) { + bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) + { int k, k1, k2; precision_t ig, delta; precision_t ent, ent1, ent2; @@ -167,37 +125,28 @@ namespace mdlp { ent2 = metrics.entropy(cut, end); ig = metrics.informationGain(start, cut, end); delta = log2(pow(3, precision_t(k)) - 2) - - (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2); + (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2); precision_t term = 1 / N * (log2(N - 1) + delta); return ig > term; } // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes - indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) { + indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_) + { indices_t idx(X_.size()); iota(idx.begin(), idx.end(), 0); for (size_t i = 0; i < X_.size(); i++) stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) { 
- if (X_[i1] == X_[i2]) - return y_[i1] < y_[i2]; - else - return X_[i1] < X_[i2]; - }); - return idx; - } - - // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes - indices_t CPPFImdlp::sortIndices1(samples_t &X_) { - indices_t idx(X_.size()); - iota(idx.begin(), idx.end(), 0); - for (size_t i = 0; i < X_.size(); i++) - stable_sort(idx.begin(), idx.end(), [&X_](size_t i1, size_t i2) { + if (X_[i1] == X_[i2]) + return y_[i1] < y_[i2]; + else return X_[i1] < X_[i2]; - }); + }); return idx; } - cutPoints_t CPPFImdlp::getCutPoints() { + cutPoints_t CPPFImdlp::getCutPoints() + { // Remove duplicates and sort cutPoints_t output(cutPoints.size()); set s; diff --git a/CPPFImdlp.h b/CPPFImdlp.h index 24b2877..0280cd9 100644 --- a/CPPFImdlp.h +++ b/CPPFImdlp.h @@ -3,12 +3,10 @@ #include "typesFImdlp.h" #include "Metrics.h" #include -#include #include namespace mdlp { class CPPFImdlp { protected: - int algorithm; indices_t indices; samples_t X; labels_t y; @@ -16,20 +14,16 @@ namespace mdlp { cutPoints_t cutPoints; static indices_t sortIndices(samples_t&, labels_t&); - static indices_t sortIndices1(samples_t&); void computeCutPoints(size_t, size_t); - void computeCutPointsAlternative(size_t, size_t); - void computeCutPointsClassic(size_t, size_t); bool mdlp(size_t, size_t, size_t); size_t getCandidate(size_t, size_t); - precision_t halfWayValueCutPoint(size_t, size_t); - tuple completeValueCutPoint(size_t, size_t, size_t); + pair valueCutPoint(size_t, size_t, size_t); public: - CPPFImdlp(int algorithm = 0); + CPPFImdlp(); ~CPPFImdlp(); CPPFImdlp& fit(samples_t&, labels_t&); samples_t getCutPoints(); - inline string version() { return "1.0.0"; }; + inline string version() { return "1.1.0"; }; }; } #endif \ No newline at end of file diff --git a/Metrics.cpp b/Metrics.cpp index 1275b00..766e508 100644 --- a/Metrics.cpp +++ b/Metrics.cpp @@ -29,8 +29,8 @@ namespace mdlp { labels_t counts(numClasses + 1, 0); if (end - start < 2) 
return 0; - if (entropyCache.find(make_tuple(start, end)) != entropyCache.end()) { - return entropyCache[make_tuple(start, end)]; + if (entropyCache.find({ start, end }) != entropyCache.end()) { + return entropyCache[{start, end}]; } for (auto i = &indices[start]; i != &indices[end]; ++i) { counts[y[*i]]++; @@ -42,7 +42,7 @@ namespace mdlp { ventropy -= p * log2(p); } } - entropyCache[make_tuple(start, end)] = ventropy; + entropyCache[{start, end}] = ventropy; return ventropy; } precision_t Metrics::informationGain(size_t start, size_t cut, size_t end) diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 6fea95c..68ba4df 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.20) project(main) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 11) add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) diff --git a/sample/sample.cpp b/sample/sample.cpp index 18efdb9..0797e96 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -7,18 +7,20 @@ using namespace std; using namespace mdlp; + int main(int argc, char** argv) { ArffFiles file; vector lines; string path = "../../tests/datasets/"; - map datasets = { - {"mfeat-factors", true}, - {"iris", true}, - {"letter", true}, - {"glass", true}, - {"kdd_JapaneseVowels", false}, - {"test", true} + map datasets = { + {"mfeat-factors", true}, + {"iris", true}, + {"letter", true}, + {"glass", true}, + {"kdd_JapaneseVowels", false}, + {"mfeat-factors", true}, + {"test", true} }; if (argc != 2 || datasets.find(argv[1]) == datasets.end()) { cout << "Usage: " << argv[0] << " {mfeat-factors, glass, iris, letter, kdd_JapaneseVowels, test}" << endl; @@ -44,9 +46,11 @@ int main(int argc, char** argv) } cout << y[i] << endl; } - mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0); + mdlp::CPPFImdlp test = mdlp::CPPFImdlp(); for (auto i = 0; i < attributes.size(); i++) { + auto min_max = minmax_element(X[i].begin(), X[i].end()); cout << "Cut 
points for " << get<0>(attributes[i]) << endl; + cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl; cout << "--------------------------" << setprecision(3) << endl; test.fit(X[i], y); for (auto item : test.getCutPoints()) { diff --git a/tests/ArffFiles.cpp b/tests/ArffFiles.cpp index 7b59ef8..470f5fa 100644 --- a/tests/ArffFiles.cpp +++ b/tests/ArffFiles.cpp @@ -17,7 +17,7 @@ unsigned long int ArffFiles::getSize() { return lines.size(); } -vector> ArffFiles::getAttributes() +vector> ArffFiles::getAttributes() { return attributes; } @@ -50,7 +50,7 @@ void ArffFiles::load(string fileName, bool classLast) if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { stringstream ss(line); ss >> keyword >> attribute >> type; - attributes.push_back(make_tuple(attribute, type)); + attributes.push_back({ attribute, type }); continue; } if (line[0] == '@') { diff --git a/tests/ArffFiles.h b/tests/ArffFiles.h index 6986d3b..b56d28d 100644 --- a/tests/ArffFiles.h +++ b/tests/ArffFiles.h @@ -2,12 +2,11 @@ #define ARFFFILES_H #include #include -#include using namespace std; class ArffFiles { private: vector lines; - vector> attributes; + vector> attributes; string className, classType; vector> X; vector y; @@ -22,7 +21,7 @@ public: string trim(const string&); vector>& getX(); vector& getY(); - vector> getAttributes(); + vector> getAttributes(); vector factorize(const vector& labels_t); }; #endif \ No newline at end of file diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index e86a156..2e5757e 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -13,18 +13,13 @@ namespace mdlp { { X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 }; y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 }; - algorithm = false; fit(X, y); } - void setalgorithm(bool value) - { - algorithm = value; - } void checkSortedVector() { indices_t 
testSortedIndices = sortIndices(X, y); precision_t prev = X[testSortedIndices[0]]; - for (auto i = 0; i < X.size(); ++i) { + for (unsigned long i = 0; i < X.size(); ++i) { EXPECT_EQ(testSortedIndices[i], indices[i]); EXPECT_LE(prev, X[testSortedIndices[i]]); prev = X[testSortedIndices[i]]; @@ -34,7 +29,7 @@ namespace mdlp { { int expectedSize = expected.size(); EXPECT_EQ(cutPoints.size(), expectedSize); - for (auto i = 0; i < cutPoints.size(); i++) { + for (unsigned long i = 0; i < cutPoints.size(); i++) { EXPECT_NEAR(cutPoints[i], expected[i], precision); } } @@ -47,6 +42,19 @@ namespace mdlp { EXPECT_NEAR(expected[i], computed[i], precision); } } + bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title) + { + pair result; + labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + X = X_; + y = y_; + indices = sortIndices(X, y); + cout << "* " << title << endl; + result = valueCutPoint(0, cut, 10); + EXPECT_NEAR(result.first, midPoint, precision); + EXPECT_EQ(result.second, limit); + return true; + } }; TEST_F(TestFImdlp, FitErrorEmptyDataset) { @@ -54,11 +62,6 @@ namespace mdlp { y = labels_t(); EXPECT_THROW(fit(X, y), std::invalid_argument); } - TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm) - { - algorithm = 2; - EXPECT_THROW(fit(X, y), std::invalid_argument); - } TEST_F(TestFImdlp, FitErrorDifferentSize) { X = { 1, 2, 3 }; @@ -83,31 +86,41 @@ namespace mdlp { y = { 2, 2, 1 }; indices = { 1, 2, 0 }; } - TEST_F(TestFImdlp, TestArtificialDatasetAlternative) + TEST_F(TestFImdlp, TestShortDatasets) { - algorithm = 1; + vector computed; + X = { 1 }; + y = { 1 }; fit(X, y); - computeCutPoints(0, 20); - cutPoints_t expected = { 5.0500001907348633 }; - vector computed = getCutPoints(); computed = getCutPoints(); - int expectedSize = expected.size(); - EXPECT_EQ(computed.size(), expected.size()); - for (auto i = 0; i < computed.size(); i++) { - EXPECT_NEAR(computed[i], expected[i], precision); - } + EXPECT_EQ(computed.size(), 0); + X = { 1, 3 }; 
+ y = { 1, 2 }; + fit(X, y); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 0); + X = { 2, 4 }; + y = { 1, 2 }; + fit(X, y); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 0); + X = { 1, 2, 3 }; + y = { 1, 2, 2 }; + fit(X, y); + computed = getCutPoints(); + EXPECT_EQ(computed.size(), 1); + EXPECT_NEAR(computed[0], 1.5, precision); } TEST_F(TestFImdlp, TestArtificialDataset) { - algorithm = 0; fit(X, y); computeCutPoints(0, 20); - cutPoints_t expected = { 5.0500001907348633 }; + cutPoints_t expected = { 5.05 }; vector computed = getCutPoints(); computed = getCutPoints(); int expectedSize = expected.size(); EXPECT_EQ(computed.size(), expected.size()); - for (auto i = 0; i < computed.size(); i++) { + for (unsigned long i = 0; i < computed.size(); i++) { EXPECT_NEAR(computed[i], expected[i], precision); } } @@ -116,44 +129,17 @@ namespace mdlp { ArffFiles file; string path = "../datasets/"; - file.load(path + "iris.arff", true); - int items = file.getSize(); - vector& X = file.getX(); - vector expected = { - { 5.4499998092651367, 6.25 }, - { 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 }, - { 2.4500000476837158, 4.75, 5.0500001907348633 }, - { 0.80000001192092896, 1.4500000476837158, 1.75 } - }; - labels_t& y = file.getY(); - auto attributes = file.getAttributes(); - algorithm = 0; - for (auto feature = 0; feature < attributes.size(); feature++) { - fit(X[feature], y); - vector computed = getCutPoints(); - EXPECT_EQ(computed.size(), expected[feature].size()); - for (auto i = 0; i < computed.size(); i++) { - EXPECT_NEAR(computed[i], expected[feature][i], precision); - } - } - } - TEST_F(TestFImdlp, TestIrisAlternative) - { - ArffFiles file; - string path = "../datasets/"; - file.load(path + "iris.arff", true); int items = file.getSize(); vector& X = file.getX(); vector expected = { { 5.4499998092651367, 5.75 }, - { 2.8499999046325684, 3.3499999046325684 }, - { 2.4500000476837158, 4.75 }, + { 2.75, 2.85, 2.95, 3.05, 3.35 }, + { 
2.4500000476837158, 4.75, 5.0500001907348633 }, { 0.80000001192092896, 1.75 } }; labels_t& y = file.getY(); auto attributes = file.getAttributes(); - algorithm = 1; for (auto feature = 0; feature < attributes.size(); feature++) { fit(X[feature], y); vector computed = getCutPoints(); @@ -166,21 +152,30 @@ namespace mdlp { TEST_F(TestFImdlp, ComputeCutPointsGCase) { cutPoints_t expected; - algorithm = 0; expected = { 1.5 }; - samples_t X_ = { 0, 1, 2, 2 }; - labels_t y_ = { 1, 1, 1, 2 }; + samples_t X_ = { 0, 1, 2, 2, 2 }; + labels_t y_ = { 1, 1, 1, 2, 2 }; fit(X_, y_); checkCutPoints(expected); } - TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase) + TEST_F(TestFImdlp, CompleteValueCutPoint) { - cutPoints_t expected; - expected = { 1.5 }; - algorithm = true; - samples_t X_ = { 0, 1, 2, 2 }; - labels_t y_ = { 1, 1, 1, 2 }; - fit(X_, y_); - checkCutPoints(expected); + // Case titles as stated in the doc + samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 }; + test_result(X1a, 6, 7.3 / 2, 6, "1a"); + samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; + test_result(X2a, 6, 7.1 / 2, 4, "2a"); + samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; + test_result(X2b, 6, 7.5 / 2, 7, "2b"); + samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; + test_result(X3a, 4, 7.1 / 2, 4, "3a"); + samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 }; + test_result(X3b, 4, 7.1 / 2, 4, "3b"); + samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 }; + test_result(X4a, 4, 6.9 / 2, 2, "4a"); + samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 }; + test_result(X4b, 4, 7.5 / 2, 7, "4b"); + samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 }; + test_result(X4c, 4, 6.9 / 2, 2, "4c"); } } diff --git a/typesFImdlp.h b/typesFImdlp.h index 4a175cd..753e333 100644 --- a/typesFImdlp.h +++ b/typesFImdlp.h @@ -11,7 +11,7 @@ namespace mdlp { typedef vector labels_t; 
typedef vector indices_t; typedef vector cutPoints_t; - typedef map, precision_t> cacheEnt_t; + typedef map, precision_t> cacheEnt_t; typedef map, precision_t> cacheIg_t; } #endif