diff --git a/.gitignore b/.gitignore index fbe8f7d..be772d2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ **/lcoverage .idea cmake-* +**/CMakeFiles diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index 443ffd7..010e168 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -4,9 +4,10 @@ #include #include "CPPFImdlp.h" #include "Metrics.h" - +// OJO QUITAR ESTO +#include namespace mdlp { - CPPFImdlp::CPPFImdlp(int proposal):proposal(proposal), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices)) + CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices)) { } CPPFImdlp::~CPPFImdlp() @@ -23,14 +24,14 @@ namespace mdlp { if (X.size() == 0 || y.size() == 0) { throw invalid_argument("X and y must have at least one element"); } - indices = sortIndices(X_); + indices = sortIndices2(X_, y_); metrics.setData(y, indices); - switch (proposal) { + switch (algorithm) { case 0: computeCutPoints(0, X.size()); break; case 1: - computeCutPointsProposal(); + computeCutPointsProposal(0, X.size()); break; case 2: computeCutPointsAlternative(0, X.size()); @@ -38,78 +39,169 @@ namespace mdlp { } return *this; } + precision_t CPPFImdlp::value_cut_point(size_t start, size_t idx) + { + size_t idxPrev = idx - 1; + precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]]; + // definition 2 of the paper => X[t-1] < X[t] + while (idxPrev-- > start && actual == previous) { + previous = X[indices[idxPrev]]; + } + return (previous + actual) / 2; + } + tuple CPPFImdlp::value_proposal_cut_point(size_t start, size_t cut, size_t end) + { + size_t idxPrev = cut - 1; + precision_t previous, next, actual; + previous = X[indices[idxPrev]]; + next = actual = X[indices[cut]]; + // definition 2 of the paper => X[t-1] < X[t] + while (idxPrev-- > start && actual == previous) { + previous = X[indices[idxPrev]]; + } + // get the last equal value of X in the interval + while (actual == X[indices[cut++]] && cut < end); + if (previous == actual && cut < end) + actual = X[indices[cut]]; + cut--; + return make_tuple((previous + actual) / 2, cut); + } + // void CPPFImdlp::computeCutPoints(size_t start, size_t end) + // { + // size_t cut; + // if (end - start < 2) + // return; + // cut = getCandidate(start, end); + // if (cut == numeric_limits::max() || !mdlp(start, cut, end)) { + // // cut == max means that there is no candidate in the interval + // // No boundary found, so we add both ends of the interval as cutpoints + // // because they were selected by the algorithm before + // if (start != 0) + // cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2); + // if (end != X.size()) + // cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2); + // //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + // return; + // } + // // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + // computeCutPoints(start, cut); + // computeCutPoints(cut, end); + // } + // void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end) + // { + // size_t cut; + // if (end - start < 2) + // return; + // cut = getCandidate(start, end); + // if (cut == numeric_limits::max() || !mdlp(start, cut, end)) { + // // cut == max means that there is no candidate in the interval + // // No boundary found, so we add both ends of the interval as cutpoints + // // because they were selected by the algorithm before + // if (start != 0) + // cutPoints.push_back(value_cut_point(0, start)); + // if (end != X.size()) + // cutPoints.push_back(value_cut_point(start, end)); + // //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + // return; + // } + // // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + // computeCutPointsAlternative(start, cut); + // computeCutPointsAlternative(cut, end); + // } void CPPFImdlp::computeCutPoints(size_t start, size_t end) { - int cut; + size_t cut; if (end - start < 2) return; cut = getCandidate(start, end); - if (cut == -1 || !mdlp(start, cut, end)) { - // cut.value == -1 means that there is no candidate in the interval - // No boundary found, so we add both ends of the interval as cutpoints - // because they were selected by the algorithm before - if (start != 0) - cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2); - if (end != X.size()) - cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2); + if (cut == numeric_limits::max()) return; + if (mdlp(start, cut, end)) { + cutPoints.push_back(value_cut_point(start, cut)); + //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; } computeCutPoints(start, cut); computeCutPoints(cut, end); } void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end) { - precision_t cut; + size_t cut; if (end - start < 2) return; cut = getCandidate(start, end); - if (cut == -1) + if (cut == numeric_limits::max()) return; if (mdlp(start, cut, end)) { - cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2); + cutPoints.push_back(value_cut_point(start, cut)); + //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + computeCutPointsAlternative(start, cut); + computeCutPointsAlternative(cut, end); } - computeCutPointsAlternative(start, cut); - computeCutPointsAlternative(cut, end); } - void CPPFImdlp::computeCutPointsProposal() + // void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end) + // { + // size_t cut; + // if (end - start < 2) + // return; + // cut = getCandidateWeka(start, end); + // if (cut == numeric_limits::max()) + // return; + // if (mdlp(start, cut, end)) { + // cutPoints.push_back(value_cut_point(start, cut)); + // //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + // } + // computeCutPointsAlternative(start, cut); + // computeCutPointsAlternative(cut, end); + // } + void CPPFImdlp::computeCutPointsProposal(size_t start, size_t end) { - precision_t xPrev, xCur, xPivot, cutPoint; - int yPrev, yCur, yPivot; - size_t idx, numElements, start; + size_t cut; + tuple result; + if (end - start < 2) + return; + cut = getCandidate(start, end); + if (cut == numeric_limits::max()) + return; + if (mdlp(start, cut, end)) { + //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + result = value_proposal_cut_point(start, cut, end); + cut = get<1>(result); + cutPoints.push_back(get<0>(result)); + //cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl; + computeCutPointsProposal(start, cut); + computeCutPointsProposal(cut, end); + } - xCur = xPrev = X[indices[0]]; - yCur = yPrev = y[indices[0]]; - numElements = indices.size() - 1; - idx = start = 0; - while (idx < numElements) { - xPivot = xCur; - yPivot = yCur; - // Read the same values and check class changes - do { - idx++; - xCur = X[indices[idx]]; - yCur = y[indices[idx]]; - if (yCur != yPivot && xCur == xPivot) { - yPivot = -1; + } + size_t CPPFImdlp::getCandidateWeka(size_t start, size_t end) + { + /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which + E(A, TA; S) is minimal amogst all the candidate cut points. */ + size_t candidate = numeric_limits::max(), elements = end - start; + precision_t entropy_left, entropy_right, minEntropy; + minEntropy = metrics.entropy(start, end); + for (auto idx = start + 1; idx < end; idx++) { + // Cutpoints are always on boundaries (definition 2) + if (X[indices[idx - 1]] < X[indices[idx]]) { + entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx); + entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end); + if (entropy_left + entropy_right < minEntropy) { + minEntropy = entropy_left + entropy_right; + candidate = idx; } } - while (idx < numElements && xCur == xPivot); - // Check if the class changed and there are more than 1 element - if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) { - start = idx; - cutPoint = (xPrev + xCur) / 2; - cutPoints.push_back(cutPoint); - } - yPrev = yPivot; - xPrev = xPivot; } + return candidate; } - long int CPPFImdlp::getCandidate(size_t start, size_t end) + size_t CPPFImdlp::getCandidate(size_t start, size_t end) { - long int candidate = -1, elements = end - start; - precision_t entropy_left, entropy_right, minEntropy = numeric_limits::max(); + /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which + E(A, TA; S) is minimal amogst all the candidate cut points. */ + size_t candidate = numeric_limits::max(), elements = end - start; + precision_t entropy_left, entropy_right, minEntropy; + minEntropy = metrics.entropy(start, end); for (auto idx = start + 1; idx < end; idx++) { - // Cutpoints are always on boundaries + // Cutpoints are always on boundaries (definition 2) if (y[indices[idx]] == y[indices[idx - 1]]) continue; entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx); @@ -137,9 +229,9 @@ namespace mdlp { ent1 = metrics.entropy(start, cut); ent2 = metrics.entropy(cut, end); ig = metrics.informationGain(start, cut, end); - delta = log2(pow(3, precision_t(k)) - 2) - + delta = log(pow(3, precision_t(k)) - 2) - (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2); - precision_t term = 1 / N * (log2(N - 1) + delta); + precision_t term = 1 / N * (log(N - 1) + delta); return ig > term; } cutPoints_t CPPFImdlp::getCutPoints() @@ -164,4 +256,17 @@ namespace mdlp { { return X_[i1] < X_[i2]; }); return idx; } + indices_t CPPFImdlp::sortIndices2(samples_t& X_, labels_t& y_) + { + indices_t idx(X_.size()); + iota(idx.begin(), idx.end(), 0); + for (size_t i = 0; i < X_.size(); i++) + stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) + { + if (X_[i1] == X_[i2]) return y_[i1] < y_[i2]; + else + return X_[i1] < X_[i2]; + }); + return idx; + } } diff --git a/CPPFImdlp.h b/CPPFImdlp.h index 64b9fa1..dfddfb0 100644 --- a/CPPFImdlp.h +++ b/CPPFImdlp.h @@ -3,10 +3,12 @@ #include "typesFImdlp.h" #include "Metrics.h" #include +#include +#include namespace mdlp { class CPPFImdlp { protected: - int proposal; + int algorithm; indices_t indices; // sorted indices to use with X and y samples_t X; labels_t y; @@ -14,17 +16,22 @@ namespace mdlp { cutPoints_t cutPoints; static indices_t sortIndices(samples_t&); + static indices_t sortIndices2(samples_t&, labels_t&); void computeCutPoints(size_t, size_t); bool mdlp(size_t, size_t, size_t); - long int getCandidate(size_t, size_t); + size_t getCandidate(size_t, size_t); + size_t getCandidateWeka(size_t, size_t); void computeCutPointsAlternative(size_t, size_t); - void computeCutPointsProposal(); + void computeCutPointsProposal(size_t, size_t); + precision_t value_cut_point(size_t, size_t); + tuple value_proposal_cut_point(size_t, size_t, size_t); public: CPPFImdlp(int); ~CPPFImdlp(); CPPFImdlp& fit(samples_t&, labels_t&); samples_t getCutPoints(); + inline string version() { return "0.8.1"; }; }; } #endif \ No newline at end of file diff --git a/Metrics.cpp b/Metrics.cpp index 1275b00..dc7bc27 100644 --- a/Metrics.cpp +++ b/Metrics.cpp @@ -39,7 +39,7 @@ namespace mdlp { for (auto count : counts) { if (count > 0) { p = (precision_t)count / nElements; - ventropy -= p * log2(p); + ventropy -= p * log(p); } } entropyCache[make_tuple(start, end)] = ventropy; diff --git a/feature0 b/feature0 new file mode 100644 index 0000000..e835df9 --- /dev/null +++ b/feature0 @@ -0,0 +1,152 @@ ++++++++++++++++++++++++ +( 0, 13) -> (4.3, 0) +( 1, 8) -> (4.4, 0) +( 2, 38) -> (4.4, 0) +( 3, 42) -> (4.4, 0) +( 4, 41) -> (4.5, 0) +( 5, 3) -> (4.6, 0) +( 6, 6) -> (4.6, 0) +( 7, 22) -> (4.6, 0) +( 8, 47) -> (4.6, 0) +( 9, 2) -> (4.7, 0) +( 10, 29) -> (4.7, 0) +( 11, 11) -> (4.8, 0) +( 12, 12) -> (4.8, 0) +( 13, 24) -> (4.8, 0) +( 14, 30) -> (4.8, 0) +( 15, 45) -> (4.8, 0) +( 16, 1) -> (4.9, 0) +( 17, 9) -> (4.9, 0) +( 18, 34) -> (4.9, 0) +( 19, 37) -> (4.9, 0) +( 20, 57) -> (4.9, 1) candidate Total Entropy: 0.633 E. left: 0.000 E. right: 0.855 = 0.539 (0, 54) No +( 21, 106) -> (4.9, 2) +( 22, 4) -> (5.0, 0) +( 23, 7) -> (5.0, 0) +( 24, 25) -> (5.0, 0) +( 25, 26) -> (5.0, 0) +( 26, 35) -> (5.0, 0) +( 27, 40) -> (5.0, 0) +( 28, 43) -> (5.0, 0) +( 29, 49) -> (5.0, 0) +( 30, 60) -> (5.0, 1) +( 31, 93) -> (5.0, 1) +( 32, 0) -> (5.1, 0) +( 33, 17) -> (5.1, 0) +( 34, 19) -> (5.1, 0) +( 35, 21) -> (5.1, 0) +( 36, 23) -> (5.1, 0) +( 37, 39) -> (5.1, 0) +( 38, 44) -> (5.1, 0) +( 39, 46) -> (5.1, 0) +( 40, 98) -> (5.1, 1) +( 41, 27) -> (5.2, 0) +( 42, 28) -> (5.2, 0) +( 43, 32) -> (5.2, 0) +( 44, 59) -> (5.2, 1) +( 45, 48) -> (5.3, 0) +( 46, 5) -> (5.4, 0) +( 47, 10) -> (5.4, 0) +( 48, 16) -> (5.4, 0) +( 49, 20) -> (5.4, 0) +( 50, 31) -> (5.4, 0) +( 51, 84) -> (5.4, 1) +( 52, 33) -> (5.5, 0) +( 53, 36) -> (5.5, 0) +( 54, 53) -> (5.5, 1) 1st cut Total Entropy: 1.585 E. left: 0.633 E. right: 1.167 = 0.975 (0, 150) Sí => 5.450 +( 55, 80) -> (5.5, 1) +( 56, 81) -> (5.5, 1) +( 57, 89) -> (5.5, 1) +( 58, 90) -> (5.5, 1) +( 59, 64) -> (5.6, 1) +( 60, 66) -> (5.6, 1) +( 61, 69) -> (5.6, 1) +( 62, 88) -> (5.6, 1) +( 63, 94) -> (5.6, 1) +( 64, 121) -> (5.6, 2) Candidate Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 77) No +( 65, 15) -> (5.7, 0) +( 66, 18) -> (5.7, 0) +( 67, 55) -> (5.7, 1) +( 68, 79) -> (5.7, 1) +( 69, 95) -> (5.7, 1) +( 70, 96) -> (5.7, 1) +( 71, 99) -> (5.7, 1) +( 72, 113) -> (5.7, 2) +( 73, 14) -> (5.8, 0) +( 74, 67) -> (5.8, 1) +( 75, 82) -> (5.8, 1) +( 76, 92) -> (5.8, 1) +( 77, 101) -> (5.8, 2) 2nd cut Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 150) Sí => 5.750 +( 78, 114) -> (5.8, 2) +( 79, 142) -> (5.8, 2) +( 80, 61) -> (5.9, 1) +( 81, 70) -> (5.9, 1) +( 82, 149) -> (5.9, 2) +( 83, 62) -> (6.0, 1) +( 84, 78) -> (6.0, 1) +( 85, 83) -> (6.0, 1) +( 86, 85) -> (6.0, 1) +( 87, 119) -> (6.0, 2) +( 88, 138) -> (6.0, 2) +( 89, 63) -> (6.1, 1) +( 90, 71) -> (6.1, 1) +( 91, 73) -> (6.1, 1) +( 92, 91) -> (6.1, 1) +( 93, 127) -> (6.1, 2) +( 94, 134) -> (6.1, 2) +( 95, 68) -> (6.2, 1) +( 96, 97) -> (6.2, 1) +( 97, 126) -> (6.2, 2) +( 98, 148) -> (6.2, 2) +( 99, 56) -> (6.3, 1) +(100, 72) -> (6.3, 1) +(101, 87) -> (6.3, 1) +(102, 100) -> (6.3, 2) +(103, 103) -> (6.3, 2) +(104, 123) -> (6.3, 2) +(105, 133) -> (6.3, 2) +(106, 136) -> (6.3, 2) +(107, 146) -> (6.3, 2) +(108, 51) -> (6.4, 1) +(109, 74) -> (6.4, 1) +(110, 111) -> (6.4, 2) +(111, 115) -> (6.4, 2) +(112, 128) -> (6.4, 2) +(113, 132) -> (6.4, 2) +(114, 137) -> (6.4, 2) +(115, 54) -> (6.5, 1) +(116, 104) -> (6.5, 2) +(117, 110) -> (6.5, 2) +(118, 116) -> (6.5, 2) +(119, 147) -> (6.5, 2) +(120, 58) -> (6.6, 1) +(121, 75) -> (6.6, 1) +(122, 65) -> (6.7, 1) +(123, 77) -> (6.7, 1) +(124, 86) -> (6.7, 1) +(125, 108) -> (6.7, 2) +(126, 124) -> (6.7, 2) +(127, 140) -> (6.7, 2) +(128, 144) -> (6.7, 2) +(129, 145) -> (6.7, 2) +(130, 76) -> (6.8, 1) +(131, 112) -> (6.8, 2) +(132, 143) -> (6.8, 2) +(133, 52) -> (6.9, 1) +(134, 120) -> (6.9, 2) +(135, 139) -> (6.9, 2) +(136, 141) -> (6.9, 2) +(137, 50) -> (7.0, 1) +(138, 102) -> (7.1, 2) candidate Total Entropy: 0.939 E. left: 0.984 E. right: 0.000 = 0.822 (77, 150) No +(139, 109) -> (7.2, 2) +(140, 125) -> (7.2, 2) +(141, 129) -> (7.2, 2) +(142, 107) -> (7.3, 2) +(143, 130) -> (7.4, 2) +(144, 105) -> (7.6, 2) +(145, 117) -> (7.7, 2) +(146, 118) -> (7.7, 2) +(147, 122) -> (7.7, 2) +(148, 135) -> (7.7, 2) +(149, 131) -> (7.9, 2) ++++++++++++++++++++++++ \ No newline at end of file diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index d4d1f55..92d1a48 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -4,3 +4,4 @@ project(main) set(CMAKE_CXX_STANDARD 14) add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) +add_executable(test test.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp) diff --git a/sample/sample.cpp b/sample/sample.cpp index 772e150..6c65255 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -5,6 +5,7 @@ #include "../CPPFImdlp.h" using namespace std; +using namespace mdlp; int main(int argc, char** argv) { @@ -33,8 +34,8 @@ int main(int argc, char** argv) cout << "Class name: " << file.getClassName() << endl; cout << "Class type: " << file.getClassType() << endl; cout << "Data: " << endl; - vector>& X = file.getX(); - vector& y = file.getY(); + vector& X = file.getX(); + labels_t& y = file.getY(); for (int i = 0; i < 50; i++) { for (auto feature : X) { cout << fixed << setprecision(1) << feature[i] << " "; diff --git a/sample/test.cpp b/sample/test.cpp new file mode 100644 index 0000000..44bdfa9 --- /dev/null +++ b/sample/test.cpp @@ -0,0 +1,95 @@ +#include "ArffFiles.h" +#include +#include +#include +#include "../CPPFImdlp.h" + +using namespace std; +using namespace mdlp; + +tuple getCutPoint(samples_t& X, labels_t& y, size_t start, size_t cut, size_t end) +{ + size_t idxPrev = cut - 1; + precision_t previous, next, actual; + previous = X[idxPrev]; + next = actual = X[cut]; + // definition 2 of the paper => X[t-1] < X[t] + while (idxPrev-- > start && actual == previous) { + previous = X[idxPrev]; + } + // get the last equal value of X in the interval + while (actual == X[cut++] && cut < end); + if (previous == actual && cut < end) + actual = X[cut]; + cut--; + return make_tuple((previous + actual) / 2, cut); +} + +void show_points(samples_t& X, labels_t& y, size_t start, size_t end) +{ + cout << "Interval: " << start << " - " << end << endl; + tuple cutPoint; + size_t cut = start + 1; + if (start >= end) { + return; + } + while (y[cut - 1] == y[cut] && cut < end) + cut++; + if (cut != end) { + cutPoint = getCutPoint(X, y, start, cut, end); + cout << cut << ": " << fixed << setprecision(1) << X[cut] << " " << y[cut] << endl; + cout << "Cut point: " << get<0>(cutPoint) << " at " << get<1>(cutPoint) << endl; + show_points(X, y, start, get<1>(cutPoint)); + show_points(X, y, get<1>(cutPoint), end); + } + +} + +int main(int argc, char** argv) +{ + ArffFiles file; + vector lines; + string path = "../tests/"; + map datasets = { + {"01", true}, + {"02", true}, + {"03", true}, + {"04", true} + }; + if (argc != 2 || datasets.find(argv[1]) == datasets.end()) { + cout << "Usage: " << argv[0] << " {01, 02, 03, 04}" << endl; + return 1; + } + + file.load(path + argv[1] + ".arff", datasets[argv[1]]); + auto attributes = file.getAttributes(); + int items = file.getSize(); + cout << "Number of lines: " << items << endl; + cout << "Attributes: " << endl; + for (auto attribute : attributes) { + cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl; + } + cout << "Class name: " << file.getClassName() << endl; + cout << "Class type: " << file.getClassType() << endl; + cout << "Data: " << endl; + vector& X = file.getX(); + labels_t& y = file.getY(); + for (int i = 0; i < y.size(); i++) { + for (auto feature : X) { + cout << i << ": " << fixed << setprecision(1) << feature[i] << " "; + } + cout << y[i] << endl; + } + mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0); + for (auto i = 0; i < attributes.size(); i++) { + cout << "Cut points for " << get<0>(attributes[i]) << endl; + cout << "--------------------------" << setprecision(3) << endl; + test.fit(X[i], y); + for (auto item : test.getCutPoints()) { + cout << item << endl; + } + } + cout << "Function test" << endl; + show_points(X[0], y, 0, items); + return 0; +} diff --git a/sample/tests/01.arff b/sample/tests/01.arff new file mode 100755 index 0000000..aaeacb6 --- /dev/null +++ b/sample/tests/01.arff @@ -0,0 +1,35 @@ +% . + +@RELATION 01 + +@ATTRIBUTE X REAL +@ATTRIBUTE class {0,1,2} + +@DATA +1, 0 +1, 0 +1, 0 +1, 0 +1, 0 +1, 0 +1, 0 +2, 0 +2, 0 +2, 0 +2, 1 +2, 2 +2, 2 +2, 2 +2, 2 +3, 0 +3, 0 +3, 0 +3, 0 +3, 0 +3, 1 +3, 1 +3, 1 +3, 2 +3, 2 +4, 0 +4, 1 \ No newline at end of file diff --git a/sample/tests/02.arff b/sample/tests/02.arff new file mode 100755 index 0000000..71df45b --- /dev/null +++ b/sample/tests/02.arff @@ -0,0 +1,25 @@ +% . + +@RELATION 01 + +@ATTRIBUTE X REAL +@ATTRIBUTE class {0,1,2} + +@DATA +2, 0 +3, 0 +3, 0 +3, 0 +3, 0 +3, 0 +3, 1 +3, 1 +3, 1 +3, 2 +3, 2 +4, 0 +4, 1 +4, 1 +4, 1 +4, 1 +4, 1 \ No newline at end of file diff --git a/sample/tests/03.arff b/sample/tests/03.arff new file mode 100755 index 0000000..601043d --- /dev/null +++ b/sample/tests/03.arff @@ -0,0 +1,24 @@ +% . + +@RELATION 01 + +@ATTRIBUTE X REAL +@ATTRIBUTE class {0,1,2} + +@DATA +3, 0 +3, 0 +3, 0 +3, 0 +3, 0 +3, 1 +3, 1 +3, 1 +3, 2 +3, 2 +4, 0 +4, 1 +4, 1 +4, 1 +4, 1 +4, 1 \ No newline at end of file diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index 0fc0ae9..2958bee 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -15,12 +15,12 @@ namespace mdlp { //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2) X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9}; y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; - proposal = false; + algorithm = false; fit(X, y); } - void setProposal(bool value) { - proposal = value; + void setalgorithm(bool value) { + algorithm = value; } // void initIndices() @@ -80,7 +80,7 @@ namespace mdlp { } TEST_F(TestFImdlp, TestDataset) { - proposal = false; + algorithm = false; fit(X, y); computeCutPointsOriginal(0, 10); cutPoints_t expected = {5.6499996185302734}; @@ -95,14 +95,14 @@ namespace mdlp { TEST_F(TestFImdlp, ComputeCutPointsOriginal) { cutPoints_t expected = {5.65}; - proposal = false; + algorithm = false; computeCutPointsOriginal(0, 10); checkCutPoints(expected); } TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) { cutPoints_t expected; - proposal = false; + algorithm = false; expected = {2}; samples_t X_ = {0, 1, 2, 2}; labels_t y_ = {1, 1, 1, 2}; @@ -110,19 +110,19 @@ namespace mdlp { checkCutPoints(expected); } - TEST_F(TestFImdlp, ComputeCutPointsProposal) { - proposal = true; + TEST_F(TestFImdlp, ComputeCutPointsalgorithm) { + algorithm = true; cutPoints_t expected; expected = {}; fit(X, y); - computeCutPointsProposal(); + computeCutPointsalgorithm(); checkCutPoints(expected); } - TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) { + TEST_F(TestFImdlp, ComputeCutPointsalgorithmGCase) { cutPoints_t expected; expected = {1.5}; - proposal = true; + algorithm = true; samples_t X_ = {0, 1, 2, 2}; labels_t y_ = {1, 1, 1, 2}; fit(X_, y_); @@ -131,7 +131,7 @@ namespace mdlp { TEST_F(TestFImdlp, GetCutPoints) { samples_t computed, expected = {5.65}; - proposal = false; + algorithm = false; computeCutPointsOriginal(0, 10); computed = getCutPoints(); for (auto item: cutPoints)