Refactor testing

2025-08-15 23:45:57 +00:00 · 2022-12-20 01:24:49 +01:00
parent 50543e4921
commit dd1e67ec78
12 changed files with 516 additions and 70 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@
 **/lcoverage
 .idea
 cmake-*
+**/CMakeFiles
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -4,9 +4,10 @@
 #include <cmath>
 #include "CPPFImdlp.h"
 #include "Metrics.h"
-
+// WARNING: remove this include (presumably only needed for the commented-out cout debug traces)
+#include <iostream>
 namespace mdlp {
-    CPPFImdlp::CPPFImdlp(int proposal):proposal(proposal), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
+    CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
    {
    }
    CPPFImdlp::~CPPFImdlp()
@@ -23,14 +24,14 @@ namespace mdlp {
        if (X.size() == 0 || y.size() == 0) {
            throw invalid_argument("X and y must have at least one element");
        }
-        indices = sortIndices(X_);
+        indices = sortIndices2(X_, y_);
        metrics.setData(y, indices);
-        switch (proposal) {
+        switch (algorithm) {
            case 0:
                computeCutPoints(0, X.size());
                break;
            case 1:
-                computeCutPointsProposal();
+                computeCutPointsProposal(0, X.size());
                break;
            case 2:
                computeCutPointsAlternative(0, X.size());
@@ -38,78 +39,169 @@ namespace mdlp {
        }
        return *this;
    }
+    precision_t CPPFImdlp::value_cut_point(size_t start, size_t idx) // midpoint between X at sorted position idx and the closest earlier value
+    {
+        size_t idxPrev = idx - 1; // expects idx > start; idx == 0 would wrap around here
+        precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]];
+        // Definition 2 of the paper requires X[t-1] < X[t]: step back, never past start, while the earlier value equals X at idx
+        while (idxPrev-- > start && actual == previous) {
+            previous = X[indices[idxPrev]];
+        }
+        return (previous + actual) / 2; // equals actual when every value back to start matched it
+    }
+    // Returns {cut value, adjusted cut position} for the candidate at sorted position cut inside [start, end).
+    tuple<precision_t, size_t> CPPFImdlp::value_proposal_cut_point(size_t start, size_t cut, size_t end)
+    {
+        size_t idxPrev = cut - 1;
+        precision_t previous = X[indices[idxPrev]];
+        precision_t actual = X[indices[cut]];
+        // Definition 2 of the paper requires X[t-1] < X[t]: step back, never past start, while the earlier value equals X at cut
+        while (idxPrev-- > start && actual == previous) {
+            previous = X[indices[idxPrev]];
+        }
+        // Skip the run of values equal to X at the candidate; cut++ runs before the bound test, so cut overshoots by one (undone by cut-- below)
+        while (actual == X[indices[cut++]] && cut < end);
+        if (previous == actual && cut < end) // NOTE(review): cut is already one past the first differing value here - confirm this read is the intended one
+            actual = X[indices[cut]];
+        cut--;
+        return make_tuple((previous + actual) / 2, cut);
+    }
+    // void CPPFImdlp::computeCutPoints(size_t start, size_t end)
+    // {
+    //     size_t cut;
+    //     if (end - start < 2)
+    //         return;
+    //     cut = getCandidate(start, end);
+    //     if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
+    //         // cut == max means that there is no candidate in the interval
+    //         // No boundary found, so we add both ends of the interval as cutpoints
+    //         // because they were selected by the algorithm before
+    //         if (start != 0)
+    //             cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
+    //         if (end != X.size())
+    //             cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
+    //         //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+    //         return;
+    //     }
+    //     // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+    //     computeCutPoints(start, cut);
+    //     computeCutPoints(cut, end);
+    // }
+    // void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
+    // {
+    //     size_t cut;
+    //     if (end - start < 2)
+    //         return;
+    //     cut = getCandidate(start, end);
+    //     if (cut == numeric_limits<size_t>::max() || !mdlp(start, cut, end)) {
+    //         // cut == max means that there is no candidate in the interval
+    //         // No boundary found, so we add both ends of the interval as cutpoints
+    //         // because they were selected by the algorithm before
+    //         if (start != 0)
+    //             cutPoints.push_back(value_cut_point(0, start));
+    //         if (end != X.size())
+    //             cutPoints.push_back(value_cut_point(start, end));
+    //         //cout << "!!!Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+    //         return;
+    //     }
+    //     // cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+    //     computeCutPointsAlternative(start, cut);
+    //     computeCutPointsAlternative(cut, end);
+    // }
    void CPPFImdlp::computeCutPoints(size_t start, size_t end)
    {
-        int cut;
+        size_t cut; // position in the sorted order, as returned by getCandidate
        if (end - start < 2)
            return;
        cut = getCandidate(start, end);
-        if (cut == -1 || !mdlp(start, cut, end)) {
-            // cut.value == -1 means that there is no candidate in the interval
-            // No boundary found, so we add both ends of the interval as cutpoints
-            // because they were selected by the algorithm before
-            if (start != 0)
-                cutPoints.push_back((X[indices[start]] + X[indices[start - 1]]) / 2);
-            if (end != X.size())
-                cutPoints.push_back((X[indices[end]] + X[indices[end - 1]]) / 2);
+        if (cut == numeric_limits<size_t>::max()) // sentinel: getCandidate found no candidate in [start, end)
            return;
+        if (mdlp(start, cut, end)) { // record the cut only when mdlp() accepts it; both halves are explored either way
+            cutPoints.push_back(value_cut_point(start, cut));
+            //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
        }
        computeCutPoints(start, cut);
        computeCutPoints(cut, end);
    }
    void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
    {
-        precision_t cut;
+        size_t cut; // position in the sorted order, as returned by getCandidate
        if (end - start < 2)
            return;
        cut = getCandidate(start, end);
-        if (cut == -1)
+        if (cut == numeric_limits<size_t>::max()) // sentinel: getCandidate found no candidate in [start, end)
            return;
        if (mdlp(start, cut, end)) {
-            cutPoints.push_back((X[indices[cut]] + X[indices[cut - 1]]) / 2);
+            cutPoints.push_back(value_cut_point(start, cut));
+            //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+            computeCutPointsAlternative(start, cut); // recursion happens only below a cut that mdlp() accepted
+            computeCutPointsAlternative(cut, end);
        }
-        computeCutPointsAlternative(start, cut);
-        computeCutPointsAlternative(cut, end);
    }
-    void CPPFImdlp::computeCutPointsProposal()
+    // void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
+    // {
+    //     size_t cut;
+    //     if (end - start < 2)
+    //         return;
+    //     cut = getCandidateWeka(start, end);
+    //     if (cut == numeric_limits<size_t>::max())
+    //         return;
+    //     if (mdlp(start, cut, end)) {
+    //         cutPoints.push_back(value_cut_point(start, cut));
+    //         //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+    //     }
+    //     computeCutPointsAlternative(start, cut);
+    //     computeCutPointsAlternative(cut, end);
+    // }
+    void CPPFImdlp::computeCutPointsProposal(size_t start, size_t end)
    {
-        precision_t xPrev, xCur, xPivot, cutPoint;
-        int yPrev, yCur, yPivot;
-        size_t idx, numElements, start;
+        size_t cut; // position in the sorted order, as returned by getCandidate
+        tuple<precision_t, size_t> result;
+        if (end - start < 2)
+            return;
+        cut = getCandidate(start, end);
+        if (cut == numeric_limits<size_t>::max()) // sentinel: getCandidate found no candidate in [start, end)
+            return;
+        if (mdlp(start, cut, end)) {
+            //cout << "+Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+            result = value_proposal_cut_point(start, cut, end); // {cut value, adjusted cut position}
+            cut = get<1>(result); // the recursion below splits at the adjusted position, not at the original candidate
+            cutPoints.push_back(get<0>(result));
+            //cout << "*Alg: " << algorithm << " Cut: " << cut << " Start: " << start << " End: " << end << endl;
+            computeCutPointsProposal(start, cut);
+            computeCutPointsProposal(cut, end);
+        }

-        xCur = xPrev = X[indices[0]];
-        yCur = yPrev = y[indices[0]];
-        numElements = indices.size() - 1;
-        idx = start = 0;
-        while (idx < numElements) {
-            xPivot = xCur;
-            yPivot = yCur;
-            // Read the same values and check class changes
-            do {
-                idx++;
-                xCur = X[indices[idx]];
-                yCur = y[indices[idx]];
-                if (yCur != yPivot && xCur == xPivot) {
-                    yPivot = -1;
+    }
+    size_t CPPFImdlp::getCandidateWeka(size_t start, size_t end)
+    {
+        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
+        E(A, TA; S) is minimal amongst all the candidate cut points. */
+        size_t candidate = numeric_limits<size_t>::max(), elements = end - start; // max() is returned when no split qualifies
+        precision_t entropy_left, entropy_right, minEntropy;
+        minEntropy = metrics.entropy(start, end); // a split must score below the entropy of the unsplit interval
+        for (auto idx = start + 1; idx < end; idx++) {
+            // Cutpoints are always on boundaries (definition 2)
+            if (X[indices[idx - 1]] < X[indices[idx]]) { // only positions where the sorted X value increases are evaluated
+                entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
+                entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
+                if (entropy_left + entropy_right < minEntropy) {
+                    minEntropy = entropy_left + entropy_right;
+                    candidate = idx;
                }
            }
-            while (idx < numElements && xCur == xPivot);
-            // Check if the class changed and there are more than 1 element
-            if ((idx - start > 1) && (yPivot == -1 || yPrev != yCur) && mdlp(start, idx, indices.size())) {
-                start = idx;
-                cutPoint = (xPrev + xCur) / 2;
-                cutPoints.push_back(cutPoint);
-            }
-            yPrev = yPivot;
-            xPrev = xPivot;
        }
+        return candidate;
    }
-    long int CPPFImdlp::getCandidate(size_t start, size_t end)
+    size_t CPPFImdlp::getCandidate(size_t start, size_t end)
    {
-        long int candidate = -1, elements = end - start;
-        precision_t entropy_left, entropy_right, minEntropy = numeric_limits<precision_t>::max();
+        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
+        E(A, TA; S) is minimal amongst all the candidate cut points. */
+        size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
+        precision_t entropy_left, entropy_right, minEntropy;
+        minEntropy = metrics.entropy(start, end);
        for (auto idx = start + 1; idx < end; idx++) {
-            // Cutpoints are always on boundaries
+            // Cutpoints are always on boundaries (definition 2)
            if (y[indices[idx]] == y[indices[idx - 1]])
                continue;
            entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
@@ -137,9 +229,9 @@ namespace mdlp {
        ent1 = metrics.entropy(start, cut);
        ent2 = metrics.entropy(cut, end);
        ig = metrics.informationGain(start, cut, end);
-        delta = log2(pow(3, precision_t(k)) - 2) -
+        delta = log(pow(3, precision_t(k)) - 2) -
            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
-        precision_t term = 1 / N * (log2(N - 1) + delta);
+        precision_t term = 1 / N * (log(N - 1) + delta);
        return ig > term;
    }
    cutPoints_t CPPFImdlp::getCutPoints()
@@ -164,4 +256,17 @@ namespace mdlp {
                { return X_[i1] < X_[i2]; });
        return idx;
    }
+    // Returns the positions of X_ ordered by ascending sample value, ties broken by ascending label.
+    indices_t CPPFImdlp::sortIndices2(samples_t& X_, labels_t& y_)
+    {
+        indices_t idx(X_.size());
+        iota(idx.begin(), idx.end(), 0);
+        // One stable sort is sufficient: sorting already ordered indices again cannot change the result.
+        stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
+            {
+                if (X_[i1] != X_[i2]) return X_[i1] < X_[i2];
+                return y_[i1] < y_[i2];
+            });
+        return idx;
+    }
 }
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@@ -3,10 +3,12 @@
 #include "typesFImdlp.h"
 #include "Metrics.h"
 #include <utility>
+#include <tuple>
+#include <string>
 namespace mdlp {
    class CPPFImdlp {
    protected:
-        int proposal;
+        int algorithm;
        indices_t indices; // sorted indices to use with X and y
        samples_t X;
        labels_t y;
@@ -14,17 +16,22 @@ namespace mdlp {
        cutPoints_t cutPoints;

        static indices_t sortIndices(samples_t&);
+        static indices_t sortIndices2(samples_t&, labels_t&);
        void computeCutPoints(size_t, size_t);
        bool mdlp(size_t, size_t, size_t);
-        long int getCandidate(size_t, size_t);
+        size_t getCandidate(size_t, size_t);
+        size_t getCandidateWeka(size_t, size_t);
        void computeCutPointsAlternative(size_t, size_t);
-        void computeCutPointsProposal();
+        void computeCutPointsProposal(size_t, size_t);
+        precision_t value_cut_point(size_t, size_t);
+        tuple<precision_t, size_t> value_proposal_cut_point(size_t, size_t, size_t);

    public:
        CPPFImdlp(int);
        ~CPPFImdlp();
        CPPFImdlp& fit(samples_t&, labels_t&);
        samples_t getCutPoints();
+        inline string version() { return "0.8.1"; };
    };
 }
 #endif
--- a/Metrics.cpp
+++ b/Metrics.cpp
@@ -39,7 +39,7 @@ namespace mdlp {
        for (auto count : counts) {
            if (count > 0) {
                p = (precision_t)count / nElements;
-                ventropy -= p * log2(p);
+                ventropy -= p * log(p);
            }
        }
        entropyCache[make_tuple(start, end)] = ventropy;
--- a/152
+++ b/152
@@ -0,0 +1,152 @@
+++++++++++++++++++++++
+(  0,  13) -> (4.3, 0)
+(  1,   8) -> (4.4, 0)
+(  2,  38) -> (4.4, 0)
+(  3,  42) -> (4.4, 0)
+(  4,  41) -> (4.5, 0)
+(  5,   3) -> (4.6, 0)
+(  6,   6) -> (4.6, 0)
+(  7,  22) -> (4.6, 0)
+(  8,  47) -> (4.6, 0)
+(  9,   2) -> (4.7, 0)
+( 10,  29) -> (4.7, 0)
+( 11,  11) -> (4.8, 0)
+( 12,  12) -> (4.8, 0)
+( 13,  24) -> (4.8, 0)
+( 14,  30) -> (4.8, 0)
+( 15,  45) -> (4.8, 0)
+( 16,   1) -> (4.9, 0)
+( 17,   9) -> (4.9, 0)
+( 18,  34) -> (4.9, 0)
+( 19,  37) -> (4.9, 0)
+( 20,  57) -> (4.9, 1) candidate Total Entropy: 0.633 E. left: 0.000 E. right: 0.855 = 0.539 (0, 54) No
+( 21, 106) -> (4.9, 2)
+( 22,   4) -> (5.0, 0)
+( 23,   7) -> (5.0, 0)
+( 24,  25) -> (5.0, 0)
+( 25,  26) -> (5.0, 0)
+( 26,  35) -> (5.0, 0)
+( 27,  40) -> (5.0, 0)
+( 28,  43) -> (5.0, 0)
+( 29,  49) -> (5.0, 0)
+( 30,  60) -> (5.0, 1)
+( 31,  93) -> (5.0, 1)
+( 32,   0) -> (5.1, 0)
+( 33,  17) -> (5.1, 0)
+( 34,  19) -> (5.1, 0)
+( 35,  21) -> (5.1, 0)
+( 36,  23) -> (5.1, 0)
+( 37,  39) -> (5.1, 0)
+( 38,  44) -> (5.1, 0)
+( 39,  46) -> (5.1, 0)
+( 40,  98) -> (5.1, 1)
+( 41,  27) -> (5.2, 0)
+( 42,  28) -> (5.2, 0)
+( 43,  32) -> (5.2, 0)
+( 44,  59) -> (5.2, 1)
+( 45,  48) -> (5.3, 0)
+( 46,   5) -> (5.4, 0)
+( 47,  10) -> (5.4, 0)
+( 48,  16) -> (5.4, 0)
+( 49,  20) -> (5.4, 0)
+( 50,  31) -> (5.4, 0)
+( 51,  84) -> (5.4, 1)
+( 52,  33) -> (5.5, 0)
+( 53,  36) -> (5.5, 0)
+( 54,  53) -> (5.5, 1) 1st cut Total Entropy: 1.585 E. left: 0.633 E. right: 1.167 = 0.975 (0, 150) Sí => 5.450
+( 55,  80) -> (5.5, 1)
+( 56,  81) -> (5.5, 1)
+( 57,  89) -> (5.5, 1)
+( 58,  90) -> (5.5, 1)
+( 59,  64) -> (5.6, 1)
+( 60,  66) -> (5.6, 1)
+( 61,  69) -> (5.6, 1)
+( 62,  88) -> (5.6, 1)
+( 63,  94) -> (5.6, 1)
+( 64, 121) -> (5.6, 2) Candidate Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 77) No
+( 65,  15) -> (5.7, 0)
+( 66,  18) -> (5.7, 0)
+( 67,  55) -> (5.7, 1)
+( 68,  79) -> (5.7, 1)
+( 69,  95) -> (5.7, 1)
+( 70,  96) -> (5.7, 1)
+( 71,  99) -> (5.7, 1)
+( 72, 113) -> (5.7, 2)
+( 73,  14) -> (5.8, 0)
+( 74,  67) -> (5.8, 1)
+( 75,  82) -> (5.8, 1)
+( 76,  92) -> (5.8, 1)
+( 77, 101) -> (5.8, 2) 2nd cut Total Entropy: 1.167 E. left: 0.966 E. right: 0.939 = 0.946 (54, 150) Sí => 5.750
+( 78, 114) -> (5.8, 2)
+( 79, 142) -> (5.8, 2)
+( 80,  61) -> (5.9, 1)
+( 81,  70) -> (5.9, 1)
+( 82, 149) -> (5.9, 2)
+( 83,  62) -> (6.0, 1)
+( 84,  78) -> (6.0, 1)
+( 85,  83) -> (6.0, 1)
+( 86,  85) -> (6.0, 1)
+( 87, 119) -> (6.0, 2)
+( 88, 138) -> (6.0, 2)
+( 89,  63) -> (6.1, 1)
+( 90,  71) -> (6.1, 1)
+( 91,  73) -> (6.1, 1)
+( 92,  91) -> (6.1, 1)
+( 93, 127) -> (6.1, 2)
+( 94, 134) -> (6.1, 2)
+( 95,  68) -> (6.2, 1)
+( 96,  97) -> (6.2, 1)
+( 97, 126) -> (6.2, 2)
+( 98, 148) -> (6.2, 2)
+( 99,  56) -> (6.3, 1)
+(100,  72) -> (6.3, 1)
+(101,  87) -> (6.3, 1)
+(102, 100) -> (6.3, 2)
+(103, 103) -> (6.3, 2)
+(104, 123) -> (6.3, 2)
+(105, 133) -> (6.3, 2)
+(106, 136) -> (6.3, 2)
+(107, 146) -> (6.3, 2)
+(108,  51) -> (6.4, 1)
+(109,  74) -> (6.4, 1)
+(110, 111) -> (6.4, 2)
+(111, 115) -> (6.4, 2)
+(112, 128) -> (6.4, 2)
+(113, 132) -> (6.4, 2)
+(114, 137) -> (6.4, 2)
+(115,  54) -> (6.5, 1)
+(116, 104) -> (6.5, 2)
+(117, 110) -> (6.5, 2)
+(118, 116) -> (6.5, 2)
+(119, 147) -> (6.5, 2)
+(120,  58) -> (6.6, 1)
+(121,  75) -> (6.6, 1)
+(122,  65) -> (6.7, 1)
+(123,  77) -> (6.7, 1)
+(124,  86) -> (6.7, 1)
+(125, 108) -> (6.7, 2)
+(126, 124) -> (6.7, 2)
+(127, 140) -> (6.7, 2)
+(128, 144) -> (6.7, 2)
+(129, 145) -> (6.7, 2)
+(130,  76) -> (6.8, 1)
+(131, 112) -> (6.8, 2)
+(132, 143) -> (6.8, 2)
+(133,  52) -> (6.9, 1)
+(134, 120) -> (6.9, 2)
+(135, 139) -> (6.9, 2)
+(136, 141) -> (6.9, 2)
+(137,  50) -> (7.0, 1)
+(138, 102) -> (7.1, 2) candidate Total Entropy: 0.939 E. left: 0.984 E. right: 0.000 = 0.822 (77, 150) No
+(139, 109) -> (7.2, 2)
+(140, 125) -> (7.2, 2)
+(141, 129) -> (7.2, 2)
+(142, 107) -> (7.3, 2)
+(143, 130) -> (7.4, 2)
+(144, 105) -> (7.6, 2)
+(145, 117) -> (7.7, 2)
+(146, 118) -> (7.7, 2)
+(147, 122) -> (7.7, 2)
+(148, 135) -> (7.7, 2)
+(149, 131) -> (7.9, 2)
+++++++++++++++++++++++
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -4,3 +4,4 @@ project(main)
 set(CMAKE_CXX_STANDARD 14)

 add_executable(sample sample.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
+add_executable(test test.cpp ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@@ -5,6 +5,7 @@
 #include "../CPPFImdlp.h"

 using namespace std;
+using namespace mdlp;

 int main(int argc, char** argv)
 {
@@ -33,8 +34,8 @@ int main(int argc, char** argv)
    cout << "Class name: " << file.getClassName() << endl;
    cout << "Class type: " << file.getClassType() << endl;
    cout << "Data: " << endl;
-    vector<vector<float>>& X = file.getX();
-    vector<int>& y = file.getY();
+    vector<samples_t>& X = file.getX();
+    labels_t& y = file.getY();
    for (int i = 0; i < 50; i++) {
        for (auto feature : X) {
            cout << fixed << setprecision(1) << feature[i] << " ";
--- a/sample/test.cpp
+++ b/sample/test.cpp
@@ -0,0 +1,95 @@
+#include "ArffFiles.h"
+#include <iostream>
+#include <vector>
+#include <iomanip>
+#include "../CPPFImdlp.h"
+
+using namespace std;
+using namespace mdlp;
+
+// Returns {cut value, adjusted cut position} for the candidate at position cut; indexes X directly and does not use y.
+tuple<precision_t, size_t> getCutPoint(samples_t& X, labels_t& y, size_t start, size_t cut, size_t end)
+{
+    size_t idxPrev = cut - 1;
+    precision_t previous = X[idxPrev];
+    precision_t actual = X[cut];
+    // Definition 2 of the paper requires X[t-1] < X[t]: step back, never past start, while the earlier value equals X[cut]
+    while (idxPrev-- > start && actual == previous) {
+        previous = X[idxPrev];
+    }
+    // Skip the run of values equal to X at the candidate; cut++ runs before the bound test, so cut overshoots by one (undone by cut-- below)
+    while (actual == X[cut++] && cut < end);
+    if (previous == actual && cut < end) // NOTE(review): cut is already one past the first differing value here - confirm this read is the intended one
+        actual = X[cut];
+    cut--;
+    return make_tuple((previous + actual) / 2, cut);
+}
+
+// Debug helper: prints the interval, looks for the first label change inside it and recurses on both sides of the resulting cut.
+void show_points(samples_t& X, labels_t& y, size_t start, size_t end)
+{
+    cout << "Interval: " << start << " - " << end << endl;
+    tuple<precision_t, size_t> cutPoint;
+    size_t cut = start + 1;
+    if (start >= end)
+        return;
+    // Test the bound before indexing so that y[end] is never read.
+    while (cut < end && y[cut - 1] == y[cut])
+        cut++;
+    if (cut != end) {
+        cutPoint = getCutPoint(X, y, start, cut, end);
+        cout << cut << ": " << fixed << setprecision(1) << X[cut] << " " << y[cut] << endl;
+        cout << "Cut point: " << get<0>(cutPoint) << " at " << get<1>(cutPoint) << endl;
+        show_points(X, y, start, get<1>(cutPoint));
+        show_points(X, y, get<1>(cutPoint), end);
+    }
+}
+
+// Loads ../tests/<id>.arff, dumps its contents, prints the cut points fitted for every attribute and runs show_points on the first one.
+int main(int argc, char** argv)
+{
+    ArffFiles file;
+    string path = "../tests/";
+    map<string, bool > datasets = {
+        {"01", true},
+        {"02", true},
+        {"03", true},
+        {"04", true}
+    };
+    if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
+        cout << "Usage: " << argv[0] << " {01, 02, 03, 04}" << endl;
+        return 1;
+    }
+
+    file.load(path + argv[1] + ".arff", datasets[argv[1]]);
+    auto attributes = file.getAttributes();
+    int items = file.getSize();
+    cout << "Number of lines: " << items << endl;
+    cout << "Attributes: " << endl;
+    for (const auto& attribute : attributes) {
+        cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
+    }
+    cout << "Class name: " << file.getClassName() << endl;
+    cout << "Class type: " << file.getClassType() << endl;
+    cout << "Data: " << endl;
+    vector<samples_t>& X = file.getX();
+    labels_t& y = file.getY();
+    for (size_t i = 0; i < y.size(); i++) {
+        for (const auto& feature : X) { // bind by reference: a plain auto would copy the whole feature column for every row
+            cout << i << ": " << fixed << setprecision(1) << feature[i] << " ";
+        }
+        cout << y[i] << endl;
+    }
+    mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
+    for (size_t i = 0; i < attributes.size(); i++) {
+        cout << "Cut points for " << get<0>(attributes[i]) << endl;
+        cout << "--------------------------" << setprecision(3) << endl;
+        test.fit(X[i], y);
+        for (auto item : test.getCutPoints()) {
+            cout << item << endl;
+        }
+    }
+    cout << "Function test" << endl;
+    show_points(X[0], y, 0, items);
+    return 0;
+}
--- a/sample/tests/01.arff
+++ b/sample/tests/01.arff
@@ -0,0 +1,35 @@
+% .
+
+@RELATION 01
+
+@ATTRIBUTE X	REAL
+@ATTRIBUTE class 	{0,1,2}
+
+@DATA
+1, 0
+1, 0
+1, 0
+1, 0
+1, 0
+1, 0
+1, 0
+2, 0
+2, 0
+2, 0
+2, 1
+2, 2
+2, 2
+2, 2
+2, 2
+3, 0
+3, 0
+3, 0
+3, 0
+3, 0
+3, 1
+3, 1
+3, 1
+3, 2
+3, 2
+4, 0
+4, 1
--- a/sample/tests/02.arff
+++ b/sample/tests/02.arff
@@ -0,0 +1,25 @@
+% .
+
+@RELATION 01
+
+@ATTRIBUTE X	REAL
+@ATTRIBUTE class 	{0,1,2}
+
+@DATA
+2, 0
+3, 0
+3, 0
+3, 0
+3, 0
+3, 0
+3, 1
+3, 1
+3, 1
+3, 2
+3, 2
+4, 0
+4, 1
+4, 1
+4, 1
+4, 1
+4, 1
--- a/sample/tests/03.arff
+++ b/sample/tests/03.arff
@@ -0,0 +1,24 @@
+% .
+
+@RELATION 01
+
+@ATTRIBUTE X	REAL
+@ATTRIBUTE class 	{0,1,2}
+
+@DATA
+3, 0
+3, 0
+3, 0
+3, 0
+3, 0
+3, 1
+3, 1
+3, 1
+3, 2
+3, 2
+4, 0
+4, 1
+4, 1
+4, 1
+4, 1
+4, 1
--- a/tests/FImdlp_unittest.cpp
+++ b/tests/FImdlp_unittest.cpp
@@ -15,12 +15,12 @@ namespace mdlp {
            //(5.0, 1) (5.1, 1) (5.1, 2) (5.1, 2) (5.2, 1) (5.3, 1) (5.6, 2) (5.7, 1) (5.9, 2) (6.0, 2)
            X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
            y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
-            proposal = false;
+            algorithm = false;
            fit(X, y);
        }

-        void setProposal(bool value) {
-            proposal = value;
+        void setalgorithm(bool value) {
+            algorithm = value;
        }

        // void initIndices()
@@ -80,7 +80,7 @@ namespace mdlp {
    }

    TEST_F(TestFImdlp, TestDataset) {
-        proposal = false;
+        algorithm = false;
        fit(X, y);
        computeCutPointsOriginal(0, 10);
        cutPoints_t expected = {5.6499996185302734};
@@ -95,14 +95,14 @@ namespace mdlp {

    TEST_F(TestFImdlp, ComputeCutPointsOriginal) {
        cutPoints_t expected = {5.65};
-        proposal = false;
+        algorithm = false;
        computeCutPointsOriginal(0, 10);
        checkCutPoints(expected);
    }

    TEST_F(TestFImdlp, ComputeCutPointsOriginalGCase) {
        cutPoints_t expected;
-        proposal = false;
+        algorithm = false;
        expected = {2};
        samples_t X_ = {0, 1, 2, 2};
        labels_t y_ = {1, 1, 1, 2};
@@ -110,19 +110,19 @@ namespace mdlp {
        checkCutPoints(expected);
    }

-    TEST_F(TestFImdlp, ComputeCutPointsProposal) {
-        proposal = true;
+    TEST_F(TestFImdlp, ComputeCutPointsalgorithm) {
+        algorithm = true;
        cutPoints_t expected;
        expected = {};
        fit(X, y);
-        computeCutPointsProposal();
+        computeCutPointsProposal(0, X.size()); // the header declares computeCutPointsProposal(size_t, size_t); it declares no computeCutPointsalgorithm
        checkCutPoints(expected);
    }

-    TEST_F(TestFImdlp, ComputeCutPointsProposalGCase) {
+    TEST_F(TestFImdlp, ComputeCutPointsalgorithmGCase) {
        cutPoints_t expected;
        expected = {1.5};
-        proposal = true;
+        algorithm = true;
        samples_t X_ = {0, 1, 2, 2};
        labels_t y_ = {1, 1, 1, 2};
        fit(X_, y_);
@@ -131,7 +131,7 @@ namespace mdlp {

    TEST_F(TestFImdlp, GetCutPoints) {
        samples_t computed, expected = {5.65};
-        proposal = false;
+        algorithm = false;
        computeCutPointsOriginal(0, 10);
        computed = getCutPoints();
        for (auto item: cutPoints)