Refactor tests

2025-08-17 16:35:52 +00:00 · 2022-12-02 12:54:09 +01:00
parent 0686195854
commit 5657c1cd9f
14 changed files with 265 additions and 174 deletions
--- a/fimdlp/CPPFImdlp.cpp
+++ b/fimdlp/CPPFImdlp.cpp
@@ -2,16 +2,9 @@
 #include <numeric>
 #include <iostream>
 #include <iomanip>
-#include <stdio.h>
 #include <algorithm>
 #include "Metrics.h"
-namespace mdlp
-{
-    std::ostream &operator<<(std::ostream &os, const CutPointBody &cut)
-    {
-        os << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ",  " << cut.toValue << "]";
-        return os;
-    }
+namespace mdlp {
    CPPFImdlp::CPPFImdlp() : debug(false), precision(6)
    {
        divider = pow(10, precision);
@@ -23,70 +16,125 @@ namespace mdlp
    CPPFImdlp::~CPPFImdlp()
    {
    }
-    std::vector<CutPointBody> CPPFImdlp::getCutPoints()
+    std::vector<CutPoint_t> CPPFImdlp::getCutPoints()
    {
        return cutPoints;
    }
-    std::vector<float> CPPFImdlp::getDiscretizedValues()
+    labels CPPFImdlp::getDiscretizedValues()
    {
        return xDiscretized;
    }
-    void CPPFImdlp::fit(std::vector<float> &X, std::vector<int> &y)
+    void CPPFImdlp::fit(samples& X, labels& y)
    {
        this->X = X;
        this->y = y;
        this->indices = sortIndices(X);
+        this->xDiscretized = labels(X.size(), -1);
+        this->numClasses = Metrics::numClasses(y, indices, 0, X.size());
+
        computeCutPoints();
        filterCutPoints();
        applyCutPoints();
    }
-    std::vector<float> &CPPFImdlp::transform(std::vector<float> &X)
+    labels& CPPFImdlp::transform(samples& X) // re-applies the stored cut points and returns a reference to the member xDiscretized
     {
-        std::vector<size_t> indices_transform = sortIndices(X);
+        indices_t indices_transform = sortIndices(X); // NOTE(review): never read afterwards; applyCutPoints() takes no arguments, so this X does not appear to influence the result - confirm intent
         applyCutPoints();
         return xDiscretized;
     }
-    void CPPFImdlp::debugPoints(std::vector<float> &X, std::vector<int> &y)
+    void CPPFImdlp::debugPoints(samples& X, labels& y)
    {
        std::cout << "+++++++++++++++++++++++" << std::endl;
        // for (auto i : sortIndices(X))
-        std::vector<size_t> indices = sortIndices(X);
-        for (size_t i = 0; i < indices.size(); i++)
-        {
+        indices_t indices = sortIndices(X);
+        for (size_t i = 0; i < indices.size(); i++) {
            printf("(%3lu, %3lu) -> (%3.1f, %d)\n", i, indices[i], X[indices[i]], y[indices[i]]);
        }
        std::cout << "+++++++++++++++++++++++" << std::endl;
        fit(X, y);
-        for (auto item : cutPoints)
-        {
-            std::cout << item << "  X[" << item.end << "]=" << X[item.end] << std::endl;
+        for (auto item : cutPoints) {
+            std::cout << item.start << "  X[" << item.end << "]=" << X[item.end] << std::endl;
        }
    }
    void CPPFImdlp::applyCutPoints()
    {
+        for (auto cut : cutPoints) { // one pass per stored interval; writes its classNumber into xDiscretized
+            for (size_t i = cut.start; i < cut.end; i++) { // i is a position in sorted order; end is exclusive here. NOTE(review): computeCutPoints stores end = idx - 1 for inner intervals - confirm position idx - 1 is not left unassigned
+                xDiscretized[indices[i]] = cut.classNumber; // indices[i] maps the sorted position back to the sample's own slot
            }
-    bool CPPFImdlp::evaluateCutPoint(CutPointBody point)
+        }
+    }
+    bool CPPFImdlp::evaluateCutPoint(CutPoint_t rest, CutPoint_t candidate) // MDLP acceptance test: true when splitting rest at sorted position candidate.end gains more than the threshold
    {
-        return true;
+        int k, k1, k2;         // distinct classes in rest, in its left part and in its right part
+        float ig, delta;
+        float ent, ent1, ent2; // entropies of rest, of its left part and of its right part
+        float N = float(rest.end - rest.start); // number of samples in rest
+        if (N < 2) { // nothing to split; also keeps log2(N - 1) and 1 / N below well defined
+            return false;
+        }
+
+        k = Metrics::numClasses(y, indices, rest.start, rest.end);
+        k1 = Metrics::numClasses(y, indices, rest.start, candidate.end);
+        k2 = Metrics::numClasses(y, indices, candidate.end, rest.end);
+        ent = Metrics::entropy(y, indices, rest.start, rest.end, numClasses);
+        ent1 = Metrics::entropy(y, indices, rest.start, candidate.end, numClasses);
+        ent2 = Metrics::entropy(y, indices, candidate.end, rest.end, numClasses);
+        ig = Metrics::informationGain(y, indices, rest.start, rest.end, candidate.end, numClasses);
+        delta = log2(pow(3, k) - 2) - (k * ent - k1 * ent1 - k2 * ent2);
+        float term = 1 / N * (log2(N - 1) + delta); // threshold the information gain has to exceed
+        std::cout << "(" << candidate.start << ", " << candidate.end << ") -> (" << candidate.fromValue << ",  " << candidate.toValue << "] "; // fields printed explicitly: no operator<< exists for CutPoint_t
+        std::cout << "k=" << k << " k1=" << k1 << " k2=" << k2 << " ent=" << ent << " ent1=" << ent1 << " ent2=" << ent2 << std::endl;
+        std::cout << "ig=" << ig << " delta=" << delta << " N " << N << " term " << term << std::endl; // NOTE(review): these traces are not gated by the debug flag - confirm intended
+        return (ig > term);
    }
    void CPPFImdlp::filterCutPoints()
    {
-        std::vector<CutPointBody> filtered;
-        for (auto item : cutPoints)
-        {
-            if (evaluateCutPoint(item))
-            {
+        std::vector<CutPoint_t> filtered; // candidates accepted by evaluateCutPoint; replaces cutPoints at the end
+        CutPoint_t rest; // the whole sorted sample range; every candidate is evaluated against it
+        int classNumber = 0;
+
+        rest.start = 0;
+        rest.end = X.size();
+        rest.fromValue = std::numeric_limits<float>::lowest();
+        rest.toValue = std::numeric_limits<float>::max();
+        rest.classNumber = classNumber;
+        bool lastReject = false, first = true; // first stays true until a candidate has been accepted
+        for (auto item : cutPoints) {
+            if (evaluateCutPoint(rest, item)) {
+                std::cout << "Accepted" << std::endl;
+                if (lastReject) { // widen the accepted interval so it also covers the rejected candidates before it
+                    if (first) {
+                        item.fromValue = std::numeric_limits<float>::lowest();
+                        item.start = 0; // start is a position in sorted order (it is used as indices[i]), so the first interval begins at 0, not at the sample index indices[0]
+                    } else {
+                        item.fromValue = filtered.back().toValue;
+                        item.start = filtered.back().end;
+                    }
+                }
+                //Assign class number to the interval (cutpoint)
+                item.classNumber = classNumber++;
                filtered.push_back(item);
+                first = false;
+            } else {
+                std::cout << "Rejected" << std::endl;
+                lastReject = true; // NOTE(review): never reset to false after a later acceptance - confirm intended
            }
        }
+        if (!first)
+            filtered.back().toValue = std::numeric_limits<float>::max(); // the last accepted interval is open towards the top
+        else {
+            filtered.push_back(rest); // nothing accepted: a single interval spanning all samples
+        }
+
        cutPoints = filtered;
    }
    void CPPFImdlp::computeCutPoints()
    {

-        std::vector<CutPointBody> cutPts;
-        CutPointBody cutPoint;
-        std::vector<size_t> cutIdx;
+        std::vector<CutPoint_t> cutPts;
+        CutPoint_t cutPoint;
+        indices_t cutIdx;
        float xPrev, xCur, xPivot;
        int yPrev, yCur, yPivot;
        size_t idxPrev, idxPivot, idx, numElements, start;
@@ -99,28 +147,25 @@ namespace mdlp
        bool firstCutPoint = true;
        if (debug)
            printf("*idx=%lu -> (-1, -1) Prev(%3.1f, %d) Elementos: %lu\n", idx, xCur, yCur, numElements);
-        while (idx < numElements)
-        {
+        while (idx < numElements) {
            xPivot = xCur;
            yPivot = yCur;
            idxPivot = indices[idx];
            if (debug)
                printf("<idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
            // Read the same values and check class changes
-            do
-            {
+            do {
                idx++;
                xCur = X[indices[idx]];
                yCur = y[indices[idx]];
-                if (yCur != yPivot && xCur == xPivot)
-                {
+                if (yCur != yPivot && xCur == xPivot) {
                    yPivot = -1;
                }
                if (debug)
                    printf(">idx=%lu -> Prev(%3.1f, %d) Pivot(%3.1f, %d) Cur(%3.1f, %d) \n", idx, xPrev, yPrev, xPivot, yPivot, xCur, yCur);
-            } while (idx < numElements && xCur == xPivot);
-            if (yPivot == -1 || yPrev != yCur)
-            {
+            }
+            while (idx < numElements && xCur == xPivot);
+            if (yPivot == -1 || yPrev != yCur) {
                cutPoint.start = start;
                cutPoint.end = idx - 1;
                start = idx;
@@ -128,8 +173,7 @@ namespace mdlp
                cutPoint.toValue = (xPrev + xCur) / 2;
                cutPoint.classNumber = -1;
                firstCutPoint = false;
-                if (debug)
-                {
+                if (debug) {
                    printf("Cutpoint idx=%lu Cur(%3.1f, %d) Prev(%3.1f, %d) Pivot(%3.1f, %d) = (%3.1g, %3.1g] \n", idx, xCur, yCur, xPrev, yPrev, xPivot, yPivot, cutPoint.fromValue, cutPoint.toValue);
                }
                cutPts.push_back(cutPoint);
@@ -139,8 +183,7 @@ namespace mdlp
            xPrev = xPivot;
            idxPrev = indices[idxPivot];
        }
-        if (idx == numElements)
-        {
+        if (idx == numElements) {
            cutPoint.start = start;
            cutPoint.end = numElements;
            cutPoint.fromValue = firstCutPoint ? std::numeric_limits<float>::lowest() : cutPts.back().toValue;
@@ -155,26 +198,22 @@ namespace mdlp
    }
    void CPPFImdlp::computeCutPointsAnt()
    {
-        std::vector<float> cutPts;
-        std::vector<int> cutIdx;
+        samples cutPts;
+        labels cutIdx;
        float xPrev, cutPoint;
        int yPrev;
        size_t idxPrev;
        xPrev = X.at(indices[0]);
        yPrev = y.at(indices[0]);
        idxPrev = indices[0];
-        if (debug)
-        {
+        if (debug) {
            std::cout << "Entropy: " << Metrics::entropy(y, indices, 0, y.size(), Metrics::numClasses(y, indices, 0, indices.size())) << std::endl;
        }
-        for (auto index = indices.begin(); index != indices.end(); ++index)
-        {
+        for (auto index = indices.begin(); index != indices.end(); ++index) {
            //  Definition 2 Cut points are always on boundaries
-            if (y.at(*index) != yPrev && xPrev < X.at(*index))
-            {
+            if (y.at(*index) != yPrev && xPrev < X.at(*index)) {
                cutPoint = round(divider * (X.at(*index) + xPrev) / 2) / divider;
-                if (debug)
-                {
+                if (debug) {
                    std::cout << "Cut point: " << (xPrev + X.at(*index)) / 2 << " //";
                    std::cout << X.at(*index) << " -> " << y.at(*index) << " yPrev= " << yPrev;
                    std::cout << "* (" << X.at(*index) << ", " << xPrev << ")="
@@ -191,9 +230,9 @@ namespace mdlp
        // cutPoints = cutPts;
    }
    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
-    std::vector<size_t> CPPFImdlp::sortIndices(std::vector<float> &X)
+    indices_t CPPFImdlp::sortIndices(samples& X)
    {
-        std::vector<size_t> idx(X.size());
+        indices_t idx(X.size());
        std::iota(idx.begin(), idx.end(), 0);
        for (std::size_t i = 0; i < X.size(); i++)
            stable_sort(idx.begin(), idx.end(), [&X](size_t i1, size_t i2)
--- a/fimdlp/CPPFImdlp.h
+++ b/fimdlp/CPPFImdlp.h
@@ -1,44 +1,37 @@
 #ifndef CPPFIMDLP_H
 #define CPPFIMDLP_H
-#include <vector>
+#include "typesFImdlp.h"
 #include <utility>
-namespace mdlp
-{
-    struct CutPointBody
-    {
-        size_t start, end;        // indices of the sorted vector
-        int classNumber;          // class assigned to the cut point
-        float fromValue, toValue; // Values of the variable
-    };
-    class CPPFImdlp
-    {
+namespace mdlp {
+    class CPPFImdlp {
    private:
        bool debug;
        int precision;
        float divider;
-        std::vector<size_t> indices; // sorted indices to use with X and y
-        std::vector<float> X;
-        std::vector<int> y;
-        std::vector<float> xDiscretized;
-        std::vector<CutPointBody> cutPoints;
+        indices_t indices; // sorted indices to use with X and y
+        samples X;
+        labels y;
+        labels xDiscretized;
+        int numClasses;
+        std::vector<CutPoint_t> cutPoints;

    protected:
-        std::vector<size_t> sortIndices(std::vector<float> &);
-        bool evaluateCutPoint(CutPointBody);
-        void filterCutPoints();
-        void computeCutPoints();
-        void applyCutPoints();
+        indices_t sortIndices(samples&);
        void computeCutPointsAnt();
+        void computeCutPoints();
+        bool evaluateCutPoint(CutPoint_t, CutPoint_t);
+        void filterCutPoints();
+        void applyCutPoints();

    public:
        CPPFImdlp();
        CPPFImdlp(int, bool debug = false);
        ~CPPFImdlp();
-        std::vector<CutPointBody> getCutPoints();
-        std::vector<float> getDiscretizedValues();
-        void debugPoints(std::vector<float> &, std::vector<int> &);
-        void fit(std::vector<float> &, std::vector<int> &);
-        std::vector<float> &transform(std::vector<float> &);
+        std::vector<CutPoint_t> getCutPoints();
+        labels getDiscretizedValues();
+        void debugPoints(samples&, labels&);
+        void fit(samples&, labels&);
+        labels& transform(samples&);
    };
 }
 #endif
--- a/fimdlp/Metrics.cpp
+++ b/fimdlp/Metrics.cpp
@@ -1,40 +1,35 @@
 #include "Metrics.h"
 #include <set>
-namespace mdlp
-{
+namespace mdlp {
    Metrics::Metrics()
    {
    }
-    int Metrics::numClasses(std::vector<int> &y, std::vector<size_t> indices, size_t start, size_t end)
+    int Metrics::numClasses(labels& y, indices_t indices, size_t start, size_t end)
    {
        std::set<int> numClasses;
-        for (auto i = start; i < end; ++i)
-        {
+        for (auto i = start; i < end; ++i) {
            numClasses.insert(y[indices[i]]);
        }
        return numClasses.size();
    }
-    float Metrics::entropy(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, int nClasses)
+    float Metrics::entropy(labels& y, indices_t& indices, size_t start, size_t end, int nClasses) // base-2 entropy of the labels y[indices[i]] for i in [start, end)
    {
        float entropy = 0;
        int nElements = 0;
-        std::vector<int> counts(nClasses + 1, 0);
-        for (auto i = &indices[start]; i != &indices[end]; ++i)
-        {
+        labels counts(nClasses + 1, 0); // one counter per label value; the label indexes counts directly, so labels are assumed to lie in [0, nClasses] - TODO confirm
+        for (auto i = &indices[start]; i != &indices[end]; ++i) { // pointer walk over the index slice; &indices[end] is used only as the stop address
            counts[y[*i]]++;
            nElements++;
        }
-        for (auto count : counts)
-        {
-            if (count > 0)
-            {
+        for (auto count : counts) {
+            if (count > 0) { // label values that never occur add nothing and would otherwise reach log2(0)
                float p = (float)count / nElements;
                entropy -= p * log2(p);
            }
        }
        return entropy;
    }
-    float Metrics::informationGain(std::vector<int> &y, std::vector<size_t> &indices, size_t start, size_t end, size_t cutPoint, int nClasses)
+    float Metrics::informationGain(labels& y, indices_t& indices, size_t start, size_t end, size_t cutPoint, int nClasses)
    {
        float iGain = 0.0;
        float entropy, entropyLeft, entropyRight;
--- a/fimdlp/Metrics.h
+++ b/fimdlp/Metrics.h
@@ -1,16 +1,14 @@
 #ifndef METRICS_H
 #define METRICS_H
-#include <vector>
+#include "typesFImdlp.h"
 #include <cmath>
-namespace mdlp
-{
-    class Metrics
-    {
+namespace mdlp {
+    class Metrics {
    public:
        Metrics();
-        static int numClasses(std::vector<int> &, std::vector<size_t>, size_t, size_t);
-        static float entropy(std::vector<int> &, std::vector<size_t> &, size_t, size_t, int);
-        static float informationGain(std::vector<int> &, std::vector<size_t> &, size_t, size_t, size_t, int);
+        static int numClasses(labels&, indices_t, size_t, size_t);
+        static float entropy(labels&, indices_t&, size_t, size_t, int);
+        static float informationGain(labels&, indices_t&, size_t, size_t, size_t, int);
    };
 }
 #endif
--- a/fimdlp/cfimdlp.pyx
+++ b/fimdlp/cfimdlp.pyx
@@ -12,13 +12,13 @@ cdef extern from "CPPFImdlp.h" namespace "mdlp":
        CPPFImdlp() except + 
        CPPFImdlp(int, bool) except + 
        void fit(vector[float]&, vector[int]&)
-        vector[float] transform(vector[float]&)
-        vector[float] getDiscretizedValues()
+        vector[int] transform(vector[float]&)
+        vector[int] getDiscretizedValues()
        vector[CutPointBody] getCutPoints()
        void debugPoints(vector[float]&, vector[int]&)
        

-class PCutPointBody:
+class PCutPoint_t:
    def __init__(self, start, end, fromValue, toValue):
        self.start = start
        self.end = end
@@ -37,7 +37,7 @@ cdef class CFImdlp:
        return self.thisptr.transform(X)
    def get_discretized_values(self):
        return self.thisptr.getDiscretizedValues()
-    def get_cut_points(self, X, y):
+    def get_cut_points(self):
        return  self.thisptr.getCutPoints()
    def debug_points(self, X, y):
        return self.thisptr.debugPoints(X, y)
--- a/fimdlp/cppfimdlp.cpython-310-darwin.so
+++ b/fimdlp/cppfimdlp.cpython-310-darwin.so
--- a/fimdlp/testcpp/FImdlp_unittest.cc
+++ b/fimdlp/testcpp/FImdlp_unittest.cc
@@ -1,33 +1,51 @@
 #include "gtest/gtest.h"
+#include "../Metrics.h"
 #include "../CPPFImdlp.h"
-namespace
-{
-    float precision = 0.000001;
-    class TestMetrics : protected mdlp::CPPFImdlp
-    {
+namespace mdlp {
+    class TestMetrics : public CPPFImdlp, public testing::Test {
    public:
-        std::vector<size_t> testSort(std::vector<float> &X)
+        //TestMetrics(samples X, labels y, indices_t indices) : X(X), y(y), indices(indices), CPPFImdlp(true) {}
+        indices_t indices; // sorted indices to use with X and y
+        samples X;
+        labels y;
+        samples xDiscretized;
+        int numClasses;
+        float precision_test = 0.000001;
+        void SetUp() override
        {
-            return sortIndices(X);
+            X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
+            indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
+            y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+            numClasses = 2;
        }
-    };
-    void check_sorted_vector(std::vector<float> &X, std::vector<size_t> indices)
+        void check_sorted_vector(samples& X, indices_t indices)
        {
-        TestMetrics testClass = TestMetrics();
-        std::vector<size_t> testSortedIndices = testClass.testSort(X);
+            this->X = X;
+            this->indices = indices;
+            indices_t testSortedIndices = sortIndices(X);
            float prev = X[testSortedIndices[0]];
-        for (auto i = 0; i < X.size(); ++i)
-        {
+            for (auto i = 0; i < X.size(); ++i) {
                EXPECT_EQ(testSortedIndices[i], indices[i]);
                EXPECT_LE(prev, X[testSortedIndices[i]]);
                prev = X[testSortedIndices[i]];
            }
        }
-    TEST(FImdlpTest, SortIndices)
+        std::vector<CutPoint_t> testCutPoints(samples& X, indices_t& indices, labels& y)
        {
+            this->X = X;
+            this->y = y;
+            this->indices = indices;
+            this->numClasses = Metrics::numClasses(y, indices, 0, X.size());

-        std::vector<float> X = {5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9};
-        std::vector<size_t> indices = {4, 3, 6, 8, 2, 1, 5, 0, 9, 7};
+            //computeCutPoints();
+            return getCutPoints();
+        }
+    };
+    // 
+    TEST_F(TestMetrics, SortIndices)
+    {
+        samples X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
+        indices_t indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
        check_sorted_vector(X, indices);
        X = { 5.77, 5.88, 5.99 };
        indices = { 0, 1, 2 };
@@ -36,4 +54,23 @@ namespace
        indices = { 2, 1, 0 };
        check_sorted_vector(X, indices);
    }
+    // TEST_F(TestMetrics, EvaluateCutPoint)
+    // {
+    //     CutPoint_t rest, candidate;
+    //     rest.start = 0;
+    //     rest.end = 10;
+    //     candidate.start = 0;
+    //     candidate.end = 5;
+    //     float computed = evaluateCutPoint(rest, candidate);
+    //     ASSERT_NEAR(0.468996, computed, precision_test);
+    // }
+    TEST_F(TestMetrics, ComputeCutPoints)
+    {
+        std::vector<CutPoint_t> computed, expected;
+        computeCutPoints();
+        computed = getCutPoints();
+        for (auto cut : computed) {
+            std::cout << "(" << cut.start << ", " << cut.end << ") -> (" << cut.fromValue << ",  " << cut.toValue << ")" << std::endl;
+        }
+    }
 }
--- a/fimdlp/testcpp/Metrics_unittest.cc
+++ b/fimdlp/testcpp/Metrics_unittest.cc
@@ -1,33 +1,31 @@
 #include "gtest/gtest.h"
 #include "../Metrics.h"

-namespace
-{
-
+namespace mdlp {
    float precision = 0.000001;
    TEST(MetricTest, NumClasses)
    {
-        std::vector<int> y = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
-        std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-        EXPECT_EQ(1, mdlp::Metrics::numClasses(y, indices, 4, 8));
-        EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 0, 10));
-        EXPECT_EQ(2, mdlp::Metrics::numClasses(y, indices, 8, 10));
+        labels y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
+        indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        EXPECT_EQ(1, Metrics::numClasses(y, indices, 4, 8));
+        EXPECT_EQ(2, Metrics::numClasses(y, indices, 0, 10));
+        EXPECT_EQ(2, Metrics::numClasses(y, indices, 8, 10));
    }
    TEST(MetricTest, Entropy)
    {
-        std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
-        std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-        EXPECT_EQ(1, mdlp::Metrics::entropy(y, indices, 0, 10, 2));
-        EXPECT_EQ(0, mdlp::Metrics::entropy(y, indices, 0, 5, 1));
-        std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
-        ASSERT_NEAR(0.468996, mdlp::Metrics::entropy(yz, indices, 0, 10, 2), precision);
+        labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+        indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        EXPECT_EQ(1, Metrics::entropy(y, indices, 0, 10, 2));
+        EXPECT_EQ(0, Metrics::entropy(y, indices, 0, 5, 1));
+        labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
+        ASSERT_NEAR(0.468996, Metrics::entropy(yz, indices, 0, 10, 2), precision);
    }
    TEST(MetricTest, InformationGain)
    {
-        std::vector<int> y = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2};
-        std::vector<size_t> indices = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-        std::vector<int> yz = {1, 1, 1, 1, 1, 1, 1, 1, 2, 1};
-        ASSERT_NEAR(1, mdlp::Metrics::informationGain(y, indices, 0, 10, 5, 2), precision);
-        ASSERT_NEAR(0.108032, mdlp::Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision);
+        labels y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+        indices_t indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        labels yz = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
+        ASSERT_NEAR(1, Metrics::informationGain(y, indices, 0, 10, 5, 2), precision);
+        ASSERT_NEAR(0.108032, Metrics::informationGain(yz, indices, 0, 10, 5, 2), precision);
    }
 }
--- a/fimdlp/testcpp/test
+++ b/fimdlp/testcpp/test
@@ -0,0 +1,12 @@
+cmake -S . -B build -Wno-dev 
+if test $? -ne 0; then
+   echo "Error in creating build commands."
+   exit 1
+fi
+cmake --build build
+if test $? -ne 0; then
+   echo "Error in build command."
+   exit 1
+fi
+cd build
+ctest --output-on-failure
--- a/fimdlp/testcpp/test.sh
+++ b/fimdlp/testcpp/test.sh
@@ -1,4 +1,4 @@
-cmake -S . -B build
+cmake -S . -B build -Wno-dev 
 if test $? -ne 0; then
   echo "Error in creating build commands."
   exit 1
--- a/fimdlp/typesFImdlp.h
+++ b/fimdlp/typesFImdlp.h
@@ -0,0 +1,15 @@
+#ifndef TYPES_H
+#define TYPES_H
+#include <vector>
+namespace mdlp {
+    typedef std::vector<float> samples;    // values of one continuous variable (the X passed to fit)
+    typedef std::vector<int> labels;       // integer class labels (y) and discretized outputs
+    typedef std::vector<size_t> indices_t; // positions into samples/labels, e.g. the argsort of X
+    struct CutPointBody {
+        size_t start, end;        // indices of the sorted vector
+        int classNumber;          // class assigned to the cut point
+        float fromValue, toValue; // values of the variable that bound the interval
+    };
+    typedef CutPointBody CutPoint_t; // short alias used across the library
+}
+#endif
--- a/prueba/FImdlp.cpp
+++ b/prueba/FImdlp.cpp
@@ -1,21 +1,18 @@
 #include "FImdlp.h"
-namespace FImdlp
-{
+namespace FImdlp {
    FImdlp::FImdlp()
    {
    }
    FImdlp::~FImdlp()
    {
    }
-    std::vector<float> FImdlp::cutPoints(std::vector<int> &X, std::vector<int> &y)
+    samples FImdlp::cutPoints(labels& X, labels& y)
    {
-        std::vector<float> cutPts;
+        samples cutPts;
        int i, ant = X.at(0);
        int n = X.size();
-        for (i = 1; i < n; i++)
-        {
-            if (X.at(i) != ant)
-            {
+        for (i = 1; i < n; i++) {
+            if (X.at(i) != ant) {
                cutPts.push_back(float(X.at(i) + ant) / 2);
                ant = X.at(i);
            }
--- a/prueba/FImdlp.h
+++ b/prueba/FImdlp.h
@@ -2,14 +2,15 @@
 #define FIMDLP_H
 #include <vector>
 #include <Python.h>
-namespace FImdlp
-{
-    class FImdlp
-    {
+namespace FImdlp {
+    // Local aliases: this header does not include the mdlp type header, so the names used below must be declared here.
+    typedef std::vector<float> samples; // cut point values returned by cutPoints
+    typedef std::vector<int> labels;    // integer-valued input vectors
+    class FImdlp {
    public:
        FImdlp();
        ~FImdlp();
-        std::vector<float> cutPoints(std::vector<int> &, std::vector<int> &);
+        samples cutPoints(labels&, labels&);
    };
 }
 #endif
--- a/sample.py
+++ b/sample.py
@@ -18,13 +18,22 @@ test = CFImdlp(debug=False)
 # k = test.cut_points_ant(X[:, 0], y)
 # print(k)
 # test.debug_points(X[:, 0], y)
-result = test.cut_points(X[:, 0], y)
+X = [5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9]
+indices = [4, 3, 6, 8, 2, 1, 5, 0, 9, 7]
+y = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
+# test.fit(X[:, 0], y)
+test.fit(X, y)
+result = test.get_cut_points()
 for item in result:
    print(
        f"Class={item['classNumber']} - ({item['start']:3d}, {item['end']:3d})"
        f" -> ({item['fromValue']:3.1f}, {item['toValue']:3.1f}]"
    )
-
+print(test.get_discretized_values())
+# print(test.transform(X))
+# print(X)
+# print(indices)
+# print(np.array(X)[indices])

 # X = np.array(
 #     [