Merge c488ace719 into 7b0673fd4b

Fix FImdlp tests
Fix BinDisc quantile mistakes
2025-08-21 02:15:57 +00:00 · 2024-07-02 11:50:55 +02:00 · 2024-07-02 11:50:42 +02:00 · 2024-07-02 09:40:06 +02:00 · 2024-06-24 11:47:03 +02:00 · 2024-06-24 10:55:26 +02:00
33 changed files with 1259 additions and 379 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,15 +22,19 @@ jobs:
        run: |
          sudo apt-get -y install lcov
          sudo apt-get -y install gcovr
      - name: Install Libtorch
        run: |
          wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
          unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
      - name: Tests & build-wrapper
        run: |
-          cmake -S . -B build -Wno-dev 
+          cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DENABLE_TESTING=ON
          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
          cd build
          make
          ctest -C Release --output-on-failure --test-dir tests
          cd ..
-          gcovr -f CPPFImdlp.cpp -f Metrics.cpp  -f BinDisc.cpp --txt --sonarqube=coverage.xml
+          gcovr -f CPPFImdlp.cpp -f Metrics.cpp -f BinDisc.cpp -f Discretizer.cpp --txt --sonarqube=coverage.xml
      - name: Run sonar-scanner
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,8 @@
 **/build
 build_Debug
 build_Release
 build_debug
 build_release
 **/lcoverage
 .idea
 cmake-*
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,15 +8,10 @@
            "name": "C++ Launch config",
            "type": "cppdbg",
            "request": "launch",
-            "program": "${workspaceFolder}/build/sample/sample",
+            "program": "${workspaceFolder}/tests/build/BinDisc_unittest",
-            "cwd": "${workspaceFolder}/build/sample",
+            "cwd": "${workspaceFolder}/tests/build",
-            "args": [
+            "args": [],
                "-f",
                "glass"
            ],
            "targetArchitecture": "arm64",
            "launchCompleteCommand": "exec-run",
            "preLaunchTask": "CMake: build",
            "stopAtEntry": false,
            "linux": {
                "MIMode": "gdb",
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -5,5 +5,105 @@
    },
    "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
    "cmake.configureOnOpen": true,
-    "sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
+    "sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
    "files.associations": {
        "*.rmd": "markdown",
        "*.py": "python",
        "vector": "cpp",
        "__bit_reference": "cpp",
        "__bits": "cpp",
        "__config": "cpp",
        "__debug": "cpp",
        "__errc": "cpp",
        "__hash_table": "cpp",
        "__locale": "cpp",
        "__mutex_base": "cpp",
        "__node_handle": "cpp",
        "__nullptr": "cpp",
        "__split_buffer": "cpp",
        "__string": "cpp",
        "__threading_support": "cpp",
        "__tuple": "cpp",
        "array": "cpp",
        "atomic": "cpp",
        "bitset": "cpp",
        "cctype": "cpp",
        "chrono": "cpp",
        "clocale": "cpp",
        "cmath": "cpp",
        "compare": "cpp",
        "complex": "cpp",
        "concepts": "cpp",
        "cstdarg": "cpp",
        "cstddef": "cpp",
        "cstdint": "cpp",
        "cstdio": "cpp",
        "cstdlib": "cpp",
        "cstring": "cpp",
        "ctime": "cpp",
        "cwchar": "cpp",
        "cwctype": "cpp",
        "exception": "cpp",
        "initializer_list": "cpp",
        "ios": "cpp",
        "iosfwd": "cpp",
        "istream": "cpp",
        "limits": "cpp",
        "locale": "cpp",
        "memory": "cpp",
        "mutex": "cpp",
        "new": "cpp",
        "optional": "cpp",
        "ostream": "cpp",
        "ratio": "cpp",
        "sstream": "cpp",
        "stdexcept": "cpp",
        "streambuf": "cpp",
        "string": "cpp",
        "string_view": "cpp",
        "system_error": "cpp",
        "tuple": "cpp",
        "type_traits": "cpp",
        "typeinfo": "cpp",
        "unordered_map": "cpp",
        "variant": "cpp",
        "algorithm": "cpp",
        "iostream": "cpp",
        "iomanip": "cpp",
        "numeric": "cpp",
        "set": "cpp",
        "__tree": "cpp",
        "deque": "cpp",
        "list": "cpp",
        "map": "cpp",
        "unordered_set": "cpp",
        "any": "cpp",
        "condition_variable": "cpp",
        "forward_list": "cpp",
        "fstream": "cpp",
        "stack": "cpp",
        "thread": "cpp",
        "__memory": "cpp",
        "filesystem": "cpp",
        "*.toml": "toml",
        "utility": "cpp",
        "span": "cpp",
        "*.tcc": "cpp",
        "bit": "cpp",
        "charconv": "cpp",
        "cinttypes": "cpp",
        "codecvt": "cpp",
        "functional": "cpp",
        "iterator": "cpp",
        "memory_resource": "cpp",
        "random": "cpp",
        "source_location": "cpp",
        "format": "cpp",
        "numbers": "cpp",
        "semaphore": "cpp",
        "stop_token": "cpp",
        "text_encoding": "cpp",
        "typeindex": "cpp",
        "valarray": "cpp"
    }
 }
--- a/BinDisc.cpp
+++ b/BinDisc.cpp
@@ -1,5 +1,4 @@
 #include <algorithm>
 #include <limits>
 #include <cmath>
 #include "BinDisc.h"
 #include <iostream>
@@ -7,7 +6,8 @@
 namespace mdlp {
-    BinDisc::BinDisc(int n_bins, strategy_t strategy) : n_bins{ n_bins }, strategy{ strategy }
+    BinDisc::BinDisc(int n_bins, strategy_t strategy) :
        Discretizer(), n_bins{ n_bins }, strategy{ strategy }
    {
        if (n_bins < 3) {
            throw std::invalid_argument("n_bins must be greater than 2");
@@ -16,9 +16,11 @@ namespace mdlp {
    BinDisc::~BinDisc() = default;
    void BinDisc::fit(samples_t& X)
    {
        // y is included for compatibility with the Discretizer interface
        cutPoints.clear();
        if (X.empty()) {
-            cutPoints.push_back(std::numeric_limits<precision_t>::max());
+            cutPoints.push_back(0.0);
            cutPoints.push_back(0.0);
            return;
        }
        if (strategy == strategy_t::QUANTILE) {
@@ -27,15 +29,18 @@ namespace mdlp {
            fit_uniform(X);
        }
    }
    // Two-argument overload that forwards to fit(X); the labels are not read here.
    void BinDisc::fit(samples_t& X, labels_t& y)
    {
        (void)y; // intentionally unused
        this->fit(X);
    }
    std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
    {
        // Doesn't include end point as it is not needed
        if (start == end) {
-            return { 0 };
+            return { start, end };
        }
        precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
        std::vector<precision_t> linspc;
-        for (size_t i = 0; i < num - 1; ++i) {
+        for (size_t i = 0; i < num; ++i) {
            precision_t val = start + delta * static_cast<precision_t>(i);
            linspc.push_back(val);
        }
@@ -49,17 +54,19 @@ namespace mdlp {
    {
        // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
        std::vector<precision_t> results;
        bool first = true;
        results.reserve(percentiles.size());
        for (auto percentile : percentiles) {
            const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
-            const auto indexLower = clip(i, 0, data.size() - 1);
+            const auto indexLower = clip(i, 0, data.size() - 2);
            const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
            const double fraction =
                (percentile / 100.0 - percentI) /
                (static_cast<double>(indexLower + 1) / static_cast<double>(data.size() - 1) - percentI);
            const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
-            if (value != results.back())
+            if (value != results.back() || first) // first needed as results.back() return is undefined for empty vectors
                results.push_back(value);
            first = false;
        }
        return results;
    }
@@ -69,70 +76,16 @@ namespace mdlp {
        auto data = X;
        std::sort(data.begin(), data.end());
        if (data.front() == data.back() || data.size() == 1) {
-            // if X is constant
+            // if X is constant, pass any two given points that shall be ignored in transform
-            cutPoints.push_back(std::numeric_limits<precision_t>::max());
+            cutPoints.push_back(data.front());
            cutPoints.push_back(data.front());
            return;
        }
        cutPoints = percentile(data, quantiles);
        normalizeCutPoints();
    }
    // Builds the cut points from n_bins + 1 evenly spaced values between the
    // smallest and the largest sample, then normalizes them.
    void BinDisc::fit_uniform(samples_t& X)
    {
        const auto bounds = std::minmax_element(X.begin(), X.end());
        const precision_t lowest = *bounds.first;
        const precision_t highest = *bounds.second;
        cutPoints = linspace(lowest, highest, n_bins + 1);
        normalizeCutPoints();
    }
    void BinDisc::normalizeCutPoints()
    {
        // Add max value to the end
        cutPoints.push_back(std::numeric_limits<precision_t>::max());
        // Remove first as it is not needed
        cutPoints.erase(cutPoints.begin());
    }
    // Labels every sample with the position of the first cut point that is
    // strictly greater than it. Returns a reference to the internal buffer,
    // which is rewritten on each call.
    labels_t& BinDisc::transform(const samples_t& X)
    {
        discretizedData.clear();
        discretizedData.reserve(X.size());
        for (auto sample = X.begin(); sample != X.end(); ++sample) {
            const auto bin = std::upper_bound(cutPoints.begin(), cutPoints.end(), *sample) - cutPoints.begin();
            discretizedData.push_back(bin);
        }
        return discretizedData;
    }
 }
 // void BinDisc::fit_quantile(samples_t& X)
    // {
    //     cutPoints.clear();
    //     if (X.empty()) {
    //         cutPoints.push_back(std::numeric_limits<float>::max());
    //         return;
    //     }
    //     samples_t data = X;
    //     std::sort(data.begin(), data.end());
    //     float min_val = data.front();
    //     float max_val = data.back();
    //     // Handle case of all data points having the same value
    //     if (min_val == max_val) {
    //         cutPoints.push_back(std::numeric_limits<float>::max());
    //         return;
    //     }
    //     int first = X.size() / n_bins;
    //     cutPoints.push_back(data.at(first - 1));
    //     int bins_done = 1;
    //     int prev = first - 1;
    //     while (bins_done < n_bins) {
    //         int next = first * (bins_done + 1) - 1;
    //         while (next < X.size() && data.at(next) == data[prev]) {
    //             ++next;
    //         }
    //         if (next == X.size() || bins_done == n_bins - 1) {
    //             cutPoints.push_back(std::numeric_limits<float>::max());
    //             break;
    //         } else {
    //             cutPoints.push_back(data[next]);
    //             bins_done++;
    //             prev = next;
    //         }
    //     }
    // }
--- a/BinDisc.h
+++ b/BinDisc.h
@@ -2,30 +2,26 @@
 #define BINDISC_H
 #include "typesFImdlp.h"
 #include "Discretizer.h"
 #include <string>
 namespace mdlp {
    enum class strategy_t {
        UNIFORM,
        QUANTILE
    };
-    class BinDisc {
+    class BinDisc : public Discretizer {
    public:
        BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
        ~BinDisc();
-        void fit(samples_t&);
+        // y is included for compatibility with the Discretizer interface
-        inline cutPoints_t getCutPoints() const { return cutPoints; };
+        void fit(samples_t& X_, labels_t& y) override;
-        labels_t& transform(const samples_t&);
+        void fit(samples_t& X);
        static inline std::string version() { return "1.0.0"; };
    private:
        void fit_uniform(samples_t&);
        void fit_quantile(samples_t&);
        void normalizeCutPoints();
        int n_bins;
        strategy_t strategy;
        labels_t discretizedData = labels_t();
        cutPoints_t cutPoints;
    };
 }
 #endif
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,11 @@
 cmake_minimum_required(VERSION 3.20)
 project(mdlp)
-
+set(CMAKE_CXX_STANDARD 17)
-if (POLICY CMP0135)
+find_package(Torch REQUIRED)
-    cmake_policy(SET CMP0135 NEW)
+include_directories(${TORCH_INCLUDE_DIRS})
-endif ()
+add_library(mdlp CPPFImdlp.cpp Metrics.cpp BinDisc.cpp Discretizer.cpp)
-
+target_link_libraries(mdlp "${TORCH_LIBRARIES}")
 set(CMAKE_CXX_STANDARD 11)
 add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
 add_subdirectory(sample)
-add_subdirectory(tests)
+if (ENABLE_TESTING)
-
+    add_subdirectory(tests)
 endif(ENABLE_TESTING)
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -6,16 +6,14 @@
 namespace mdlp {
-    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
        Discretizer(),
        min_length(min_length_),
        max_depth(max_depth_),
        proposed_cuts(proposed)
    {
    }
    CPPFImdlp::CPPFImdlp() = default;
    CPPFImdlp::~CPPFImdlp() = default;
    size_t CPPFImdlp::compute_max_num_cut_points() const
    {
        // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
@@ -27,7 +25,7 @@ namespace mdlp {
        }
        if (proposed_cuts < 1)
            return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
-        return static_cast<size_t>(proposed_cuts);
+        return static_cast<size_t>(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added
    }
    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
@@ -60,6 +58,10 @@ namespace mdlp {
                resizeCutPoints();
            }
        }
        // Insert the first & last X values into the cut points, as they will be ignored in transform
        auto minmax = std::minmax_element(X.begin(), X.end());
        cutPoints.push_back(*minmax.second);
        cutPoints.insert(cutPoints.begin(), *minmax.first);
    }
    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
@@ -208,14 +210,5 @@ namespace mdlp {
        }
        cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
    }
-    labels_t& CPPFImdlp::transform(const samples_t& data)
+
    {
        discretizedData.clear();
        discretizedData.reserve(data.size());
        for (const precision_t& item : data) {
            auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item);
            discretizedData.push_back(upper - cutPoints.begin());
        }
        return discretizedData;
    }
 }
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@@ -6,18 +6,16 @@
 #include <utility>
 #include <string>
 #include "Metrics.h"
 #include "Discretizer.h"
 namespace mdlp {
-    class CPPFImdlp {
+    class CPPFImdlp : public Discretizer {
    public:
-        CPPFImdlp();
+        CPPFImdlp() = default;
-        CPPFImdlp(size_t, int, float);
+        CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
-        ~CPPFImdlp();
+        virtual ~CPPFImdlp() = default;
-        void fit(samples_t&, labels_t&);
+        void fit(samples_t& X_, labels_t& y_) override;
        inline cutPoints_t getCutPoints() const { return cutPoints; };
        labels_t& transform(const samples_t&);
        inline int get_depth() const { return depth; };
        static inline std::string version() { return "1.1.3"; };
    protected:
        size_t min_length = 3;
        int depth = 0;
@@ -27,9 +25,7 @@ namespace mdlp {
        samples_t X = samples_t();
        labels_t y = labels_t();
        Metrics metrics = Metrics(y, indices);
        cutPoints_t cutPoints;
        size_t num_cut_points = numeric_limits<size_t>::max();
        labels_t discretizedData = labels_t();
        static indices_t sortIndices(samples_t&, labels_t&);
        void computeCutPoints(size_t, size_t, int);
        void resizeCutPoints();
--- a/Discretizer.cpp
+++ b/Discretizer.cpp
@@ -0,0 +1,51 @@
 #include "Discretizer.h"
 namespace mdlp {
    /**
     * Map every sample to the index of the interval it falls in.
     *
     * The first and the last stored cut points are ignored; only the interior
     * ones delimit intervals. std::lower_bound finds the first interior cut
     * point that is not less than the sample, so a sample equal to a cut point
     * gets the index of that cut point.
     *
     * @param data samples to discretize
     * @return reference to the internal label buffer, overwritten by the next call
     * @throws std::runtime_error if fewer than two cut points are stored
     */
    labels_t& Discretizer::transform(const samples_t& data)
    {
        // Skipping the first and last cut points is only valid when both exist;
        // otherwise begin() + 1 / end() - 1 would step outside the container.
        if (cutPoints.size() < 2) {
            throw std::runtime_error("at least two cut points are needed in discretizer::transform");
        }
        discretizedData.clear();
        discretizedData.reserve(data.size());
        const auto first = cutPoints.begin() + 1;
        const auto last = cutPoints.end() - 1;
        for (const precision_t& item : data) {
            // first <= last holds here, so the distance below is never negative
            const auto position = std::lower_bound(first, last, item);
            discretizedData.push_back(static_cast<labels_t::value_type>(position - first));
        }
        return discretizedData;
    }
    // Fits on (X_, y_) and then discretizes the same samples.
    // Returns the reference handed back by transform().
    labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_)
    {
        this->fit(X_, y_);
        labels_t& labels = this->transform(X_);
        return labels;
    }
    /**
     * Fit the discretizer from torch tensors.
     *
     * Both tensors are copied into the vector types used by fit().
     *
     * @param X_ tensor of samples, read as precision_t elements
     * @param y_ tensor of labels, read as int elements
     */
    void Discretizer::fit_t(torch::Tensor& X_, torch::Tensor& y_)
    {
        // data_ptr() exposes the raw storage, so request a dense layout first;
        // this is a no-op for tensors that are already contiguous.
        const auto X_dense = X_.contiguous();
        const auto y_dense = y_.contiguous();
        const auto* x_data = X_dense.data_ptr<precision_t>();
        const auto* y_data = y_dense.data_ptr<int>();
        samples_t X(x_data, x_data + X_dense.numel());
        // Size y from its own element count: using the count of X_ would read
        // past the end of y_ whenever y_ holds fewer elements.
        labels_t y(y_data, y_data + y_dense.numel());
        fit(X, y);
    }
    /**
     * Discretize the samples held in a torch tensor.
     *
     * @param X_ tensor of samples, read as precision_t elements
     * @return new kInt32 tensor with one label per element of X_
     */
    torch::Tensor Discretizer::transform_t(torch::Tensor& X_)
    {
        // data_ptr() exposes the raw storage, so request a dense layout first;
        // this is a no-op for tensors that are already contiguous.
        const auto X_dense = X_.contiguous();
        // precision_t (not a hard-coded float) keeps this in line with fit_t / fit_transform_t
        const auto* x_data = X_dense.data_ptr<precision_t>();
        samples_t X(x_data, x_data + X_dense.numel());
        // transform() returns a reference to an internal buffer; bind it instead of copying
        const labels_t& result = transform(X);
        return torch::tensor(result, torch::kInt32);
    }
    /**
     * Fit the discretizer from torch tensors and discretize the same samples.
     *
     * @param X_ tensor of samples, read as precision_t elements
     * @param y_ tensor of labels, read as int elements
     * @return new kInt32 tensor with one label per element of X_
     */
    torch::Tensor Discretizer::fit_transform_t(torch::Tensor& X_, torch::Tensor& y_)
    {
        // data_ptr() exposes the raw storage, so request a dense layout first;
        // this is a no-op for tensors that are already contiguous.
        const auto X_dense = X_.contiguous();
        const auto y_dense = y_.contiguous();
        const auto* x_data = X_dense.data_ptr<precision_t>();
        const auto* y_data = y_dense.data_ptr<int>();
        samples_t X(x_data, x_data + X_dense.numel());
        // Size y from its own element count: using the count of X_ would read
        // past the end of y_ whenever y_ holds fewer elements.
        labels_t y(y_data, y_data + y_dense.numel());
        // fit_transform() returns a reference to an internal buffer; bind it instead of copying
        const labels_t& result = fit_transform(X, y);
        return torch::tensor(result, torch::kInt32);
    }
 }
--- a/Discretizer.h
+++ b/Discretizer.h
@@ -0,0 +1,27 @@
 #ifndef DISCRETIZER_H
 #define DISCRETIZER_H
 #include <string>
 #include <algorithm>
 #include <torch/torch.h>
 #include "typesFImdlp.h"
 namespace mdlp {
    // Abstract base for discretizers: derived classes implement fit() to fill
    // cutPoints, while the transform family defined here turns samples into labels.
    class Discretizer {
    public:
        Discretizer() = default;
        virtual ~Discretizer() = default;
        // Returns a copy of the stored cut points.
        inline cutPoints_t getCutPoints() const { return cutPoints; };
        // Computes the cut points from the samples X_ and labels y_; implemented by derived classes.
        virtual void fit(samples_t& X_, labels_t& y_) = 0;
        // Discretizes data using the stored cut points; returns a reference to the internal label buffer.
        labels_t& transform(const samples_t& data);
        // Calls fit(X_, y_) and then transform(X_).
        labels_t& fit_transform(samples_t& X_, labels_t& y_);
        // Tensor counterpart of fit(): copies the tensor contents into vectors and calls fit().
        void fit_t(torch::Tensor& X_, torch::Tensor& y_);
        // Tensor counterpart of transform(): returns the labels as a kInt32 tensor.
        torch::Tensor transform_t(torch::Tensor& X_);
        // Tensor counterpart of fit_transform(): returns the labels as a kInt32 tensor.
        torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
        // Version string reported by this class.
        static inline std::string version() { return "1.2.3"; };
    protected:
        // Label buffer filled by transform() and returned by reference.
        labels_t discretizedData = labels_t();
        cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform
    };
 }
 #endif
--- a/13
+++ b/13
@@ -0,0 +1,13 @@
 SHELL := /bin/bash
 .DEFAULT_GOAL := build
 .PHONY: build test
 build: 
 	@if [ -d build_release ]; then rm -fr build_release; fi
 	@mkdir build_release
 	@cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF
 	@cmake --build build_release
 test:
 	@echo "Testing..."
 	@cd tests && ./test
--- a/Metrics.cpp
+++ b/Metrics.cpp
@@ -4,8 +4,8 @@
 using namespace std;
 namespace mdlp {
-    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
+    Metrics::Metrics(labels_t& y_, indices_t& indices_) : y(y_), indices(indices_),
-        numClasses(computeNumClasses(0, indices.size()))
+        numClasses(computeNumClasses(0, indices_.size()))
    {
    }
--- a/README.md
+++ b/README.md
@@ -14,21 +14,27 @@ The implementation tries to mitigate the problem of different label values with
 Other features:
 - Intervals with the same value of the variable are not taken into account for cutpoints.
- Intervals have to have more than two examples to be evaluated.
+- Intervals have to have more than two examples to be evaluated (mdlp).
-The algorithm returns the cut points for the variable.
+- The algorithm returns the cut points for the variable.
 - The transform method uses the cut points returning its index in the following way:
        cut[i - 1] <= x < cut[i]
    using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) method
 - K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
 ## Sample
 To run the sample, just execute the following commands:
 ```bash
-cd sample
+cmake -B build -S .
-cmake -B build
+cmake --build build
-cd build
+build/sample/sample -f iris -m 2
-make
+build/sample/sample -h
 ./sample -f iris -m 2
 ./sample -h
 ```
 ## Test
@@ -36,6 +42,5 @@ make
 To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
 ```bash
-cd tests
+make test
 ./test
 ```
--- a/sample/.vscode/launch.json
+++ b/sample/.vscode/launch.json
@@ -1,21 +0,0 @@
 {
    "version": "0.2.0",
    "configurations": [
        {
            "name": "lldb puro",
            "type": "cppdbg",
            // "targetArchitecture": "arm64",
            "request": "launch",
            "program": "${workspaceRoot}/build/sample",
            "args": [
                "-f",
                "iris"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceRoot}/build/",
            "environment": [],
            "externalConsole": false,
            "MIMode": "lldb"
        },
    ]
 }
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -1,5 +1,6 @@
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_BUILD_TYPE Debug)
-add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
+add_executable(sample sample.cpp ../tests/ArffFiles.cpp)
 target_link_libraries(sample mdlp "${TORCH_LIBRARIES}")
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@@ -5,13 +5,13 @@
 #include <algorithm>
 #include <cstring>
 #include <getopt.h>
 #include <torch/torch.h>
 #include "../Discretizer.h"
 #include "../CPPFImdlp.h"
 #include "../BinDisc.h"
 #include "../tests/ArffFiles.h"
-using namespace std;
+const string PATH = "tests/datasets/";
 using namespace mdlp;
 const string PATH = "../../tests/datasets/";
 /* print a description of all supported options */
 void usage(const char* path)
@@ -20,17 +20,17 @@ void usage(const char* path)
    const char* basename = strrchr(path, '/');
    basename = basename ? basename + 1 : path;
-    cout << "usage: " << basename << "[OPTION]" << endl;
+    std::cout << "usage: " << basename << "[OPTION]" << std::endl;
-    cout << "  -h, --help\t\t Print this help and exit." << endl;
+    std::cout << "  -h, --help\t\t Print this help and exit." << std::endl;
-    cout
+    std::cout
        << "  -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
-        << endl;
+        << std::endl;
-    cout << "  -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << endl;
+    std::cout << "  -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << std::endl;
-    cout << "  -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << endl;
+    std::cout << "  -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << std::endl;
-    cout
+    std::cout
        << "  -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
-        << endl;
+        << std::endl;
-    cout << "  -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << endl;
+    std::cout << "  -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << std::endl;
 }
 tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
@@ -96,56 +96,79 @@ void process_file(const string& path, const string& file_name, bool class_last,
    file.load(path + file_name + ".arff", class_last);
    const auto attributes = file.getAttributes();
    const auto items = file.getSize();
-    cout << "Number of lines: " << items << endl;
+    std::cout << "Number of lines: " << items << std::endl;
-    cout << "Attributes: " << endl;
+    std::cout << "Attributes: " << std::endl;
    for (auto attribute : attributes) {
-        cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
+        std::cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << std::endl;
    }
-    cout << "Class name: " << file.getClassName() << endl;
+    std::cout << "Class name: " << file.getClassName() << std::endl;
-    cout << "Class type: " << file.getClassType() << endl;
+    std::cout << "Class type: " << file.getClassType() << std::endl;
-    cout << "Data: " << endl;
+    std::cout << "Data: " << std::endl;
-    vector<samples_t>& X = file.getX();
+    std::vector<mdlp::samples_t>& X = file.getX();
-    labels_t& y = file.getY();
+    mdlp::labels_t& y = file.getY();
    for (int i = 0; i < 5; i++) {
        for (auto feature : X) {
-            cout << fixed << setprecision(1) << feature[i] << " ";
+            std::cout << fixed << setprecision(1) << feature[i] << " ";
        }
-        cout << y[i] << endl;
+        std::cout << y[i] << std::endl;
    }
    auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
    size_t total = 0;
    for (auto i = 0; i < attributes.size(); i++) {
        auto min_max = minmax_element(X[i].begin(), X[i].end());
-        cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
+        std::cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
        test.fit(X[i], y);
        auto cut_points = test.getCutPoints();
        for (auto item : cut_points) {
-            cout << item;
+            std::cout << item;
            if (item != cut_points.back())
-                cout << ", ";
+                std::cout << ", ";
        }
        total += test.getCutPoints().size();
-        cout << "]" << endl;
+        std::cout << "]" << std::endl;
-        cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
+        std::cout << "Min: " << *min_max.first << " Max: " << *min_max.second << std::endl;
-        cout << "--------------------------" << endl;
+        std::cout << "--------------------------" << std::endl;
    }
    std::cout << "Total cut points ...: " << total << std::endl;
    std::cout << "Total feature states: " << total + attributes.size() << std::endl;
    std::cout << "Version ............: " << test.version() << std::endl;
    std::cout << "Transformed data (vector)..: " << std::endl;
    test.fit(X[0], y);
    auto data = test.transform(X[0]);
    for (int i = 130; i < 135; i++) {
        std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
    }
    auto Xt = torch::tensor(X[0], torch::kFloat32);
    auto yt = torch::tensor(y, torch::kInt32);
    //test.fit_t(Xt, yt);
    auto result = test.fit_transform_t(Xt, yt);
    std::cout << "Transformed data (torch)...: " << std::endl;
    for (int i = 130; i < 135; i++) {
        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << result[i].item<int>() << std::endl;
    }
    auto disc = mdlp::BinDisc(3);
    auto res_v = disc.fit_transform(X[0], y);
    disc.fit_t(Xt, yt);
    auto res_t = disc.transform_t(Xt);
    std::cout << "Transformed data (BinDisc)...: " << std::endl;
    for (int i = 130; i < 135; i++) {
        std::cout << std::fixed << std::setprecision(1) << Xt[i].item<float>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
    }
    cout << "Total cut points ...: " << total << endl;
    cout << "Total feature states: " << total + attributes.size() << endl;
 }
 void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
    float max_cutpoints)
 {
-    cout << "Results: " << "Max_depth: " << max_depth << "  Min_length: " << min_length << "  Max_cutpoints: "
+    std::cout << "Results: " << "Max_depth: " << max_depth << "  Min_length: " << min_length << "  Max_cutpoints: "
-        << max_cutpoints << endl << endl;
+        << max_cutpoints << std::endl << std::endl;
    printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
    printf("==================== ==== ==== ========\n");
    for (const auto& dataset : datasets) {
        ArffFiles file;
        file.load(path + dataset.first + ".arff", dataset.second);
        auto attributes = file.getAttributes();
-        vector<samples_t>& X = file.getX();
+        std::vector<mdlp::samples_t>& X = file.getX();
-        labels_t& y = file.getY();
+        mdlp::labels_t& y = file.getY();
        size_t timing = 0;
        size_t cut_points = 0;
        for (auto i = 0; i < attributes.size(); i++) {
@@ -163,7 +186,7 @@ void process_all_files(const map<string, bool>& datasets, const string& path, in
 int main(int argc, char** argv)
 {
-    map<string, bool> datasets = {
+    std::map<std::string, bool> datasets = {
            {"diabetes",           true},
            {"glass",              true},
            {"iris",               true},
@@ -173,14 +196,14 @@ int main(int argc, char** argv)
            {"mfeat-factors",      true},
            {"test",               true}
    };
-    string file_name;
+    std::string file_name;
-    string path;
+    std::string path;
    int max_depth;
    int min_length;
    float max_cutpoints;
    tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
    if (datasets.find(file_name) == datasets.end() && file_name != "all") {
-        cout << "Invalid file name: " << file_name << endl;
+        std::cout << "Invalid file name: " << file_name << std::endl;
        usage(argv[0]);
        exit(1);
    }
@@ -188,10 +211,10 @@ int main(int argc, char** argv)
        process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
    else {
        process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
-        cout << "File name ....: " << file_name << endl;
+        std::cout << "File name ....: " << file_name << std::endl;
-        cout << "Max depth ....: " << max_depth << endl;
+        std::cout << "Max depth ....: " << max_depth << std::endl;
-        cout << "Min length ...: " << min_length << endl;
+        std::cout << "Min length ...: " << min_length << std::endl;
-        cout << "Max cutpoints : " << max_cutpoints << endl;
+        std::cout << "Max cutpoints : " << max_cutpoints << std::endl;
    }
    return 0;
 }
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -3,7 +3,7 @@ sonar.organization=rmontanana
 # This is the name and version displayed in the SonarCloud UI.
 sonar.projectName=mdlp
-sonar.projectVersion=1.1.3
+sonar.projectVersion=1.2.1
 # sonar.test.exclusions=tests/**
 # sonar.tests=tests/
 # sonar.coverage.exclusions=tests/**,sample/**
--- a/tests/BinDisc_unittest.cpp
+++ b/tests/BinDisc_unittest.cpp
@@ -4,6 +4,7 @@
 #include "gtest/gtest.h"
 #include "ArffFiles.h"
 #include "../BinDisc.h"
 #include "Experiments.hpp"
 namespace mdlp {
    const float margin = 1e-4;
@@ -37,12 +38,14 @@ namespace mdlp {
    TEST_F(TestBinDisc3U, Easy3BinsUniform)
    {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
-        fit(X);
+        auto y = labels_t();
        fit(X, y);
        auto cuts = getCutPoints();
-        EXPECT_NEAR(3.66667, cuts[0], margin);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_NEAR(6.33333, cuts[1], margin);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(3.66667, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(6.33333, cuts.at(2), margin);
        EXPECT_NEAR(9.0, cuts.at(3), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
@@ -52,10 +55,11 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_NEAR(3.666667, cuts[0], margin);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_NEAR(6.333333, cuts[1], margin);
+        EXPECT_NEAR(1, cuts[0], margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(3.666667, cuts[1], margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(6.333333, cuts[2], margin);
        EXPECT_NEAR(9, cuts[3], margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
@@ -65,12 +69,13 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.0, cuts[0]);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(4.0, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(7.0, cuts.at(2), margin);
        EXPECT_NEAR(10.0, cuts.at(3), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc3Q, X10BinsQuantile)
@@ -78,12 +83,13 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4, cuts[0]);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_EQ(7, cuts[1]);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(4.0, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(7.0, cuts.at(2), margin);
        EXPECT_NEAR(10.0, cuts.at(3), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc3U, X11BinsUniform)
@@ -91,10 +97,11 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_NEAR(4.33333, cuts[0], margin);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_NEAR(7.66667, cuts[1], margin);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(4.33333, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(7.66667, cuts.at(2), margin);
        EXPECT_NEAR(11.0, cuts.at(3), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
@@ -104,10 +111,11 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_NEAR(4.33333, cuts[0], margin);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_NEAR(7.66667, cuts[1], margin);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(4.33333, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(7.66667, cuts.at(2), margin);
        EXPECT_NEAR(11.0, cuts.at(3), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
        EXPECT_EQ(expected, labels);
@@ -117,8 +125,9 @@ namespace mdlp {
        samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        ASSERT_EQ(2, cuts.size());
-        EXPECT_EQ(1, cuts.size());
+        EXPECT_NEAR(1, cuts.at(0), margin);
        EXPECT_NEAR(1, cuts.at(1), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 0, 0 };
        EXPECT_EQ(expected, labels);
@@ -128,8 +137,9 @@ namespace mdlp {
        samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        ASSERT_EQ(2, cuts.size());
-        EXPECT_EQ(1, cuts.size());
+        EXPECT_NEAR(1, cuts.at(0), margin);
        EXPECT_NEAR(1, cuts.at(1), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 0, 0 };
        EXPECT_EQ(expected, labels);
@@ -139,16 +149,18 @@ namespace mdlp {
        samples_t X = {};
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        ASSERT_EQ(2, cuts.size());
-        EXPECT_EQ(1, cuts.size());
+        EXPECT_NEAR(0, cuts.at(0), margin);
        EXPECT_NEAR(0, cuts.at(1), margin);
    }
    TEST_F(TestBinDisc3Q, EmptyQuantile)
    {
        samples_t X = {};
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        ASSERT_EQ(2, cuts.size());
-        EXPECT_EQ(1, cuts.size());
+        EXPECT_NEAR(0, cuts.at(0), margin);
        EXPECT_NEAR(0, cuts.at(1), margin);
    }
    TEST(TestBinDisc3, ExceptionNumberBins)
    {
@@ -159,44 +171,41 @@ namespace mdlp {
        samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_NEAR(1.66667, cuts[0], margin);
+        ASSERT_EQ(4, cuts.size());
-        EXPECT_NEAR(2.33333, cuts[1], margin);
+        EXPECT_NEAR(1, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(1.66667, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(2.33333, cuts.at(2), margin);
        EXPECT_NEAR(3.0, cuts.at(3), margin);
        auto labels = transform(X);
        labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
        EXPECT_EQ(expected, labels);
-        EXPECT_EQ(3.0, X[0]); // X is not modified
+        ASSERT_EQ(3.0, X[0]); // X is not modified
    }
    TEST_F(TestBinDisc3Q, EasyRepeated)
    {
        samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
        fit(X);
        auto cuts = getCutPoints();
-        std::cout << "cuts: ";
+        ASSERT_EQ(3, cuts.size());
-        for (auto cut : cuts) {
+        EXPECT_NEAR(1, cuts.at(0), margin);
-            std::cout << cut << " ";
+        EXPECT_NEAR(1.66667, cuts.at(1), margin);
-        }
+        EXPECT_NEAR(3.0, cuts.at(2), margin);
        std::cout << std::endl;
        std::cout << std::string(80, '-') << std::endl;
        EXPECT_NEAR(1.66667, cuts[0], margin);
        EXPECT_EQ(numeric_limits<float>::max(), cuts[1]);
        EXPECT_EQ(2, cuts.size());
        auto labels = transform(X);
        labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
        EXPECT_EQ(expected, labels);
-        EXPECT_EQ(3.0, X[0]); // X is not modified
+        ASSERT_EQ(3.0, X[0]); // X is not modified
    }
    TEST_F(TestBinDisc4U, Easy4BinsUniform)
    {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(3.75, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(6.5, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(9.25, cuts[2]);
+        EXPECT_NEAR(3.75, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(6.5, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(9.25, cuts.at(3), margin);
        EXPECT_NEAR(12.0, cuts.at(4), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
@@ -206,11 +215,12 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(3.75, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(6.5, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(9.25, cuts[2]);
+        EXPECT_NEAR(3.75, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(6.5, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(9.25, cuts.at(3), margin);
        EXPECT_NEAR(12.0, cuts.at(4), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
@@ -220,13 +230,14 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.0, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(10.0, cuts[2]);
+        EXPECT_NEAR(4.0, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(7.0, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(10.0, cuts.at(3), margin);
        EXPECT_NEAR(13.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4Q, X13BinsQuantile)
@@ -234,13 +245,14 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.0, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(10.0, cuts[2]);
+        EXPECT_NEAR(4.0, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(7.0, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(10.0, cuts.at(3), margin);
        EXPECT_NEAR(13.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4U, X14BinsUniform)
@@ -248,11 +260,12 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.25, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(7.5, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(10.75, cuts[2]);
+        EXPECT_NEAR(4.25, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(7.5, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(10.75, cuts.at(3), margin);
        EXPECT_NEAR(14.0, cuts.at(4), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
@@ -262,11 +275,12 @@ namespace mdlp {
        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.25, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(7.5, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(10.75, cuts[2]);
+        EXPECT_NEAR(4.25, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(7.5, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(10.75, cuts.at(3), margin);
        EXPECT_NEAR(14.0, cuts.at(4), margin);
        auto labels = transform(X);
        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
        EXPECT_EQ(expected, labels);
@@ -276,13 +290,14 @@ namespace mdlp {
        samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.5, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(8, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(11.5, cuts[2]);
+        EXPECT_NEAR(4.5, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(8, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(11.5, cuts.at(3), margin);
        EXPECT_NEAR(15.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
+        labels_t expected = { 3, 1, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4Q, X15BinsQuantile)
@@ -290,13 +305,14 @@ namespace mdlp {
        samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(4.5, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(8, cuts[1]);
+        EXPECT_NEAR(1.0, cuts.at(0), margin);
-        EXPECT_EQ(11.5, cuts[2]);
+        EXPECT_NEAR(4.5, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(8, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(11.5, cuts.at(3), margin);
        EXPECT_NEAR(15.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
+        labels_t expected = { 3, 3, 3, 3, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4U, RepeatedValuesUniform)
@@ -305,13 +321,14 @@ namespace mdlp {
        //               0    1     2   3    4    5    6    7    8    9
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(1.0, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(2.0, cuts[1]);
+        EXPECT_NEAR(0.0, cuts.at(0), margin);
-        EXPECT_EQ(3.0, cuts[2]);
+        EXPECT_NEAR(1.0, cuts.at(1), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_NEAR(2.0, cuts.at(2), margin);
-        EXPECT_EQ(4, cuts.size());
+        EXPECT_NEAR(3.0, cuts.at(3), margin);
        EXPECT_NEAR(4.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4Q, RepeatedValuesQuantile)
@@ -320,12 +337,14 @@ namespace mdlp {
        //               0    1     2   3    4    5    6    7    8    9
        fit(X);
        auto cuts = getCutPoints();
-        EXPECT_EQ(2.0, cuts[0]);
+        ASSERT_EQ(5, cuts.size());
-        EXPECT_EQ(3.0, cuts[1]);
+        EXPECT_NEAR(0.0, cuts.at(0), margin);
-        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_NEAR(1.0, cuts.at(1), margin);
-        EXPECT_EQ(3, cuts.size());
+        EXPECT_NEAR(2.0, cuts.at(2), margin);
        EXPECT_NEAR(3.0, cuts.at(3), margin);
        EXPECT_NEAR(4.0, cuts.at(4), margin);
        auto labels = transform(X);
-        labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 };
        EXPECT_EQ(expected, labels);
    }
    TEST_F(TestBinDisc4U, irisUniform)
@@ -337,6 +356,13 @@ namespace mdlp {
        auto Xt = transform(X[0]);
        labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
        EXPECT_EQ(expected, Xt);
        auto Xtt = fit_transform(X[0], file.getY());
        EXPECT_EQ(expected, Xtt);
        auto Xt_t = torch::tensor(X[0], torch::kFloat32);
        auto y_t = torch::tensor(file.getY(), torch::kInt32);
        auto Xtt_t = fit_transform_t(Xt_t, y_t);
        for (int i = 0; i < expected.size(); i++)
            EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
    }
    TEST_F(TestBinDisc4Q, irisQuantile)
    {
@@ -347,5 +373,44 @@ namespace mdlp {
        auto Xt = transform(X[0]);
        labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
        EXPECT_EQ(expected, Xt);
        auto Xtt = fit_transform(X[0], file.getY());
        EXPECT_EQ(expected, Xtt);
        auto Xt_t = torch::tensor(X[0], torch::kFloat32);
        auto y_t = torch::tensor(file.getY(), torch::kInt32);
        auto Xtt_t = fit_transform_t(Xt_t, y_t);
        for (int i = 0; i < expected.size(); i++)
            EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
        fit_t(Xt_t, y_t);
        auto Xt_t2 = transform_t(Xt_t);
        for (int i = 0; i < expected.size(); i++)
            EXPECT_EQ(expected[i], Xt_t2[i].item<int>());
    }
    // Replays every experiment read from tests.txt: the input samples are
    // regenerated from the experiment's from/to/step values, discretized with a
    // BinDisc configured from the experiment, and compared with the recorded
    // results. Label mismatches are only reported on stdout; the result sizes
    // and the cut points are checked with gtest expectations.
    TEST(TestBinDiscGeneric, Fileset)
    {
        Experiments exps(data_path + "tests.txt");
        int num = 0;
        while (exps.is_next()) {
            Experiment exp = exps.next();
            ++num;
            std::cout << "Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl;
            const strategy_t strategy = exp.strategy_ == "Q" ? strategy_t::QUANTILE : strategy_t::UNIFORM;
            BinDisc disc(exp.n_bins_, strategy);
            // Build the half-open range [from_, to_) by repeatedly adding step_.
            std::vector<float> test;
            float value = exp.from_;
            while (value < exp.to_) {
                test.push_back(value);
                value += exp.step_;
            }
            // show_vector(test, "Test");
            std::vector<int> empty;
            auto Xt = disc.fit_transform(test, empty);
            auto cuts = disc.getCutPoints();
            EXPECT_EQ(exp.discretized_data_.size(), Xt.size());
            for (int i = 0; i < exp.discretized_data_.size(); ++i) {
                const auto wanted = exp.discretized_data_.at(i);
                const auto got = Xt.at(i);
                if (wanted == got) {
                    continue;
                }
                std::cout << "Error at " << i << " Expected: " << wanted << " Got: " << got << std::endl;
            }
            EXPECT_EQ(exp.cutpoints_.size(), cuts.size());
            for (int i = 0; i < exp.cutpoints_.size(); ++i) {
                EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin);
            }
        }
    }
 }
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,10 +1,8 @@
 cmake_minimum_required(VERSION 3.20)
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 cmake_policy(SET CMP0135 NEW)
 include(FetchContent)
 include_directories(${GTEST_INCLUDE_DIRS})
 FetchContent_Declare(
        googletest
        URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
@@ -13,22 +11,35 @@ FetchContent_Declare(
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
 FetchContent_MakeAvailable(googletest)
 find_package(Torch REQUIRED)
 enable_testing()
 include_directories(${TORCH_INCLUDE_DIRS})
 add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
 add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
 add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp)
 target_link_libraries(Metrics_unittest GTest::gtest_main)
 target_link_libraries(FImdlp_unittest GTest::gtest_main)
 target_link_libraries(BinDisc_unittest GTest::gtest_main)
 target_compile_options(Metrics_unittest PRIVATE --coverage)
 target_compile_options(FImdlp_unittest PRIVATE --coverage)
 target_compile_options(BinDisc_unittest PRIVATE --coverage)
 target_link_options(Metrics_unittest PRIVATE --coverage)
 add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp ../Discretizer.cpp)
 target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
 target_compile_options(FImdlp_unittest PRIVATE --coverage)
 target_link_options(FImdlp_unittest PRIVATE --coverage)
 add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp ../Discretizer.cpp)
 target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
 target_compile_options(BinDisc_unittest PRIVATE --coverage)
 target_link_options(BinDisc_unittest PRIVATE --coverage)
 add_executable(Discretizer_unittest ../BinDisc.cpp ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp ../Discretizer.cpp Discretizer_unittest.cpp)
 target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
 target_compile_options(Discretizer_unittest PRIVATE --coverage)
 target_link_options(Discretizer_unittest PRIVATE --coverage)
 include(GoogleTest)
 gtest_discover_tests(Metrics_unittest)
 gtest_discover_tests(FImdlp_unittest)
 gtest_discover_tests(BinDisc_unittest)
 gtest_discover_tests(Discretizer_unittest)
--- a/tests/Discretizer_unittest.cpp
+++ b/tests/Discretizer_unittest.cpp
@@ -0,0 +1,83 @@
 #include <fstream>
 #include <string>
 #include <iostream>
 #include "gtest/gtest.h"
 #include "ArffFiles.h"
 #include "../Discretizer.h"
 #include "../BinDisc.h"
 #include "../CPPFImdlp.h"
 namespace mdlp {
    const float margin = 1e-4;
    // Returns "../datasets/" when iris.arff can be opened there, otherwise
    // falls back to "../../tests/datasets/".
    static std::string set_data_path()
    {
        const std::string local_dir = "../datasets/";
        // The temporary probe stream is closed by its destructor.
        const bool found = std::ifstream(local_dir + "iris.arff").is_open();
        return found ? local_dir : std::string("../../tests/datasets/");
    }
    const std::string data_path = set_data_path();
    // Checks the version string obtained through a Discretizer base pointer.
    // NOTE(review): sonar-project.properties in this change sets
    // sonar.projectVersion to 1.2.1 while this test expects "1.2.3" — confirm
    // the expected literal against Discretizer::version().
    TEST(Discretizer, Version)
    {
        Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
        auto version = disc->version();
        delete disc;
        // Terminate the line so the message does not run into gtest's own output.
        std::cout << "Version computed: " << version << std::endl;
        EXPECT_EQ("1.2.3", version);
    }
    // Fits a 4-bin UNIFORM BinDisc, used through the Discretizer interface, on
    // X[0] of iris.arff (with an empty label vector) and compares the
    // transformed values with the recorded ones.
    TEST(Discretizer, BinIrisUniform)
    {
        labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
        ArffFiles file;
        file.load(data_path + "iris.arff", true);
        vector<samples_t>& X = file.getX();
        labels_t y;
        Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
        disc->fit(X[0], y);
        auto Xt = disc->transform(X[0]);
        // Release the discretizer before the expectation is evaluated.
        delete disc;
        EXPECT_EQ(expected, Xt);
    }
    // Fits a 4-bin QUANTILE BinDisc, used through the Discretizer interface, on
    // X[0] of iris.arff (with an empty label vector) and compares the
    // transformed values with the recorded ones.
    TEST(Discretizer, BinIrisQuantile)
    {
        labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
        ArffFiles file;
        file.load(data_path + "iris.arff", true);
        vector<samples_t>& X = file.getX();
        labels_t y;
        Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE);
        disc->fit(X[0], y);
        auto Xt = disc->transform(X[0]);
        // Release the discretizer before the expectation is evaluated.
        delete disc;
        EXPECT_EQ(expected, Xt);
    }
    // Fits a default-constructed CPPFImdlp, used through the Discretizer
    // interface, on X[1] of iris.arff with the file's labels and compares the
    // transformed values element by element with the recorded ones.
    TEST(Discretizer, FImdlpIris)
    {
        labels_t expected = {
            5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
            5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
            5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
            3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
            3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
            4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
            1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
        };
        ArffFiles file;
        Discretizer* disc = new CPPFImdlp();
        file.load(data_path + "iris.arff", true);
        vector<samples_t>& X = file.getX();
        labels_t& y = file.getY();
        disc->fit(X[1], y);
        auto computed = disc->transform(X[1]);
        delete disc;
        // Fatal on purpose: the loop below indexes `expected` with indices taken
        // from `computed`, which would read out of bounds if the sizes differed.
        ASSERT_EQ(computed.size(), expected.size());
        for (unsigned long i = 0; i < computed.size(); i++) {
            EXPECT_EQ(computed[i], expected[i]);
        }
    }
 }
--- a/tests/Experiments.hpp
+++ b/tests/Experiments.hpp
@@ -0,0 +1,108 @@
 #ifndef EXPERIMENTS_HPP
 #define EXPERIMENTS_HPP
 #include<fstream>
 #include<iostream>
 #include<sstream>
 #include<stdexcept>
 #include<string>
 #include<tuple>
 #include<type_traits>
 #include<utility>
 #include<vector>
 #include "../typesFImdlp.h"
 // One recorded test case: the from/to/step values describing the input
 // samples, the number of bins and strategy code for the discretizer, and the
 // expected discretized labels and cut points.
 class Experiment {
 public:
    // Stores the given values, taking over the string and vectors passed in.
    // Throws std::invalid_argument when strategy is neither "Q" nor "U".
    Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<float> cutpoints) :
        from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ std::move(strategy) },
        discretized_data_{ std::move(data_discretized) }, cutpoints_{ std::move(cutpoints) }
    {
        // The by-value parameters were moved from above, so validate the member.
        if (strategy_ != "Q" && strategy_ != "U") {
            throw std::invalid_argument("Invalid strategy " + strategy_);
        }
    }
    float from_;
    float to_;
    float step_;
    int n_bins_;
    std::string strategy_;
    std::vector<int> discretized_data_;
    std::vector<float> cutpoints_;
 };
 // Sequential reader of experiment records stored in a text file.
 // Lines starting with '#' are skipped between records. A record starts with a
 // type line; for "RANGE" the next line is the header
 // `from to step n_bins strategy`, followed by one line of expected
 // discretized labels and one line of expected cut points.
 // Usage: while (exps.is_next()) { Experiment e = exps.next(); ... }
 class Experiments {
 public:
    // Opens `filename`; throws std::runtime_error when it cannot be opened.
    Experiments(const std::string& filename) : filename{ filename }
    {
        test_file.open(filename);
        if (!test_file.is_open()) {
            throw std::runtime_error("File " + filename + " not found");
        }
    }
    ~Experiments()
    {
        test_file.close();
    }
    // True once is_next() has run out of input.
    bool end() const
    {
        return exp_end;
    }
    // Advances to the first line of the next record, skipping comment and
    // blank lines. Returns false (and marks the end) when no such line is left.
    bool is_next()
    {
        // Test the read itself rather than eof(): eof() is also set after
        // successfully reading a final line that has no trailing newline, and
        // it stays clear on other stream failures.
        while (std::getline(test_file, line)) {
            if (line.empty() || line[0] == '#') {
                continue;
            }
            return true;
        }
        exp_end = true;
        return false;
    }
    // Parses the record whose first line was found by the last is_next() call.
    Experiment next()
    {
        return parse_experiment(line);
    }
 private:
    // Splits a header line into (from, to, step, n_bins, strategy).
    // std::stof / std::stoi throw on fields that are not numbers.
    std::tuple<float, float, float, int, std::string> parse_header(const std::string& line)
    {
        std::istringstream iss(line);
        std::string from_, to_, step_, n_bins, strategy;
        iss >> from_ >> to_ >> step_ >> n_bins >> strategy;
        return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy };
    }
    // Converts a whitespace-separated line into a vector of T: std::stof is
    // used for float, std::stoi for every other T.
    template <typename T>
    std::vector<T> parse_vector(const std::string& line)
    {
        std::istringstream iss(line);
        std::vector<T> data;
        std::string d;
        while (iss >> d) {
            // Select the conversion at compile time so integers are not routed
            // through float (the common type of a stof/stoi ternary).
            if constexpr (std::is_same_v<T, float>) {
                data.push_back(std::stof(d));
            } else {
                data.push_back(std::stoi(d));
            }
        }
        return data;
    }
    // Reads the three lines that follow the record's type line (passed in
    // `line`, which is overwritten by the reads) and builds the Experiment.
    Experiment parse_experiment(std::string& line)
    {
        // Decide on the record type before `line` is overwritten below.
        const bool is_range = (line == "RANGE");
        // Header fields live at function scope because they are needed after
        // the branch to construct the result.
        float from_{ 0.0f };
        float to_{ 0.0f };
        float step_{ 0.0f };
        int n_bins{ 0 };
        std::string strategy;
        std::getline(test_file, line);
        if (is_range) {
            std::tie(from_, to_, step_, n_bins, strategy) = parse_header(line);
        }
        // NOTE(review): for any other record type the second line is consumed
        // but not interpreted, so the header fields keep their defaults and the
        // Experiment constructor rejects the empty strategy — confirm whether
        // another record format is meant to be supported here.
        std::getline(test_file, line);
        auto data_discretized = parse_vector<int>(line);
        std::getline(test_file, line);
        auto cutpoints = parse_vector<float>(line);
        return Experiment{ from_, to_, step_, n_bins, std::move(strategy), std::move(data_discretized), std::move(cutpoints) };
    }
    std::ifstream test_file;
    std::string filename;
    std::string line;
    bool exp_end{ false };
 };
 // Prints `title`, a colon and the elements of `data` separated by ", " on a
 // single line of std::cout, ending with std::endl.
 template <typename T>
 void show_vector(const std::vector<T>& data, std::string title)
 {
    std::cout << title << ": ";
    for (auto it = data.begin(); it != data.end(); ++it) {
        // Every element except the first is preceded by the separator.
        std::cout << (it == data.begin() ? "" : ", ") << *it;
    }
    std::cout << std::endl;
 }
 #endif
--- a/tests/FImdlp_unittest.cpp
+++ b/tests/FImdlp_unittest.cpp
@@ -124,7 +124,7 @@ namespace mdlp {
    {
        samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
        labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
-        cutPoints_t expected = { 1.5f, 2.5f };
+        cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 };
        fit(X_, y_);
        auto computed = getCutPoints();
        EXPECT_EQ(computed.size(), expected.size());
@@ -167,29 +167,31 @@ namespace mdlp {
        y = { 1 };
        fit(X, y);
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 0);
+        EXPECT_EQ(computed.size(), 2);
        X = { 1, 3 };
        y = { 1, 2 };
        fit(X, y);
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 0);
+        EXPECT_EQ(computed.size(), 2);
        X = { 2, 4 };
        y = { 1, 2 };
        fit(X, y);
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 0);
+        EXPECT_EQ(computed.size(), 2);
        X = { 1, 2, 3 };
        y = { 1, 2, 2 };
        fit(X, y);
        computed = getCutPoints();
-        EXPECT_EQ(computed.size(), 1);
+        EXPECT_EQ(computed.size(), 3);
-        EXPECT_NEAR(computed[0], 1.5, precision);
+        EXPECT_NEAR(computed[0], 1, precision);
        EXPECT_NEAR(computed[1], 1.5, precision);
        EXPECT_NEAR(computed[2], 3, precision);
    }
    TEST_F(TestFImdlp, TestArtificialDataset)
    {
        fit(X, y);
-        cutPoints_t expected = { 5.05f };
+        cutPoints_t expected = { 4.7, 5.05, 6.0 };
        vector<precision_t> computed = getCutPoints();
        EXPECT_EQ(computed.size(), expected.size());
        for (unsigned long i = 0; i < computed.size(); i++) {
@@ -200,10 +202,10 @@ namespace mdlp {
    TEST_F(TestFImdlp, TestIris)
    {
        vector<cutPoints_t> expected = {
-                {5.45f, 5.75f},
+                {4.3, 5.45f, 5.75f, 7.9},
-                {2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
+                {2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4},
-                {2.45f, 4.75f, 5.05f},
+                {1, 2.45f, 4.75f, 5.05f, 6.9},
-                {0.8f,  1.75f}
+                {0.1, 0.8f,  1.75f, 2.5}
        };
        vector<int> depths = { 3, 5, 4, 3 };
        auto test = CPPFImdlp();
@@ -213,7 +215,7 @@ namespace mdlp {
    TEST_F(TestFImdlp, ComputeCutPointsGCase)
    {
        cutPoints_t expected;
-        expected = { 1.5 };
+        expected = { 0, 1.5, 2 };
        samples_t X_ = { 0, 1, 2, 2, 2 };
        labels_t y_ = { 1, 1, 1, 2, 2 };
        fit(X_, y_);
@@ -247,10 +249,10 @@ namespace mdlp {
        // Set max_depth to 1
        auto test = CPPFImdlp(3, 1, 0);
        vector<cutPoints_t> expected = {
-                {5.45f},
+                {4.3, 5.45f, 7.9},
-                {3.35f},
+                {2, 3.35f, 4.4},
-                {2.45f},
+                {1, 2.45f, 6.9},
-                {0.8f}
+                {0.1, 0.8f, 2.5}
        };
        vector<int> depths = { 1, 1, 1, 1 };
        test_dataset(test, "iris", expected, depths);
@@ -261,10 +263,10 @@ namespace mdlp {
        auto test = CPPFImdlp(75, 100, 0);
        // Set min_length to 75
        vector<cutPoints_t> expected = {
-                {5.45f, 5.75f},
+                {4.3, 5.45f, 5.75f, 7.9},
-                {2.85f, 3.35f},
+                {2, 2.85f, 3.35f, 4.4},
-                {2.45f, 4.75f},
+                {1, 2.45f, 4.75f, 6.9},
-                {0.8f,  1.75f}
+                {0.1, 0.8f,  1.75f, 2.5}
        };
        vector<int> depths = { 3, 2, 2, 2 };
        test_dataset(test, "iris", expected, depths);
@@ -275,10 +277,10 @@ namespace mdlp {
        // Set min_length to 75
        auto test = CPPFImdlp(75, 2, 0);
        vector<cutPoints_t> expected = {
-                {5.45f, 5.75f},
+                {4.3, 5.45f, 5.75f, 7.9},
-                {2.85f, 3.35f},
+                {2, 2.85f, 3.35f, 4.4},
-                {2.45f, 4.75f},
+                {1, 2.45f, 4.75f, 6.9},
-                {0.8f,  1.75f}
+                {0.1, 0.8f,  1.75f, 2.5}
        };
        vector<int> depths = { 2, 2, 2, 2 };
        test_dataset(test, "iris", expected, depths);
@@ -289,10 +291,10 @@ namespace mdlp {
        // Set min_length to 75
        auto test = CPPFImdlp(75, 2, 1);
        vector<cutPoints_t> expected = {
-                {5.45f},
+                {4.3, 5.45f, 7.9},
-                {2.85f},
+                {2, 2.85f, 4.4},
-                {2.45f},
+                {1, 2.45f, 6.9},
-                {0.8f}
+                {0.1, 0.8f, 2.5}
        };
        vector<int> depths = { 2, 2, 2, 2 };
        test_dataset(test, "iris", expected, depths);
@@ -304,10 +306,10 @@ namespace mdlp {
        // Set min_length to 75
        auto test = CPPFImdlp(75, 2, 0.2f);
        vector<cutPoints_t> expected = {
-                {5.45f, 5.75f},
+                {4.3, 5.45f, 5.75f, 7.9},
-                {2.85f, 3.35f},
+                {2, 2.85f, 3.35f, 4.4},
-                {2.45f, 4.75f},
+                {1, 2.45f, 4.75f, 6.9},
-                {0.8f,  1.75f}
+                {0.1, 0.8f,  1.75f, 2.5}
        };
        vector<int> depths = { 2, 2, 2, 2 };
        test_dataset(test, "iris", expected, depths);
@@ -327,7 +329,6 @@ namespace mdlp {
            computed = compute_max_num_cut_points();
            ASSERT_EQ(expected, computed);
        }
    }
    TEST_F(TestFImdlp, TransformTest)
    {
@@ -350,5 +351,10 @@ namespace mdlp {
        for (unsigned long i = 0; i < computed.size(); i++) {
            EXPECT_EQ(computed[i], expected[i]);
        }
        auto computed_ft = fit_transform(X[1], y);
        EXPECT_EQ(computed_ft.size(), expected.size());
        for (unsigned long i = 0; i < computed_ft.size(); i++) {
            EXPECT_EQ(computed_ft[i], expected[i]);
        }
    }
 }
--- a/tests/Metrics_unittest.cpp
+++ b/tests/Metrics_unittest.cpp
@@ -2,13 +2,13 @@
 #include "../Metrics.h"
 namespace mdlp {
-    class TestMetrics: public Metrics, public testing::Test {
+    class TestMetrics : public Metrics, public testing::Test {
    public:
        labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
        indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
-        precision_t precision = 0.000001f;
+        precision_t precision = 1e-6;
-        TestMetrics(): Metrics(y_, indices_) {};
+        TestMetrics() : Metrics(y_, indices_) {};
        void SetUp() override
        {
--- a/tests/datasets/tests.txt
+++ b/tests/datasets/tests.txt
@@ -0,0 +1,149 @@
 #
 # from, to, step, #bins, Q/U
 # discretized data
 # cut points
 #
 RANGE
 0, 100, 1, 4, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 24.75, 49.5, 74.25, 99.0
 RANGE
 0, 50, 1, 4, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 12.25, 24.5, 36.75, 49.0
 RANGE
 0, 100, 1, 3, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 33.0, 66.0, 99.0
 RANGE
 0, 50, 1, 3, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 16.33333, 32.66667, 49.0
 RANGE
 0, 10, 1, 3, Q
 0, 0, 0, 0, 1, 1, 1, 2, 2, 2
 0.0, 3.0, 6.0, 9.0
 RANGE
 0, 100, 1, 4, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 24.75, 49.5, 74.25, 99.0
 RANGE
 0, 50, 1, 4, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 12.25, 24.5, 36.75, 49.0
 RANGE
 0, 100, 1, 3, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 33.0, 66.0, 99.0
 RANGE
 0, 50, 1, 3, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 16.33333, 32.66667, 49.0
 RANGE
 0, 10, 1, 3, U
 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
 0.0, 3.0, 6.0, 9.0
 RANGE
 1, 10, 1, 3, Q
 0, 0, 0, 1, 1, 1, 2, 2, 2
 1.0, 3.66667, 6.33333, 9.0
 RANGE
 1, 10, 1, 3, U
 0, 0, 0, 1, 1, 1, 2, 2, 2
 1.0, 3.66667, 6.33333, 9.0
 RANGE
 1, 11, 1, 3, Q
 0, 0, 0, 1, 1, 1, 1, 2, 2, 2
 1.0, 4.0, 7.0, 10.0
 RANGE
 1, 11, 1, 3, U
 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.0, 7.0, 10.0
 RANGE
 1, 12, 1, 3, Q
 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.33333, 7.66667, 11.0
 RANGE
 1, 12, 1, 3, U
 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.33333, 7.66667, 11.0
 RANGE
 1, 13, 1, 3, Q
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.66667, 8.33333, 12.0
 RANGE
 1, 13, 1, 3, U
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.66667, 8.33333, 12.0
 RANGE
 1, 14, 1, 3, Q
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.0, 9.0, 13.0
 RANGE
 1, 14, 1, 3, U
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.0, 9.0, 13.0
 RANGE
 1, 15, 1, 3, Q
 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.33333, 9.66667, 14.0
 RANGE
 1, 15, 1, 3, U
 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.33333, 9.66667, 14.0
 VECTOR
 Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
 1, 0, 0, 1, 0, 0, 1, 0, 0
 1.0, 1.66667, 3.0
 VECTOR
 U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
 2, 0, 0, 2, 0, 0, 2, 0, 0
 1.0, 1.66667, 2.33333, 3.0
 VECTOR
 Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.66667, 8.33333, 12.0
 VECTOR
 U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
 1.0, 4.66667, 8.33333, 12.0
 VECTOR
 Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.0, 9.0, 13.0
 VECTOR
 U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.0, 9.0, 13.0
 VECTOR
 Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.33333, 9.66667, 14.0
 VECTOR
 U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.33333, 9.66667, 14.0
 VECTOR
 Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.66667, 10.33333, 15.0
 VECTOR
 U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
 1.0, 5.66667, 10.33333, 15.0
 VECTOR
 Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
 2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
 1.0, 5.66667, 10.33333, 15.0
 VECTOR
 U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
 2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
 1.0, 5.66667, 10.33333, 15.0
 VECTOR
 Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
 0, 0, 0, 0, 1, 1, 2, 2, 2, 2
 0.0, 1.0, 3.0, 4.0
 VECTOR
 U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
 0, 0, 0, 0, 1, 1, 2, 2, 2, 2
 0.0, 1.33333, 2.66667, 4.0
--- a/tests/k
+++ b/tests/k
--- a/tests/k.cpp
+++ b/tests/k.cpp
@@ -0,0 +1,32 @@
 #include <iostream>
 #include <vector>
 #include <algorithm> // For std::lower_bound
 // For every element of `data`, returns the index of the first entry of `cuts`
 // that is not less than it (cuts.size() when no such entry exists).
 // `cuts` has to be sorted ascending, as required by std::lower_bound.
 std::vector<int> searchsorted(const std::vector<float>& cuts, const std::vector<float>& data) {
    std::vector<int> positions(data.size());
    std::transform(data.begin(), data.end(), positions.begin(),
        [&cuts](float sample) {
            // Insertion point that keeps `cuts` ordered
            const auto firstNotLess = std::lower_bound(cuts.begin(), cuts.end(), sample);
            return static_cast<int>(firstNotLess - cuts.begin());
        });
    return positions;
 }
 // Scratch driver: looks up the values 1..11 against a single cut point (10)
 // and prints the resulting indices separated by spaces.
 int main() {
    const std::vector<float> thresholds = { 10.0 };
    std::vector<float> samples;
    for (int v = 1; v <= 11; ++v) {
        samples.push_back(static_cast<float>(v));
    }
    const std::vector<int> bins = searchsorted(thresholds, samples);
    for (std::vector<int>::size_type k = 0; k < bins.size(); ++k) {
        std::cout << bins[k] << " ";
    }
    return 0;
 }
--- a/tests/t
+++ b/tests/t
--- a/tests/t.cpp
+++ b/tests/t.cpp
@@ -0,0 +1,102 @@
 #include <algorithm>
 #include <cmath>
 #include <iostream>
 #include <limits>
 #include <string>
 #include <vector>
 typedef float precision_t;
 // Discretizes `data` against the ascending `cutPoints`: every value is mapped
 // to the position std::lower_bound finds for it, i.e. the number of cut
 // points strictly smaller than the value. A value equal to a cut point
 // therefore falls into the lower bin.
 // `cutPoints` is taken by const reference; the previous by-value parameter
 // copied the whole vector on each call.
 std::vector<int> transform(const std::vector<float>& cutPoints, const std::vector<float>& data)
 {
    std::vector<int> discretizedData;
    discretizedData.reserve(data.size());
    for (const float item : data) {
        // First cut point that is >= item (end() when item exceeds all of them)
        const auto firstNotLess = std::lower_bound(cutPoints.begin(), cutPoints.end(), item);
        discretizedData.push_back(static_cast<int>(firstNotLess - cutPoints.begin()));
    }
    return discretizedData;
 }
 // Writes "<title>: " followed by the elements of `data` joined with ", "
 // to std::cout, and finishes the line with std::endl.
 template <typename T>
 void show_vector(const std::vector<T>& data, std::string title)
 {
    std::cout << title << ": ";
    bool first = true;
    for (const auto& element : data) {
        if (!first) {
            std::cout << ", ";
        }
        std::cout << element;
        first = false;
    }
    std::cout << std::endl;
 }
 // Returns the first num - 1 points of an evenly spaced grid of `num` points
 // between `start` and `end`; the end point itself is not emitted.
 // Special cases:
 //   - start == end: returns { start, end } regardless of `num`.
 //   - num < 2: returns an empty vector. Previously num == 1 divided by zero
 //     and num <= 0 turned the signed bound into a huge unsigned loop limit.
 std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
 {
    if (start == end) {
        return { start, end };
    }
    std::vector<precision_t> linspc;
    if (num < 2) {
        return linspc;
    }
    const precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
    linspc.reserve(static_cast<size_t>(num - 1));
    // Signed counter: keeps the comparison with `num - 1` free of int/size_t mixing
    for (int i = 0; i < num - 1; ++i) {
        linspc.push_back(start + delta * static_cast<precision_t>(i));
    }
    return linspc;
 }
 // Limits `n` to at most `upper`, then raises the result to at least `lower`.
 // When lower > upper the lower bound wins.
 size_t clip(const size_t n, size_t lower, size_t upper)
 {
    const size_t capped = (upper < n) ? upper : n;
    return (lower < capped) ? capped : lower;
 }
 std::vector<precision_t> percentile(std::vector<precision_t>& data, std::vector<precision_t>& percentiles)
 {
    // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
    std::vector<precision_t> results;
    results.reserve(percentiles.size());
    for (auto percentile : percentiles) {
        const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
        const auto indexLower = clip(i, 0, data.size() - 2);
        const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
        const double fraction =
            (percentile / 100.0 - percentI) /
            (static_cast<double>(indexLower + 1) / static_cast<double>(data.size() - 1) - percentI);
        const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
        if (value != results.back())
            results.push_back(value);
    }
    return results;
 }
 // Scratch driver for experimenting with percentile-based cut points.
 // It computes cut points for a small sample, then throws them away and
 // discretizes the sample against a single hard-coded cut point (9.0),
 // printing the data, the discretized values and the cut points.
 int main()
 {
    // Earlier experiments, kept for reference:
    // std::vector<float> test;
    // std::vector<float> cuts = { 0, 24.75, 49.5, 74.25, 10000 };
    // for (int i = 0; i < 100; ++i) {
    //     test.push_back(i);
    // }
    // auto Xt = transform(cuts, test);
    // show_vector(Xt, "Discretized data:");
    // std::vector<float> test2 = { 0,1,2,3,4,5,6,7,8,9,10,11 };
    // std::vector<float> cuts2 = { 0,1,2,3,4,5,6,7,8,9 };
    // auto Xt2 = transform(cuts2, test2);
    // show_vector(Xt2, "discretized data2: ");
    // Percentile ranks between 0 and 100 for 3 bins (num = 3 + 1)
    auto quantiles = linspace(0.0, 100.0, 3 + 1);
    std::vector<float> data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    std::vector<float> cutPoints;
    // Sort before taking percentiles
    std::sort(data.begin(), data.end());
    cutPoints = percentile(data, quantiles);
    // Append the largest representable value as a trailing cut point
    cutPoints.push_back(std::numeric_limits<precision_t>::max());
    // Extra samples appended after sorting, so `data` is no longer ordered
    // from here on; transform processes each value independently.
    data.push_back(15);
    data.push_back(0);
    // The next three calls discard the computed cut points: pop_back removes
    // the value just appended, erase drops the first entry and clear empties
    // the vector, so only the 9.0 pushed below is used.
    cutPoints.pop_back();
    cutPoints.erase(cutPoints.begin());
    cutPoints.clear();
    cutPoints.push_back(9.0);
    auto Xt = transform(cutPoints, data);
    show_vector(data, "Original data");
    show_vector(Xt, "Discretized data");
    show_vector(cutPoints, "Cutpoints");
    return 0;
 }
 /*
 n_bins = 3
 data = [1,2,3,4,5,6,7,8,9,10]
 quantiles = np.linspace(0, 100, n_bins + 1)
 bin_edges = np.percentile(data, quantiles)
 */
--- a/tests/test
+++ b/tests/test
@@ -1,18 +1,15 @@
 #!/bin/bash
-if [ -d build ] ; then
+if [ -d build ] && [ "$1" != "run" ]; then
 	rm -fr build
 fi
 if [ -d gcovr-report ] ; then
 	rm -fr gcovr-report
 fi
-cmake -S . -B build -Wno-dev 
+cmake -S . -B build -Wno-dev -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="--coverage" -DCMAKE_C_FLAGS="--coverage"
 cmake --build build
 cd build
 ctest --output-on-failure
 cd ..
 mkdir gcovr-report
 #lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
 #lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
 #lcov --list lcoverage/main_coverage.info
 cd ..
-gcovr  --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
+gcovr  --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --gcov-filter "Discretizer.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
--- a/tests/testKbins.py
+++ b/tests/testKbins.py
@@ -89,6 +89,7 @@ print(f"Quaintile {clf4q.bin_edges_=}")
 print("-" * 80)
 #
 data, meta = loadarff("tests/datasets/iris.arff")
 labelsu = [
    0,
    0,
@@ -117,12 +118,12 @@ labelsu = [
    0,
    0,
    0,
-    1,
+    0,
-    1,
+    0,
    0,
    0,
    1,
-    1,
+    0,
    1,
    0,
    0,
@@ -149,11 +150,11 @@ labelsu = [
    2,
    0,
    2,
-    1,
+    0,
    0,
    1,
    1,
-    2,
+    1,
    1,
    2,
    1,
@@ -161,9 +162,9 @@ labelsu = [
    2,
    1,
    1,
    1,
    2,
-    2,
+    1,
    2,
    2,
    2,
    2,
@@ -181,7 +182,7 @@ labelsu = [
    1,
    1,
    1,
-    2,
+    1,
    1,
    0,
    1,
@@ -217,14 +218,14 @@ labelsu = [
    2,
    3,
    2,
-    2,
+    1,
    2,
    3,
    3,
    3,
    2,
    2,
-    2,
+    1,
    3,
    2,
    2,
@@ -393,12 +394,19 @@ labelsq = [
    2,
    2,
 ]
-test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
+# test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
-test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
+# test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
-# print("Labels")
+sepallength = [[x] for x in data["sepallength"]]
-# print(labels)
+clf4u.fit(sepallength)
-# print("Expected")
+clf4q.fit(sepallength)
-# print(expected)
+computedu = clf4u.transform(sepallength)
-# for i in range(len(labels)):
+computedq = clf4q.transform(sepallength)
-#     if labels[i] != expected[i]:
+wrongu = 0
-#         print(f"Error at {i} {labels[i]} != {expected[i]}")
+wrongq = 0
 for i in range(len(labelsu)):
    if labelsu[i] != computedu[i]:
        wrongu += 1
    if labelsq[i] != computedq[i]:
        wrongq += 1
 print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Uniform  ={wrongu:3d}")
 print(f"Iris sepallength diff. between BinDisc & sklearn::KBins Quantile ={wrongq:3d}")
--- a/tests/tests_do.py
+++ b/tests/tests_do.py
@@ -0,0 +1,50 @@
 """Cross-check datasets/tests.txt against scikit-learn's KBinsDiscretizer.

 The data file is read as records of four lines each (lines starting with
 "#" and blank lines are ignored):
   1. record type: "RANGE", anything else is parsed as a vector record
   2. RANGE:  "from, to, step, n_bins, Q|U"
      vector: "<Q|U><n_bins>[v1, v2, ...]"
   3. expected discretized data, comma separated
   4. expected cut points, comma separated
 Value mismatches are printed; length mismatches abort through assert.
 """
 import json
 from sklearn.preprocessing import KBinsDiscretizer
 with open("datasets/tests.txt") as f:
    data = f.readlines()
 # Drop comments and blank lines so the 4-line record stride stays aligned
 data = [x.strip() for x in data if x.strip() and x[0] != "#"]
 for i in range(0, len(data), 4):
    experiment_type = data[i]
    print("Experiment:", data[i + 1])
    if experiment_type == "RANGE":
        range_data = data[i + 1]
        from_, to_, step_, n_bins_, strategy_ = range_data.split(",")
        X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
    else:
        # Header looks like "Q3[1.0, 2.0, ...]": strategy letter, bin count,
        # then a JSON list. Split at the bracket so bin counts with more than
        # one digit are parsed correctly.
        spec = data[i + 1]
        bracket = spec.index("[")
        strategy_ = spec[0]
        n_bins_ = spec[1:bracket]
        vector = spec[bracket:]
        X = [[float(x)] for x in json.loads(vector)]
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    disc = KBinsDiscretizer(
        n_bins=int(n_bins_),
        encode="ordinal",
        strategy=strategy,
    )
    expected_data = data[i + 2]
    cuts_data = data[i + 3]
    disc.fit(X)
    result = disc.transform(X)
    result = [int(x) for x in result.flatten()]
    expected = [int(x) for x in expected_data.split(",")]
    # Compare the discretized values stored in the file with sklearn's output
    assert len(result) == len(expected)
    for j in range(len(result)):
        if result[j] != expected[j]:
            print("Error at", j, "Expected=", expected[j], "Result=", result[j])
    # Compare the stored cut points with sklearn's bin edges (5 decimals)
    expected_cuts = disc.bin_edges_[0]
    computed_cuts = [float(x) for x in cuts_data.split(",")]
    assert len(expected_cuts) == len(computed_cuts)
    for j in range(len(expected_cuts)):
        if round(expected_cuts[j], 5) != computed_cuts[j]:
            print(
                "Error at",
                j,
                "Expected=",
                expected_cuts[j],
                "Result=",
                computed_cuts[j],
            )
--- a/tests/tests_generate.ipynb
+++ b/tests/tests_generate.ipynb
@@ -0,0 +1,133 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import KBinsDiscretizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "experiments_range = [\n",
    "    [0, 100, 1, 4, \"Q\"],\n",
    "    [0, 50, 1, 4, \"Q\"],\n",
    "    [0, 100, 1, 3, \"Q\"],\n",
    "    [0, 50, 1, 3, \"Q\"],\n",
    "    [0, 10, 1, 3, \"Q\"],\n",
    "    [0, 100, 1, 4, \"U\"],\n",
    "    [0, 50, 1, 4, \"U\"],\n",
    "    [0, 100, 1, 3, \"U\"],\n",
    "    [0, 50, 1, 3, \"U\"],\n",
    "# \n",
    "    [0, 10, 1, 3, \"U\"],\n",
    "    [1, 10, 1, 3, \"Q\"],\n",
    "    [1, 10, 1, 3, \"U\"],\n",
    "    [1, 11, 1, 3, \"Q\"],\n",
    "    [1, 11, 1, 3, \"U\"],\n",
    "    [1, 12, 1, 3, \"Q\"],\n",
    "    [1, 12, 1, 3, \"U\"],\n",
    "    [1, 13, 1, 3, \"Q\"],\n",
    "    [1, 13, 1, 3, \"U\"],\n",
    "    [1, 14, 1, 3, \"Q\"],\n",
    "    [1, 14, 1, 3, \"U\"],\n",
    "    [1, 15, 1, 3, \"Q\"],\n",
    "    [1, 15, 1, 3, \"U\"]\n",
    "]\n",
    "experiments_vectors = [\n",
    "    (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n",
    "    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n",
    "    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n",
    "    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n",
    "    (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n",
    "    (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n",
    "    (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "def write_lists(file, data, cuts):\n",
    "    sep = \"\"\n",
    "    for res in data:\n",
    "        file.write(f\"{sep}{int(res):d}\")\n",
    "        sep= \", \"\n",
    "    file.write(\"\\n\")\n",
    "    sep = \"\"\n",
    "    for res in cuts:\n",
    "        file.write(sep + str(round(res,5)))\n",
    "        sep = \", \"\n",
    "    file.write(\"\\n\")\n",
    "\n",
    "with open(\"datasets/tests.txt\", \"w\") as file:\n",
    "    file.write(\"#\\n\")\n",
    "    file.write(\"# from, to, step, #bins, Q/U\\n\")\n",
    "    file.write(\"# discretized data\\n\")\n",
    "    file.write(\"# cut points\\n\")\n",
    "    file.write(\"#\\n\")\n",
    "    for experiment in experiments_range:\n",
    "        file.write(\"RANGE\\n\")\n",
    "        (from_, to_, step_, bins_, strategy) = experiment\n",
    "        disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n",
    "        data = [[x] for x in range(from_, to_, step_)]\n",
    "        disc.fit(data)\n",
    "        result = disc.transform(data)\n",
    "        file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n",
    "        write_lists(file, result, disc.bin_edges_[0])\n",
    "    for n_bins, experiment in experiments_vectors:\n",
    "        for strategy in [\"Q\", \"U\"]:\n",
    "            file.write(\"VECTOR\\n\")\n",
    "            file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n",
    "            disc = KBinsDiscretizer(\n",
    "                n_bins=n_bins,\n",
    "                encode=\"ordinal\",\n",
    "                \n",
    "                strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n",
    "            )\n",
    "            data = [[x] for x in experiment]\n",
    "            result = disc.fit_transform(data)\n",
    "            write_lists(file, result, disc.bin_edges_[0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
Author	SHA1	Message	Date
Ricardo Montañana Gómez	cb9babace1	Merge `c488ace719` into `7b0673fd4b`	2024-07-02 11:50:55 +02:00
Ricardo Montañana Gómez	c488ace719	Fix FImdlp tests	2024-07-02 11:50:42 +02:00
Ricardo Montañana Gómez	8f6e16f04f	Fix BinDisc quantile mistakes	2024-07-02 09:40:06 +02:00
Ricardo Montañana Gómez	7b0673fd4b	Update README	2024-06-24 11:47:03 +02:00
Ricardo Montañana Gómez	a1346e1943	Fix Error in percentile method	2024-06-24 10:55:26 +02:00
Ricardo Montañana Gómez	b3fc598c29	Update build.yml	2024-06-14 22:04:29 +02:00
Ricardo Montañana Gómez	cc1efa0b4e	Update README	2024-06-14 22:01:11 +02:00
Ricardo Montañana Gómez	90965877eb	Add Makefile with build & test actions	2024-06-14 21:17:30 +02:00
Ricardo Montañana Gómez	c4e6c041fe	Fix int type	2024-06-09 00:29:55 +02:00
Ricardo Montañana Gómez	7938df7f0f	Update sonar mdlp version	2024-06-08 13:25:28 +02:00
Ricardo Montañana Gómez	7ee9896734	Fix mistake in github action	2024-06-08 12:36:56 +02:00
Ricardo Montañana Gómez	8f7f605670	Fix mistake in github action	2024-06-08 12:32:18 +02:00
Ricardo Montañana Gómez	2f55b27691	Fix mistake in github action	2024-06-08 12:28:23 +02:00
Ricardo Montañana Gómez	378fbd51ef	Fix mistake in github action	2024-06-08 12:25:17 +02:00
Ricardo Montañana Gómez	402d0da878	Fix mistake in github action	2024-06-08 12:23:28 +02:00
Ricardo Montañana Gómez	f34bcc2ed7	Add libtorch to github action	2024-06-08 12:20:51 +02:00
Ricardo Montañana Gómez	c9ba35fb58	update test script	2024-06-08 12:02:16 +02:00
Ricardo Montañana Gómez	e205668906	Add torch methods to discretize Add fit_transform methods	2024-06-07 23:54:42 +02:00
Ricardo Montañana Gómez	633aa52849	Refactor sample build	2024-06-06 12:04:55 +02:00
Ricardo Montañana Gómez	61de687476	Fix library creation problem	2024-06-06 11:13:50 +02:00
Ricardo Montañana Gómez	7ff88c8e4b	Update Discretizer version	2024-06-05 17:55:45 +02:00
Ricardo Montañana Gómez	638bb2a59e	Discretizer (#8 ) * Add better check in testKBins.py * Add Discretizer base class for Both discretizers * Refactor order of constructors init	2024-06-05 17:53:08 +02:00