Merge pull request #7 from rmontanana/BinDisc

Implement BinDisc and tests
Update workflow build
2025-08-16 07:55:58 +00:00 · 2024-06-05 11:08:56 +02:00 · 2024-06-05 10:56:49 +02:00 · 2024-06-05 10:45:11 +02:00 · 2024-05-02 12:51:40 +02:00 · 2024-05-02 10:46:29 +00:00
34 changed files with 3145 additions and 324 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,16 @@
+FROM mcr.microsoft.com/devcontainers/cpp:0-ubuntu-22.04
+# Build, coverage and Python tooling from apt; the package lists are purged in the same layer.
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install --no-install-recommends \
+    cmake \
+    lcov \
+    python3 \
+    python3-pip \
+    && apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
+# Python-based lint (cpplint), formatting (cmake-format) and coverage (gcovr) helpers.
+RUN pip3 install --no-cache-dir \
+    cmake-format \
+    cpplint \
+    gcovr
+# [Optional] Uncomment this section to install additional vcpkg ports.
+# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,32 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
+{
+	"name": "C++",
+	"build": {
+		"dockerfile": "Dockerfile"
+	},
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+	// Use 'postCreateCommand' to run commands after the container is created.
+	// "postCreateCommand": "gcc -v",
+	// Configure tool-specific properties.
+	"customizations": {
+		// Configure properties specific to VS Code.
+		"vscode": {
+			"settings": {},
+			"extensions": [
+				"ms-vscode.cpptools",
+				"ms-vscode.cpptools-extension-pack",
+				"ms-vscode.cpptools-themes",
+				"jbenden.c-cpp-flylint",
+				"matepek.vscode-catch2-test-adapter",
+				"ms-vscode.cmake-tools",
+				"GitHub.copilot"
+			]
+		}
+	}
+	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "root"
+}
--- a/.devcontainer/reinstall-cmake.sh
+++ b/.devcontainer/reinstall-cmake.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
+#-------------------------------------------------------------------------------------------------------------
+#
+set -e
+# Usage: reinstall-cmake.sh [CMAKE_VERSION]; with no argument (or "none") nothing is changed.
+CMAKE_VERSION=${1:-"none"}
+
+if [ "${CMAKE_VERSION}" = "none" ]; then
+    echo "No CMake version specified, skipping CMake reinstallation"
+    exit 0
+fi
+
+# Cleanup temporary directory and associated files when exiting the script.
+cleanup() {
+    EXIT_CODE=$?
+    set +e
+    if [[ -n "${TMP_DIR}" ]]; then
+        echo "Executing cleanup of tmp files"
+        rm -Rf "${TMP_DIR}"
+    fi
+    exit $EXIT_CODE
+}
+trap cleanup EXIT
+
+# Remove the packaged CMake and prepare the install prefix for the upstream release.
+echo "Installing CMake..."
+apt-get -y purge --auto-remove cmake
+mkdir -p /opt/cmake
+# Translate the dpkg architecture name into the one used in the release file names.
+architecture=$(dpkg --print-architecture)
+case "${architecture}" in
+    arm64)
+        ARCH=aarch64 ;;
+    amd64)
+        ARCH=x86_64 ;;
+    *)
+        echo "Unsupported architecture ${architecture}."
+        exit 1
+        ;;
+esac
+
+CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
+CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
+TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
+
+echo "${TMP_DIR}"
+cd "${TMP_DIR}"
+# -f: fail on HTTP errors (e.g. an unknown version) instead of saving the server's error page.
+curl -fsSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
+curl -fsSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
+# Check the installer against the downloaded SHA-256 list before executing it.
+sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
+sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
+# -f: replace links left by an earlier run so the script can be executed again.
+ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake
+ln -sf /opt/cmake/bin/ctest /usr/local/bin/ctest
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,26 +1,40 @@
 name: Build
-
 on:
  push:
    branches:
      - main
-
-
+      - "*"
+  pull_request:
+    types: [ opened, synchronize, reopened ]
 jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
+    env:
+      BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4.1.6
        with:
-          fetch-depth: 0  # Shallow clones should be disabled for a better relevancy of analysis
-      - uses: sonarsource/sonarqube-scan-action@master
+          fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
+      - name: Install sonar-scanner and build-wrapper
+        uses: SonarSource/sonarcloud-github-c-cpp@v2
+      - name: Install lcov & gcovr
+        run: |
+          sudo apt-get update # refresh the package index so the install does not hit stale lists
+          sudo apt-get -y install lcov gcovr
+      - name: Tests & build-wrapper
+        run: |
+          cmake -S . -B build -Wno-dev 
+          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Release
+          cd build
+          make
+          ctest -C Release --output-on-failure --test-dir tests
+          cd ..
+          gcovr -f CPPFImdlp.cpp -f Metrics.cpp  -f BinDisc.cpp --txt --sonarqube=coverage.xml
+      - name: Run sonar-scanner
        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
-          SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
-      # If you wish to fail your job when the Quality Gate is red, uncomment the
-      # following lines. This would typically be used to fail a deployment.
-      # - uses: sonarsource/sonarqube-quality-gate-action@master
-      #   timeout-minutes: 5
-      #   env:
-      #     SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+        run: |
+          sonar-scanner --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
+                        --define sonar.coverageReportPaths=coverage.xml
--- a/.gitignore
+++ b/.gitignore
@@ -31,8 +31,10 @@
 *.out
 *.app
 **/build
+build_Debug
+build_Release
 **/lcoverage
 .idea
 cmake-*
 **/CMakeFiles
-sonar-project.properties
+**/gcovr-report
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -5,18 +5,43 @@
    "version": "0.2.0",
    "configurations": [
        {
-            "name": "(lldb) Launch",
+            "name": "C++ Launch config",
            "type": "cppdbg",
            "request": "launch",
-            "program": "${workspaceRoot}/sample/build/sample",
+            "program": "${workspaceFolder}/build/sample/sample",
+            "cwd": "${workspaceFolder}/build/sample",
            "args": [
-                "mfeat-factors"
+                "-f",
+                "glass"
            ],
+            "targetArchitecture": "arm64",
+            "launchCompleteCommand": "exec-run",
+            "preLaunchTask": "CMake: build",
            "stopAtEntry": false,
-            "cwd": "${workspaceRoot}/sample/build/",
-            "environment": [],
-            "externalConsole": false,
-            "MIMode": "lldb"
-        }
+            "linux": {
+                "MIMode": "gdb",
+                "miDebuggerPath": "/usr/bin/gdb",
+                "setupCommands": [
+                    {
+                        "description": "Enable pretty-printing for gdb",
+                        "text": "-enable-pretty-printing",
+                        "ignoreFailures": true
+                    },
+                    {
+                        "description": "Auto load symbols when loading an .so file",
+                        "text": "set auto-solib-add",
+                        "ignoreFailures": false
+                    }
+                ]
+            },
+            "osx": {
+                "type": "lldb",
+                "MIMode": "lldb"
+            },
+            "windows": {
+                "MIMode": "gdb",
+                "miDebuggerPath": "C:\\MinGw\\bin\\gdb.exe"
+            }
+        },
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,9 @@
 {
    "sonarlint.connectedMode.project": {
-        "projectKey": "rmontanana_mdlp_AYZkjILJHyjW-meBaElG"
-    }
+        "connectionId": "rmontanana",
+        "projectKey": "rmontanana_mdlp"
+    },
+    "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
+    "cmake.configureOnOpen": true,
+    "sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json"
 }
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -1,29 +1,26 @@
 {
-    "tasks": [
-        {
-            "type": "cppbuild",
-            "label": "C/C++: clang++ build active file",
-            "command": "/usr/bin/clang++",
-            "args": [
-                "-fcolor-diagnostics",
-                "-fansi-escape-codes",
-                "-g",
-                "${file}",
-                "-o",
-                "${fileDirname}/${fileBasenameNoExtension}"
-            ],
-            "options": {
-                "cwd": "${fileDirname}"
-            },
-            "problemMatcher": [
-                "$gcc"
-            ],
-            "group": {
-                "kind": "build",
-                "isDefault": true
-            },
-            "detail": "Task generated by Debugger."
-        }
-    ],
-    "version": "2.0.0"
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "cmake",
+			"label": "CMake: build",
+			"command": "build",
+			"targets": [
+				"all"
+			],
+			"group": {
+				"kind": "build",
+				"isDefault": true
+			},
+			"problemMatcher": [],
+			"detail": "CMake template build task"
+		},
+		{
+			"type": "cmake",
+			"label": "CMake: configure",
+			"command": "configure",
+			"problemMatcher": [],
+			"detail": "CMake template configure task"
+		}
+	]
 }
--- a/BinDisc.cpp
+++ b/BinDisc.cpp
@@ -0,0 +1,138 @@
+#include <algorithm>
+#include <limits>
+#include <cmath>
+#include "BinDisc.h"
+#include <iostream>
+#include <string>
+
+namespace mdlp {
+
+    // Stores the binning configuration; fewer than 3 bins is rejected with std::invalid_argument.
+    BinDisc::BinDisc(int n_bins, strategy_t strategy) : n_bins(n_bins), strategy(strategy)
+    {
+        const bool tooFewBins = n_bins <= 2;
+        if (tooFewBins) throw std::invalid_argument("n_bins must be greater than 2");
+    }
+    BinDisc::~BinDisc() = default;
+    // Recomputes cutPoints from X with the configured strategy; empty X yields one cut point at precision_t max.
+    void BinDisc::fit(samples_t& X)
+    {
+        cutPoints.clear();
+        if (X.empty()) {
+            cutPoints.push_back(std::numeric_limits<precision_t>::max());
+            return;
+        }
+        switch (strategy) {
+            case strategy_t::QUANTILE: fit_quantile(X); break;
+            case strategy_t::UNIFORM: fit_uniform(X); break;
+        }
+    }
+    // Returns the first num - 1 values of a num-point evenly spaced grid over [start, end] (the end point
+    // is not needed by the callers). Equal bounds yield { 0 }; num < 2 yields an empty vector.
+    std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
+    {
+        if (start == end)
+            return { 0 };
+        std::vector<precision_t> linspc;
+        if (num < 2)
+            return linspc; // nothing to generate; also avoids dividing by zero below
+        const precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
+        for (int i = 0; i < num - 1; ++i) // signed index: no int/size_t mix in the comparison
+            linspc.push_back(start + delta * static_cast<precision_t>(i));
+        return linspc;
+    }
+    // Clamps n into [lower, upper].
+    size_t clip(const size_t n, size_t lower, size_t upper) { return std::max(lower, std::min(n, upper)); }
+    // Linearly interpolated percentiles (0-100 scale) of data, which is expected to be sorted ascending
+    // and to hold at least two samples. A result equal to the previously stored one is skipped.
+    std::vector<precision_t> percentile(samples_t& data, std::vector<precision_t>& percentiles)
+    {
+        // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
+        std::vector<precision_t> results;
+        results.reserve(percentiles.size());
+        const double last = static_cast<double>(data.size() - 1);
+        for (auto percentile : percentiles) {
+            const size_t i = static_cast<size_t>(std::floor(last * percentile / 100.));
+            const auto indexLower = clip(i, 0, data.size() - 2); // size - 2 keeps indexLower + 1 in range
+            const double percentI = static_cast<double>(indexLower) / last;
+            const double fraction = (percentile / 100.0 - percentI) /
+                (static_cast<double>(indexLower + 1) / last - percentI);
+            const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
+            if (results.empty() || value != results.back()) // back() on an empty vector is undefined
+                results.push_back(value);
+        }
+        return results;
+    }
+    // Derives the cut points from evenly spaced percentiles of a sorted copy of X.
+    void BinDisc::fit_quantile(samples_t& X)
+    {
+        samples_t sorted = X;
+        std::sort(sorted.begin(), sorted.end());
+        if (sorted.front() == sorted.back() || sorted.size() == 1) { // constant X: one catch-all bin
+            cutPoints.push_back(std::numeric_limits<precision_t>::max());
+            return;
+        }
+        auto steps = linspace(0.0, 100.0, n_bins + 1);
+        cutPoints = percentile(sorted, steps);
+        normalizeCutPoints();
+    }
+    // Builds the cut points from an evenly spaced grid between the smallest and largest value of X.
+    void BinDisc::fit_uniform(samples_t& X)
+    {
+        const auto range = std::minmax_element(X.begin(), X.end());
+        cutPoints = linspace(*range.first, *range.second, n_bins + 1);
+        normalizeCutPoints();
+    }
+    // Appends the largest precision_t value as the final cut point, then drops the leading entry.
+    void BinDisc::normalizeCutPoints()
+    {
+        const precision_t upperSentinel = std::numeric_limits<precision_t>::max();
+        cutPoints.push_back(upperSentinel); // appended first, so the erase below never sees an empty vector
+        cutPoints.erase(cutPoints.begin());
+    }
+    // Labels each sample with the number of cut points that are <= it (cutPoints must be sorted for
+    // upper_bound). The result is kept in discretizedData, overwritten on every call, and returned by reference.
+    labels_t& BinDisc::transform(const samples_t& X)
+    {
+        discretizedData.clear();
+        discretizedData.reserve(X.size());
+        for (const precision_t& sample : X)
+            discretizedData.push_back(std::upper_bound(cutPoints.begin(), cutPoints.end(), sample) - cutPoints.begin());
+        return discretizedData;
+    }
+}
+// void BinDisc::fit_quantile(samples_t& X)
+    // {
+    //     cutPoints.clear();
+    //     if (X.empty()) {
+    //         cutPoints.push_back(std::numeric_limits<float>::max());
+    //         return;
+    //     }
+    //     samples_t data = X;
+    //     std::sort(data.begin(), data.end());
+    //     float min_val = data.front();
+    //     float max_val = data.back();
+    //     // Handle case of all data points having the same value
+    //     if (min_val == max_val) {
+    //         cutPoints.push_back(std::numeric_limits<float>::max());
+    //         return;
+    //     }
+    //     int first = X.size() / n_bins;
+    //     cutPoints.push_back(data.at(first - 1));
+    //     int bins_done = 1;
+    //     int prev = first - 1;
+    //     while (bins_done < n_bins) {
+    //         int next = first * (bins_done + 1) - 1;
+    //         while (next < X.size() && data.at(next) == data[prev]) {
+    //             ++next;
+    //         }
+    //         if (next == X.size() || bins_done == n_bins - 1) {
+    //             cutPoints.push_back(std::numeric_limits<float>::max());
+    //             break;
+    //         } else {
+    //             cutPoints.push_back(data[next]);
+    //             bins_done++;
+    //             prev = next;
+    //         }
+    //     }
+    // }
--- a/BinDisc.h
+++ b/BinDisc.h
@@ -0,0 +1,31 @@
+#ifndef BINDISC_H
+#define BINDISC_H
+
+#include "typesFImdlp.h"
+#include <string>
+
+namespace mdlp {
+    // Binning strategies selectable when constructing a BinDisc.
+    enum class strategy_t {
+        UNIFORM,
+        QUANTILE
+    };
+    class BinDisc {
+    public:
+        BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM); // throws std::invalid_argument if n_bins < 3
+        ~BinDisc();
+        void fit(samples_t&); // recomputes the cut points from the given samples
+        inline cutPoints_t getCutPoints() const { return cutPoints; }; // returns a copy of the current cut points
+        labels_t& transform(const samples_t&); // returns a reference to an internal buffer rewritten on each call
+        static inline std::string version() { return "1.0.0"; };
+    private:
+        void fit_uniform(samples_t&); // used by fit() for strategy_t::UNIFORM
+        void fit_quantile(samples_t&); // used by fit() for strategy_t::QUANTILE
+        void normalizeCutPoints(); // appends the max sentinel and drops the first grid value
+        int n_bins;
+        strategy_t strategy;
+        labels_t discretizedData = labels_t(); // output buffer filled by transform()
+        cutPoints_t cutPoints;
+    };
+}
+#endif
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,13 @@
 cmake_minimum_required(VERSION 3.20)
 project(mdlp)

+if (POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+endif ()
+
 set(CMAKE_CXX_STANDARD 11)

 add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
+add_subdirectory(sample)
+add_subdirectory(tests)

--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -3,20 +3,40 @@
 #include <set>
 #include <cmath>
 #include "CPPFImdlp.h"
-#include "Metrics.h"

 namespace mdlp {

-    CPPFImdlp::CPPFImdlp(): indices(indices_t()), X(samples_t()), y(labels_t()),
-        metrics(Metrics(y, indices))
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
+        max_depth(max_depth_),
+        proposed_cuts(proposed)
    {
    }
+
+    CPPFImdlp::CPPFImdlp() = default;
+
    CPPFImdlp::~CPPFImdlp() = default;

-    CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
+    // Resolves proposed_cuts into a cut-point limit: 0 means unlimited, a value below 1 is a
+    // fraction of the number of samples, anything else is an absolute count.
+    size_t CPPFImdlp::compute_max_num_cut_points() const
+    {
+        const auto samples = static_cast<float>(X.size());
+        if (proposed_cuts == 0)
+            return numeric_limits<size_t>::max();
+        // Negative values and counts above the number of samples are rejected.
+        if (proposed_cuts < 0 || proposed_cuts > samples)
+            throw invalid_argument("wrong proposed num_cuts value");
+        return proposed_cuts < 1 ? static_cast<size_t>(round(samples * proposed_cuts))
+                                 : static_cast<size_t>(proposed_cuts);
+    }
+
+    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
    {
        X = X_;
        y = y_;
+        num_cut_points = compute_max_num_cut_points();
+        depth = 0;
+        discretizedData.clear();
        cutPoints.clear();
        if (X.size() != y.size()) {
            throw invalid_argument("X and y must have the same size");
@@ -24,20 +44,34 @@ namespace mdlp {
        if (X.empty() || y.empty()) {
            throw invalid_argument("X and y must have at least one element");
        }
+        if (min_length < 3) {
+            throw invalid_argument("min_length must be greater than 2");
+        }
+        if (max_depth < 1) {
+            throw invalid_argument("max_depth must be greater than 0");
+        }
        indices = sortIndices(X_, y_);
        metrics.setData(y, indices);
-        computeCutPoints(0, X.size());
-        return *this;
+        computeCutPoints(0, X.size(), 1);
+        sort(cutPoints.begin(), cutPoints.end());
+        if (num_cut_points > 0) {
+            // Select the best (with lower entropy) cut points
+            while (cutPoints.size() > num_cut_points) {
+                resizeCutPoints();
+            }
+        }
    }

    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
    {
-        size_t n, m, idxPrev = cut - 1 >= start ? cut - 1 : cut;
+        size_t n;
+        size_t m;
+        size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
        size_t idxNext = cut + 1 < end ? cut + 1 : cut;
-        bool backWall; // true if duplicates reach begining of the interval
-        precision_t previous, actual, next;
-        if (cut - 1 < start || cut + 1 >= end)
-            throw logic_error("Invalid cutpoint index");
+        bool backWall; // true if duplicates reach beginning of the interval
+        precision_t previous;
+        precision_t actual;
+        precision_t next;
        previous = X[indices[idxPrev]];
        actual = X[indices[cut]];
        next = X[indices[idxNext]];
@@ -61,12 +95,14 @@ namespace mdlp {
        return { (actual + previous) / 2, cut };
    }

-    void CPPFImdlp::computeCutPoints(size_t start, size_t end)
+    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
    {
        size_t cut;
        pair<precision_t, size_t> result;
-        if (end - start < 3)
+        // Check if the interval length and the depth are Ok
+        if (end - start < min_length || depth_ > max_depth)
            return;
+        depth = depth_ > depth ? depth_ : depth;
        cut = getCandidate(start, end);
        if (cut == numeric_limits<size_t>::max())
            return;
@@ -74,8 +110,8 @@ namespace mdlp {
            result = valueCutPoint(start, cut, end);
            cut = result.second;
            cutPoints.push_back(result.first);
-            computeCutPoints(start, cut);
-            computeCutPoints(cut, end);
+            computeCutPoints(start, cut, depth_ + 1);
+            computeCutPoints(cut, end, depth_ + 1);
        }
    }

@@ -83,9 +119,12 @@ namespace mdlp {
    {
        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
        E(A, TA; S) is minimal amongst all the candidate cut points. */
-        size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
+        size_t candidate = numeric_limits<size_t>::max();
+        size_t elements = end - start;
        bool sameValues = true;
-        precision_t entropy_left, entropy_right, minEntropy;
+        precision_t entropy_left;
+        precision_t entropy_right;
+        precision_t minEntropy;
        // Check if all the values of the variable in the interval are the same
        for (size_t idx = start + 1; idx < end; idx++) {
            if (X[indices[idx]] != X[indices[start]]) {
@@ -100,8 +139,8 @@ namespace mdlp {
            // Cutpoints are always on boundaries (definition 2)
            if (y[indices[idx]] == y[indices[idx - 1]])
                continue;
-            entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
-            entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
+            entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
+            entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
            if (entropy_left + entropy_right < minEntropy) {
                minEntropy = entropy_left + entropy_right;
                candidate = idx;
@@ -112,13 +151,15 @@ namespace mdlp {

    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
    {
-        int k, k1, k2;
-        precision_t ig, delta;
-        precision_t ent, ent1, ent2;
+        int k;
+        int k1;
+        int k2;
+        precision_t ig;
+        precision_t delta;
+        precision_t ent;
+        precision_t ent1;
+        precision_t ent2;
        auto N = precision_t(end - start);
-        if (N < 2) {
-            return false;
-        }
        k = metrics.computeNumClasses(start, end);
        k1 = metrics.computeNumClasses(start, cut);
        k2 = metrics.computeNumClasses(cut, end);
@@ -126,8 +167,8 @@ namespace mdlp {
        ent1 = metrics.entropy(start, cut);
        ent2 = metrics.entropy(cut, end);
        ig = metrics.informationGain(start, cut, end);
-        delta = log2(pow(3, precision_t(k)) - 2) -
-            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
+        delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
+            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
        precision_t term = 1 / N * (log2(N - 1) + delta);
        return ig > term;
    }
@@ -136,27 +177,45 @@ namespace mdlp {
    indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
    {
        indices_t idx(X_.size());
-        iota(idx.begin(), idx.end(), 0);
-        for (size_t i = 0; i < X_.size(); i++)
-            stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
+        std::iota(idx.begin(), idx.end(), 0);
+        stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
            if (X_[i1] == X_[i2])
                return y_[i1] < y_[i2];
            else
                return X_[i1] < X_[i2];
-                });
+            });
        return idx;
    }

-    cutPoints_t CPPFImdlp::getCutPoints()
+    void CPPFImdlp::resizeCutPoints()
    {
-        // Remove duplicates and sort
-        cutPoints_t output(cutPoints.size());
-        set<precision_t> s;
-        unsigned size = cutPoints.size();
-        for (unsigned i = 0; i < size; i++)
-            s.insert(cutPoints[i]);
-        output.assign(s.begin(), s.end());
-        sort(output.begin(), output.end());
-        return output;
+        // Removes one cut point: the one closing the interval with the highest entropy (the first
+        // cut point when no interval has entropy above 0). [begin, end) are positions in `indices`.
+        precision_t maxEntropy = 0;
+        size_t maxEntropyIdx = 0;
+        size_t begin = 0;
+        for (size_t idx = 0; idx < cutPoints.size(); idx++) {
+            size_t end = begin;
+            // Test the bound before dereferencing; the reverse order read indices[X.size()].
+            while (end < X.size() && X[indices[end]] < cutPoints[idx])
+                end++;
+            const precision_t entropy = metrics.entropy(begin, end);
+            if (entropy > maxEntropy) {
+                maxEntropy = entropy;
+                maxEntropyIdx = idx;
+            }
+            begin = end;
+        }
+        cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
+    }
+    // Labels each sample with the number of cut points that are <= it (cutPoints must be sorted for
+    // upper_bound). The result is kept in discretizedData, overwritten on every call, and returned by reference.
+    labels_t& CPPFImdlp::transform(const samples_t& data)
+    {
+        discretizedData.clear();
+        discretizedData.reserve(data.size());
+        for (const precision_t& sample : data)
+            discretizedData.push_back(std::upper_bound(cutPoints.begin(), cutPoints.end(), sample) - cutPoints.begin());
+        return discretizedData;
     }
 }
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@@ -1,29 +1,42 @@
 #ifndef CPPFIMDLP_H
 #define CPPFIMDLP_H
+
 #include "typesFImdlp.h"
-#include "Metrics.h"
+#include <limits>
 #include <utility>
 #include <string>
+#include "Metrics.h"
+
 namespace mdlp {
    class CPPFImdlp {
-    protected:
-        indices_t indices;
-        samples_t X;
-        labels_t y;
-        Metrics metrics;
-        cutPoints_t cutPoints;
-
-        static indices_t sortIndices(samples_t&, labels_t&);
-        void computeCutPoints(size_t, size_t);
-        bool mdlp(size_t, size_t, size_t);
-        size_t getCandidate(size_t, size_t);
-        pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
    public:
        CPPFImdlp();
+        CPPFImdlp(size_t, int, float);
        ~CPPFImdlp();
-        CPPFImdlp& fit(samples_t&, labels_t&);
-        samples_t getCutPoints();
-        inline string version() { return "1.1.0"; };
+        void fit(samples_t&, labels_t&);
+        inline cutPoints_t getCutPoints() const { return cutPoints; };
+        labels_t& transform(const samples_t&);
+        inline int get_depth() const { return depth; };
+        static inline std::string version() { return "1.1.3"; };
+    protected:
+        size_t min_length = 3;
+        int depth = 0;
+        int max_depth = numeric_limits<int>::max();
+        float proposed_cuts = 0;
+        indices_t indices = indices_t();
+        samples_t X = samples_t();
+        labels_t y = labels_t();
+        Metrics metrics = Metrics(y, indices);
+        cutPoints_t cutPoints;
+        size_t num_cut_points = numeric_limits<size_t>::max();
+        labels_t discretizedData = labels_t();
+        static indices_t sortIndices(samples_t&, labels_t&);
+        void computeCutPoints(size_t, size_t, int);
+        void resizeCutPoints();
+        bool mdlp(size_t, size_t, size_t);
+        size_t getCandidate(size_t, size_t);
+        size_t compute_max_num_cut_points() const;
+        pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
    };
 }
-#endif
+#endif
--- a/Metrics.cpp
+++ b/Metrics.cpp
@@ -1,20 +1,24 @@
 #include "Metrics.h"
 #include <set>
 #include <cmath>
+
 using namespace std;
 namespace mdlp {
-    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
+    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
+        numClasses(computeNumClasses(0, indices.size()))
    {
    }
+
    int Metrics::computeNumClasses(size_t start, size_t end)
    {
        set<int> nClasses;
        for (auto i = start; i < end; ++i) {
            nClasses.insert(y[indices[i]]);
        }
-        return nClasses.size();
+        return static_cast<int>(nClasses.size());
    }
-    void Metrics::setData(labels_t& y_, indices_t& indices_)
+
+    void Metrics::setData(const labels_t& y_, const indices_t& indices_)
    {
        indices = indices_;
        y = y_;
@@ -22,9 +26,11 @@ namespace mdlp {
        entropyCache.clear();
        igCache.clear();
    }
+
    precision_t Metrics::entropy(size_t start, size_t end)
    {
-        precision_t p, ventropy = 0;
+        precision_t p;
+        precision_t ventropy = 0;
        int nElements = 0;
        labels_t counts(numClasses + 1, 0);
        if (end - start < 2)
@@ -38,26 +44,33 @@ namespace mdlp {
        }
        for (auto count : counts) {
            if (count > 0) {
-                p = (precision_t)count / nElements;
+                p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
                ventropy -= p * log2(p);
            }
        }
        entropyCache[{start, end}] = ventropy;
        return ventropy;
    }
+
    precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
    {
        precision_t iGain;
-        precision_t entropyInterval, entropyLeft, entropyRight;
-        int nElementsLeft = cut - start, nElementsRight = end - cut;
-        int nElements = end - start;
+        precision_t entropyInterval;
+        precision_t entropyLeft;
+        precision_t entropyRight;
+        size_t nElementsLeft = cut - start;
+        size_t nElementsRight = end - cut;
+        size_t nElements = end - start;
        if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
            return igCache[make_tuple(start, cut, end)];
        }
        entropyInterval = entropy(start, end);
        entropyLeft = entropy(start, cut);
        entropyRight = entropy(cut, end);
-        iGain = entropyInterval - ((precision_t)nElementsLeft * entropyLeft + (precision_t)nElementsRight * entropyRight) / nElements;
+        iGain = entropyInterval -
+            (static_cast<precision_t>(nElementsLeft) * entropyLeft +
+                static_cast<precision_t>(nElementsRight) * entropyRight) /
+            static_cast<precision_t>(nElements);
        igCache[make_tuple(start, cut, end)] = iGain;
        return iGain;
    }
--- a/Metrics.h
+++ b/Metrics.h
@@ -1,17 +1,19 @@
 #ifndef CCMETRICS_H
 #define CCMETRICS_H
+
 #include "typesFImdlp.h"
+
 namespace mdlp {
    class Metrics {
    protected:
        labels_t& y;
        indices_t& indices;
        int numClasses;
-        cacheEnt_t entropyCache;
-        cacheIg_t igCache;
+        cacheEnt_t entropyCache = cacheEnt_t();
+        cacheIg_t igCache = cacheIg_t();
    public:
        Metrics(labels_t&, indices_t&);
-        void setData(labels_t&, indices_t&);
+        void setData(const labels_t&, const indices_t&);
        int computeNumClasses(size_t, size_t);
        precision_t entropy(size_t, size_t);
        precision_t informationGain(size_t, size_t, size_t);
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-# mdlp
+[![Build](https://github.com/rmontanana/mdlp/actions/workflows/build.yml/badge.svg)](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
+[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
+[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
+
+# <img src="logo.png" alt="logo" width="50"/> mdlp

 Discretization algorithm based on the paper by Fayyad &amp; Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)

@@ -7,6 +11,11 @@ The implementation tries to mitigate the problem of different label values with
 - Sorts the values of the variable using the label values as a tie-breaker
 - Once a valid candidate for the split is found, it checks whether the previous value is the same as the current one, and tries to use the previous one, or the next one if the former is not possible.

+Other features:
+
+- Intervals with the same value of the variable are not taken into account for cutpoints.
+- Intervals must contain more than two examples to be evaluated.
+
 The algorithm returns the cut points for the variable.

 ## Sample
@@ -15,16 +24,16 @@ To run the sample, just execute the following commands:

 ```bash
 cd sample
-mkdir build
+cmake -B build
 cd build
-cmake ..
 make
-./sample iris
+./sample -f iris -m 2
+./sample -h
 ```

 ## Test

-To run the tests, execute the following commands:
+To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:

 ```bash
 cd tests
--- a/logo.png
+++ b/logo.png
--- a/sample/.vscode/launch.json
+++ b/sample/.vscode/launch.json
@@ -0,0 +1,21 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "lldb puro",
+            "type": "cppdbg",
+            // "targetArchitecture": "arm64",
+            "request": "launch",
+            "program": "${workspaceRoot}/build/sample",
+            "args": [
+                "-f",
+                "iris"
+            ],
+            "stopAtEntry": false,
+            "cwd": "${workspaceRoot}/build/",
+            "environment": [],
+            "externalConsole": false,
+            "MIMode": "lldb"
+        },
+    ]
+}
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -1,6 +1,5 @@
-cmake_minimum_required(VERSION 3.20)
-project(main)
-
 set(CMAKE_CXX_STANDARD 11)

+set(CMAKE_BUILD_TYPE Debug)
+
 add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@@ -1,33 +1,101 @@
 #include <iostream>
 #include <vector>
 #include <iomanip>
+#include <chrono>
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <exception>
+#include <limits>
+#include <map>
+#include <string>
+#include <tuple>
+#include <getopt.h>
 #include "../CPPFImdlp.h"
 #include "../tests/ArffFiles.h"
 using namespace std;
 using namespace mdlp;

+const string PATH = "../../tests/datasets/";

-int main(int argc, char** argv)
+/**
+ * Print a description of all supported command-line options to stdout.
+ *
+ * @param path program invocation path (the callers pass argv[0]); only the
+ *             part after the last '/' is shown in the usage line.
+ */
+void usage(const char* path)
+{
+    /* take only the last portion of the path */
+    const char* basename = strrchr(path, '/');
+    basename = basename ? basename + 1 : path;
+
+    // A space separates the program name from the option placeholder.
+    cout << "usage: " << basename << " [OPTION]" << endl;
+    cout << "  -h, --help\t\t Print this help and exit." << endl;
+    // -f and -p take a mandatory argument (see the getopt string "hf:p:m:c:n:"),
+    // so they are shown as =ARG rather than the optional-argument form [=ARG].
+    cout
+        << "  -f, --file=FILENAME\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
+        << endl;
+    cout << "  -p, --path=FOLDER\t folder where the arff dataset is located, default " << PATH << endl;
+    cout << "  -m, --max_depth=INT\t max_depth passed to discretizer. Default = MAX_INT" << endl;
+    cout
+        << "  -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
+        << endl;
+    cout << "  -n, --min_length=INT\t interval min_length passed to discretizer. Default = 3" << endl;
+}
+
+/**
+ * Parse the command line with getopt_long.
+ *
+ * Prints the usage text and terminates the process when -h is given (exit
+ * status 0), and when an option is unknown, a numeric option value cannot be
+ * converted, or no file name was supplied (exit status 1).
+ *
+ * @param argc argument count as received by main
+ * @param argv argument vector as received by main
+ * @return tuple (file_name, path, max_depth, min_length, max_cutpoints);
+ *         path ends with '/' unless it is empty.
+ */
+tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
+{
+    string file_name;
+    string path = PATH;
+    int max_depth = numeric_limits<int>::max();
+    int min_length = 3;
+    float max_cutpoints = 0;
+    const vector<struct option> long_options = {
+            {"help",          no_argument,       nullptr, 'h'},
+            {"file",          required_argument, nullptr, 'f'},
+            {"path",          required_argument, nullptr, 'p'},
+            {"max_depth",     required_argument, nullptr, 'm'},
+            {"max_cutpoints", required_argument, nullptr, 'c'},
+            {"min_length",    required_argument, nullptr, 'n'},
+            {nullptr,         no_argument,       nullptr, 0}
+    };
+    while (true) {
+        const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options.data(), nullptr);
+        if (c == -1)
+            break;
+        try {
+            switch (c) {
+                case 'h':
+                    usage(argv[0]);
+                    exit(0);
+                case 'f':
+                    file_name = string(optarg);
+                    break;
+                case 'm':
+                    max_depth = stoi(optarg);
+                    break;
+                case 'n':
+                    min_length = stoi(optarg);
+                    break;
+                case 'c':
+                    max_cutpoints = stof(optarg);
+                    break;
+                case 'p':
+                    path = optarg;
+                    // back() on an empty string is undefined behaviour; an empty
+                    // path is kept as is (files are then looked up in the cwd).
+                    if (!path.empty() && path.back() != '/')
+                        path += '/';
+                    break;
+                case '?':
+                    usage(argv[0]);
+                    exit(1);
+                default:
+                    abort();
+            }
+        }
+        catch (const exception&) {
+            // stoi/stof throw invalid_argument or out_of_range on malformed
+            // input; report it instead of terminating with an uncaught exception.
+            cerr << "Invalid value for option -" << static_cast<char>(c) << ": " << optarg << endl;
+            usage(argv[0]);
+            exit(1);
+        }
+    }
+    if (file_name.empty()) {
+        usage(argv[0]);
+        exit(1);
+    }
+    return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
+}
+
+void process_file(const string& path, const string& file_name, bool class_last, int max_depth, int min_length,
+    float max_cutpoints)
 {
    ArffFiles file;
-    string path = "../../tests/datasets/";
-    map<string, bool> datasets = {
-            {"mfeat-factors",      true},
-            {"iris",               true},
-            {"letter",             true},
-            {"glass",              true},
-            {"kdd_JapaneseVowels", false},
-            {"test",               true}
-    };
-    if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
-        cout << "Usage: " << argv[0] << " {mfeat-factors, glass, iris, letter, kdd_JapaneseVowels, test}" << endl;
-        return 1;
-    }

-    file.load(path + argv[1] + ".arff", datasets[argv[1]]);
-    auto attributes = file.getAttributes();
-    int items = file.getSize();
+    file.load(path + file_name + ".arff", class_last);
+    const auto attributes = file.getAttributes();
+    const auto items = file.getSize();
    cout << "Number of lines: " << items << endl;
    cout << "Attributes: " << endl;
    for (auto attribute : attributes) {
@@ -38,22 +106,92 @@ int main(int argc, char** argv)
    cout << "Data: " << endl;
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();
-    for (int i = 0; i < 50; i++) {
+    for (int i = 0; i < 5; i++) {
        for (auto feature : X) {
            cout << fixed << setprecision(1) << feature[i] << " ";
        }
        cout << y[i] << endl;
    }
-    mdlp::CPPFImdlp test = mdlp::CPPFImdlp();
+    auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
+    size_t total = 0;
    for (auto i = 0; i < attributes.size(); i++) {
        auto min_max = minmax_element(X[i].begin(), X[i].end());
-        cout << "Cut points for " << get<0>(attributes[i]) << endl;
-        cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
-        cout << "--------------------------" << setprecision(3) << endl;
+        cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
        test.fit(X[i], y);
-        for (auto item : test.getCutPoints()) {
-            cout << item << endl;
+        auto cut_points = test.getCutPoints();
+        for (auto item : cut_points) {
+            cout << item;
+            if (item != cut_points.back())
+                cout << ", ";
        }
+        total += test.getCutPoints().size();
+        cout << "]" << endl;
+        cout << "Min: " << *min_max.first << " Max: " << *min_max.second << endl;
+        cout << "--------------------------" << endl;
+    }
+    cout << "Total cut points ...: " << total << endl;
+    cout << "Total feature states: " << total + attributes.size() << endl;
+}
+
+/**
+ * Discretize every feature of every dataset and print a summary table.
+ *
+ * For each entry of datasets the file path + name + ".arff" is loaded, a fresh
+ * CPPFImdlp is fitted on every feature, and one row is printed with the number
+ * of features, the total number of cut points and the accumulated fit time in
+ * milliseconds.
+ *
+ * @param datasets      dataset name -> flag forwarded as second argument of ArffFiles::load
+ * @param path          folder prefix prepended to each dataset name
+ * @param max_depth     forwarded to the CPPFImdlp constructor
+ * @param min_length    forwarded to the CPPFImdlp constructor
+ * @param max_cutpoints forwarded to the CPPFImdlp constructor
+ */
+void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
+    float max_cutpoints)
+{
+    cout << "Results: " << "Max_depth: " << max_depth << "  Min_length: " << min_length << "  Max_cutpoints: "
+        << max_cutpoints << endl << endl;
+    printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
+    printf("==================== ==== ==== ========\n");
+    for (const auto& dataset : datasets) {
+        ArffFiles file;
+        file.load(path + dataset.first + ".arff", dataset.second);
+        const auto attributes = file.getAttributes();
+        vector<samples_t>& X = file.getX();
+        labels_t& y = file.getY();
+        size_t timing = 0;
+        size_t cut_points = 0;
+        // size_t index: avoids the signed/unsigned comparison against size().
+        for (size_t i = 0; i < attributes.size(); i++) {
+            auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
+            // Only the fit() call is timed.
+            const auto begin = std::chrono::steady_clock::now();
+            test.fit(X[i], y);
+            const auto end = std::chrono::steady_clock::now();
+            timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
+            cut_points += test.getCutPoints().size();
+        }
+        // %zu is the conversion for size_t; %lu is wrong wherever unsigned long is not size_t.
+        printf("%-20s %4zu %4zu %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
+    }
+}
+
+
+int main(int argc, char** argv)
+{
+    // Entry point of the sample: parses the command line, validates the requested
+    // dataset and either processes all known datasets or a single one.
+    // Known datasets; the bool is forwarded as the class_last argument of
+    // process_file / second argument of ArffFiles::load.
+    map<string, bool> datasets = {
+            {"diabetes",           true},
+            {"glass",              true},
+            {"iris",               true},
+            {"kdd_JapaneseVowels", false},
+            {"letter",             true},
+            {"liver-disorders",    true},
+            {"mfeat-factors",      true},
+            {"test",               true}
+    };
+    string file_name;
+    string path;
+    int max_depth;
+    int min_length;
+    float max_cutpoints;
+    // parse_arguments exits the process itself on -h, bad options or a missing file name.
+    tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
+    // Only names present in the table above, or the special name "all", are accepted.
+    if (datasets.find(file_name) == datasets.end() && file_name != "all") {
+        cout << "Invalid file name: " << file_name << endl;
+        usage(argv[0]);
+        exit(1);
+    }
+    if (file_name == "all")
+        process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
+    else {
+        process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
+        // Echo the parameters used for the single-file run.
+        cout << "File name ....: " << file_name << endl;
+        cout << "Max depth ....: " << max_depth << endl;
+        cout << "Min length ...: " << min_length << endl;
+        cout << "Max cutpoints : " << max_cutpoints << endl;
    }
    return 0;
-}
+}
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -0,0 +1,14 @@
+sonar.projectKey=rmontanana_mdlp
+sonar.organization=rmontanana
+
+# This is the name and version displayed in the SonarCloud UI.
+sonar.projectName=mdlp
+sonar.projectVersion=1.1.3
+# sonar.test.exclusions=tests/**
+# sonar.tests=tests/
+# sonar.coverage.exclusions=tests/**,sample/**
+# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
+#sonar.sources=.
+
+# Encoding of the source code. Default is default system encoding
+sonar.sourceEncoding=UTF-8
--- a/tests/ArffFiles.cpp
+++ b/tests/ArffFiles.cpp
@@ -2,87 +2,101 @@
 #include <fstream>
 #include <sstream>
 #include <map>
-#include <iostream>

 using namespace std;

-ArffFiles::ArffFiles()
-{
-}
-vector<string> ArffFiles::getLines()
+ArffFiles::ArffFiles() = default;
+
+vector<string> ArffFiles::getLines() const
 {
+    // Returns a copy of the data lines stored by load().
    return lines;
 }
-unsigned long int ArffFiles::getSize()
+
+unsigned long int ArffFiles::getSize() const
 {
+    // Number of data lines stored by load().
    return lines.size();
 }
-vector<pair<string, string>> ArffFiles::getAttributes()
+
+vector<pair<string, string>> ArffFiles::getAttributes() const
 {
+    // Returns a copy of the (name, type) pairs; load() removes the class attribute from this list.
    return attributes;
 }
-string ArffFiles::getClassName()
+
+string ArffFiles::getClassName() const
 {
+    // Name of the attribute that load() selected as the class.
    return className;
 }
-string ArffFiles::getClassType()
+
+string ArffFiles::getClassType() const
 {
+    // Type text of the attribute that load() selected as the class.
    return classType;
 }
-vector<vector<float>>& ArffFiles::getX()
+
+vector<mdlp::samples_t>& ArffFiles::getX()
 {
+    // Mutable reference to the feature data (one samples_t per attribute); no copy is made.
    return X;
 }
+
 vector<int>& ArffFiles::getY()
 {
+    // Mutable reference to the integer-encoded labels; no copy is made.
    return y;
 }
-void ArffFiles::load(string fileName, bool classLast)
+
+void ArffFiles::load(const string& fileName, bool classLast)
 {
+    // Reads an ARFF file: collects the "@attribute" declarations as (name, type)
+    // pairs and every remaining data line, picks the class attribute (last one
+    // when classLast is true, first one otherwise) and builds X / y through
+    // generateDataset().
+    // Throws invalid_argument when the file cannot be opened or declares no attribute.
    ifstream file(fileName);
-    string keyword, attribute, type;
-    if (file.is_open()) {
-        string line;
-        while (getline(file, line)) {
-            if (line[0] == '%' || line.empty() || line == "\r" || line == " ") {
-                continue;
-            }
-            if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
-                stringstream ss(line);
-                ss >> keyword >> attribute >> type;
-                attributes.push_back({ attribute, type });
-                continue;
-            }
-            if (line[0] == '@') {
-                continue;
-            }
-            lines.push_back(line);
-        }
-        file.close();
-        if (attributes.empty())
-            throw invalid_argument("No attributes found");
-        if (classLast) {
-            className = get<0>(attributes.back());
-            classType = get<1>(attributes.back());
-            attributes.pop_back();
-        } else {
-            className = get<0>(attributes.front());
-            classType = get<1>(attributes.front());
-            attributes.erase(attributes.begin());
-        }
-        generateDataset(classLast);
-    } else
+    if (!file.is_open()) {
        throw invalid_argument("Unable to open file");
+    }
+    // Drop whatever a previous load() left behind, so that reusing the object
+    // does not mix the attributes and rows of two files. Done only after the
+    // file opened successfully, so a failed open leaves the object untouched.
+    lines.clear();
+    attributes.clear();
+    string line;
+    string keyword;
+    string attribute;
+    string type;
+    string type_w;
+    while (getline(file, line)) {
+        // Skip empty lines, '%' comment lines and lines holding only "\r" or " ".
+        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
+            continue;
+        }
+        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
+            stringstream ss(line);
+            ss >> keyword >> attribute;
+            // The type may span several whitespace-separated words; re-join them.
+            type = "";
+            while (ss >> type_w)
+                type += type_w + " ";
+            attributes.emplace_back(trim(attribute), trim(type));
+            continue;
+        }
+        // Any other '@' directive is ignored.
+        if (line[0] == '@') {
+            continue;
+        }
+        lines.push_back(line);
+    }
+    file.close();
+    if (attributes.empty())
+        throw invalid_argument("No attributes found");
+    // Move the class attribute out of the attribute list.
+    if (classLast) {
+        className = get<0>(attributes.back());
+        classType = get<1>(attributes.back());
+        attributes.pop_back();
+    } else {
+        className = get<0>(attributes.front());
+        classType = get<1>(attributes.front());
+        attributes.erase(attributes.begin());
+    }
+    generateDataset(classLast);
+
 }
+
 void ArffFiles::generateDataset(bool classLast)
 {
-    X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
-    vector<string> yy = vector<string>(lines.size(), "");
-    int labelIndex = classLast ? attributes.size() : 0;
-    for (int i = 0; i < lines.size(); i++) {
+    X = vector<mdlp::samples_t>(attributes.size(), mdlp::samples_t(lines.size()));
+    auto yy = vector<string>(lines.size(), "");
+    int labelIndex = classLast ? static_cast<int>(attributes.size()) : 0;
+    for (size_t i = 0; i < lines.size(); i++) {
        stringstream ss(lines[i]);
        string value;
-        int pos = 0, xIndex = 0;
+        int pos = 0;
+        int xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
@@ -93,20 +107,22 @@ void ArffFiles::generateDataset(bool classLast)
    }
    y = factorize(yy);
 }
+
 string ArffFiles::trim(const string& source)
 {
+    // Returns a copy of source with leading and trailing spaces, single quotes,
+    // newlines, carriage returns and tabs removed; source itself is not modified.
    string s(source);
-    s.erase(0, s.find_first_not_of(" \n\r\t"));
-    s.erase(s.find_last_not_of(" \n\r\t") + 1);
+    // If every character is strippable, find_first_not_of returns npos and the
+    // first erase empties the string; npos + 1 then wraps to 0 in the second erase.
+    s.erase(0, s.find_first_not_of(" '\n\r\t"));
+    s.erase(s.find_last_not_of(" '\n\r\t") + 1);
    return s;
 }
+
 vector<int> ArffFiles::factorize(const vector<string>& labels_t)
 {
    vector<int> yy;
    yy.reserve(labels_t.size());
    map<string, int> labelMap;
    int i = 0;
-    for (string label : labels_t) {
+    for (const string& label : labels_t) {
        if (labelMap.find(label) == labelMap.end()) {
            labelMap[label] = i++;
        }
--- a/tests/ArffFiles.h
+++ b/tests/ArffFiles.h
@@ -1,27 +1,35 @@
 #ifndef ARFFFILES_H
 #define ARFFFILES_H
+
 #include <string>
 #include <vector>
+#include "../typesFImdlp.h"
+
 using namespace std;
+
 class ArffFiles {
 private:
+    // Data lines of the file (comment lines and '@' directives excluded).
    vector<string> lines;
+    // (name, type) pairs of the feature attributes; the class attribute is removed by load().
    vector<pair<string, string>> attributes;
-    string className, classType;
-    vector<vector<float>> X;
+    // Name and type text of the attribute chosen as the class.
+    string className;
+    string classType;
+    // Feature values: one samples_t per attribute, one entry per data line.
+    vector<mdlp::samples_t> X;
+    // Integer-encoded class labels produced by factorize().
    vector<int> y;
+
+    // Fills X and y from lines; the flag tells whether the class is the last column.
    void generateDataset(bool);
+
 public:
    ArffFiles();
-    void load(string, bool = true);
-    vector<string> getLines();
-    unsigned long int getSize();
-    string getClassName();
-    string getClassType();
-    string trim(const string&);
-    vector<vector<float>>& getX();
+    // Loads the given file; the flag (default true) means the class attribute is the last one.
+    // Throws invalid_argument when the file cannot be opened or has no attributes.
+    void load(const string&, bool = true);
+    vector<string> getLines() const;
+    // Number of data lines.
+    unsigned long int getSize() const;
+    string getClassName() const;
+    string getClassType() const;
+    // Copy of the argument without leading/trailing blanks and single quotes.
+    static string trim(const string&);
+    // getX / getY return mutable references to the internal storage.
+    vector<mdlp::samples_t>& getX();
    vector<int>& getY();
-    vector<pair<string, string>> getAttributes();
-    vector<int> factorize(const vector<string>& labels_t);
+    vector<pair<string, string>> getAttributes() const;
+    // Encodes string labels as integers; ids are handed out from 0 in order of first appearance.
+    static vector<int> factorize(const vector<string>& labels_t);
 };
+
 #endif
--- a/tests/BinDisc_unittest.cpp
+++ b/tests/BinDisc_unittest.cpp
@@ -0,0 +1,351 @@
+#include <fstream>
+#include <string>
+#include <iostream>
+#include "gtest/gtest.h"
+#include "ArffFiles.h"
+#include "../BinDisc.h"
+
+namespace mdlp {
+    const float margin = 1e-4;
+    // Locate the dataset folder: "../datasets/" when iris.arff can be opened
+    // there, otherwise "../../tests/datasets/".
+    static std::string set_data_path()
+    {
+        const std::string candidate = "../datasets/";
+        // The probe stream closes itself when it goes out of scope.
+        const std::ifstream probe(candidate + "iris.arff");
+        return probe.is_open() ? candidate : std::string("../../tests/datasets/");
+    }
+    const std::string data_path = set_data_path();
+    class TestBinDisc3U : public BinDisc, public testing::Test {
+    public:
+        TestBinDisc3U(int n_bins = 3) : BinDisc(n_bins, strategy_t::UNIFORM) {};
+    };
+    class TestBinDisc3Q : public BinDisc, public testing::Test {
+    public:
+        TestBinDisc3Q(int n_bins = 3) : BinDisc(n_bins, strategy_t::QUANTILE) {};
+    };
+    class TestBinDisc4U : public BinDisc, public testing::Test {
+    public:
+        TestBinDisc4U(int n_bins = 4) : BinDisc(n_bins, strategy_t::UNIFORM) {};
+    };
+    class TestBinDisc4Q : public BinDisc, public testing::Test {
+    public:
+        TestBinDisc4Q(int n_bins = 4) : BinDisc(n_bins, strategy_t::QUANTILE) {};
+    };
+    TEST_F(TestBinDisc3U, Easy3BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_NEAR(3.66667, cuts[0], margin);
+        EXPECT_NEAR(6.33333, cuts[1], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3Q, Easy3BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_NEAR(3.666667, cuts[0], margin);
+        EXPECT_NEAR(6.333333, cuts[1], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3U, X10BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.0, cuts[0]);
+        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3Q, X10BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4, cuts[0]);
+        EXPECT_EQ(7, cuts[1]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3U, X11BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_NEAR(4.33333, cuts[0], margin);
+        EXPECT_NEAR(7.66667, cuts[1], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    // Quantile counterpart of X11BinsUniform. It runs on the QUANTILE fixture
+    // (TestBinDisc3Q) so that the quantile strategy is the one being exercised;
+    // on the UNIFORM fixture this test merely duplicated X11BinsUniform.
+    // NOTE(review): expectations are carried over unchanged - the Easy3 and X10
+    // tests show both strategies agreeing on evenly spaced data; confirm for 11 samples.
+    TEST_F(TestBinDisc3Q, X11BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        // Fatal size check first, so the indexing below can never read out of bounds.
+        ASSERT_EQ(3, cuts.size());
+        EXPECT_NEAR(4.33333, cuts[0], margin);
+        EXPECT_NEAR(7.66667, cuts[1], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3U, ConstantUniform)
+    {
+        samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        EXPECT_EQ(1, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 0, 0 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc3Q, ConstantQuantile)
+    {
+        samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+        EXPECT_EQ(1, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 0, 0 };
+        EXPECT_EQ(expected, labels);
+    }
+    // Fitting an empty sample with the uniform strategy is expected to produce a
+    // single cut point equal to the float maximum.
+    TEST_F(TestBinDisc3U, EmptyUniform)
+    {
+        samples_t X = {};
+        fit(X);
+        auto cuts = getCutPoints();
+        // Fatal size check first: reading cuts[0] from an empty vector would be
+        // undefined behaviour instead of a clean test failure.
+        ASSERT_EQ(1, cuts.size());
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+    }
+    // Fitting an empty sample with the quantile strategy is expected to produce a
+    // single cut point equal to the float maximum.
+    TEST_F(TestBinDisc3Q, EmptyQuantile)
+    {
+        samples_t X = {};
+        fit(X);
+        auto cuts = getCutPoints();
+        // Fatal size check first: reading cuts[0] from an empty vector would be
+        // undefined behaviour instead of a clean test failure.
+        ASSERT_EQ(1, cuts.size());
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[0]);
+    }
+    TEST(TestBinDisc3, ExceptionNumberBins)
+    {
+        EXPECT_THROW(BinDisc(2), std::invalid_argument);
+    }
+    TEST_F(TestBinDisc3U, EasyRepeated)
+    {
+        samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_NEAR(1.66667, cuts[0], margin);
+        EXPECT_NEAR(2.33333, cuts[1], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
+        EXPECT_EQ(expected, labels);
+        EXPECT_EQ(3.0, X[0]); // X is not modified
+    }
+    // Quantile strategy on heavily repeated values: only two bins are expected.
+    // The leftover debug printing of the cut points was removed; a failing
+    // expectation already reports the offending values.
+    TEST_F(TestBinDisc3Q, EasyRepeated)
+    {
+        samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        // Fatal size check first, so the indexing below can never read out of bounds.
+        ASSERT_EQ(2, cuts.size());
+        EXPECT_NEAR(1.66667, cuts[0], margin);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[1]);
+        auto labels = transform(X);
+        labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
+        EXPECT_EQ(expected, labels);
+        EXPECT_EQ(3.0, X[0]); // X is not modified
+    }
+    TEST_F(TestBinDisc4U, Easy4BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(3.75, cuts[0]);
+        EXPECT_EQ(6.5, cuts[1]);
+        EXPECT_EQ(9.25, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4Q, Easy4BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(3.75, cuts[0]);
+        EXPECT_EQ(6.5, cuts[1]);
+        EXPECT_EQ(9.25, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4U, X13BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.0, cuts[0]);
+        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_EQ(10.0, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4Q, X13BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.0, cuts[0]);
+        EXPECT_EQ(7.0, cuts[1]);
+        EXPECT_EQ(10.0, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4U, X14BinsUniform)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.25, cuts[0]);
+        EXPECT_EQ(7.5, cuts[1]);
+        EXPECT_EQ(10.75, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4Q, X14BinsQuantile)
+    {
+        samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.25, cuts[0]);
+        EXPECT_EQ(7.5, cuts[1]);
+        EXPECT_EQ(10.75, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4U, X15BinsUniform)
+    {
+        samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.5, cuts[0]);
+        EXPECT_EQ(8, cuts[1]);
+        EXPECT_EQ(11.5, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4Q, X15BinsQuantile)
+    {
+        samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(4.5, cuts[0]);
+        EXPECT_EQ(8, cuts[1]);
+        EXPECT_EQ(11.5, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4U, RepeatedValuesUniform)
+    {
+        samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
+        //               0    1     2   3    4    5    6    7    8    9
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(1.0, cuts[0]);
+        EXPECT_EQ(2.0, cuts[1]);
+        EXPECT_EQ(3.0, cuts[2]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[3]);
+        EXPECT_EQ(4, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4Q, RepeatedValuesQuantile)
+    {
+        samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
+        //               0    1     2   3    4    5    6    7    8    9
+        fit(X);
+        auto cuts = getCutPoints();
+        EXPECT_EQ(2.0, cuts[0]);
+        EXPECT_EQ(3.0, cuts[1]);
+        EXPECT_EQ(numeric_limits<float>::max(), cuts[2]);
+        EXPECT_EQ(3, cuts.size());
+        auto labels = transform(X);
+        labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 };
+        EXPECT_EQ(expected, labels);
+    }
+    TEST_F(TestBinDisc4U, irisUniform)
+    {
+        ArffFiles file;
+        file.load(data_path + "iris.arff", true);
+        vector<samples_t>& X = file.getX();
+        fit(X[0]);
+        auto Xt = transform(X[0]);
+        labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
+        EXPECT_EQ(expected, Xt);
+    }
+    TEST_F(TestBinDisc4Q, irisQuantile)
+    {
+        ArffFiles file;
+        file.load(data_path + "iris.arff", true);
+        vector<samples_t>& X = file.getX();
+        fit(X[0]);
+        auto Xt = transform(X[0]);
+        labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
+        EXPECT_EQ(expected, Xt);
+    }
+}
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,15 +1,13 @@
-cmake_minimum_required(VERSION 3.14)
-project(FImdlp)
-
-# GoogleTest requires at least C++14
-set(CMAKE_CXX_STANDARD 14)
+cmake_minimum_required(VERSION 3.20)
+set(CMAKE_CXX_STANDARD 14) # GoogleTest (fetched below) requires at least C++14
 include(FetchContent)

 include_directories(${GTEST_INCLUDE_DIRS})

+
 FetchContent_Declare(
-  googletest
-  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
+        googletest
+        URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
 )
 # For Windows: Prevent overriding the parent project's compiler/linker settings
 set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
@@ -18,15 +16,19 @@ FetchContent_MakeAvailable(googletest)
 enable_testing()

 add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
-add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
+add_executable(FImdlp_unittest ../CPPFImdlp.cpp ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
+add_executable(BinDisc_unittest ../BinDisc.cpp ArffFiles.cpp BinDisc_unittest.cpp)
 target_link_libraries(Metrics_unittest GTest::gtest_main)
 target_link_libraries(FImdlp_unittest GTest::gtest_main)
+target_link_libraries(BinDisc_unittest GTest::gtest_main)
 target_compile_options(Metrics_unittest PRIVATE --coverage)
 target_compile_options(FImdlp_unittest PRIVATE --coverage)
+target_compile_options(BinDisc_unittest PRIVATE --coverage)
 target_link_options(Metrics_unittest PRIVATE --coverage)
 target_link_options(FImdlp_unittest PRIVATE --coverage)
+target_link_options(BinDisc_unittest PRIVATE --coverage)

 include(GoogleTest)
 gtest_discover_tests(Metrics_unittest)
 gtest_discover_tests(FImdlp_unittest)
-
+gtest_discover_tests(BinDisc_unittest)
--- a/tests/FImdlp_unittest.cpp
+++ b/tests/FImdlp_unittest.cpp
@@ -1,20 +1,48 @@
 #include "gtest/gtest.h"
 #include "../Metrics.h"
 #include "../CPPFImdlp.h"
-#include "ArffFiles.h"
+#include <fstream>
 #include <iostream>
+#include "ArffFiles.h"
+
+#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
+try { \
+stmt; \
+} catch (const etype& ex) { \
+EXPECT_EQ(whatstring, std::string(ex.what())); \
+throw; \
+} \
+, etype)

 namespace mdlp {
-    class TestFImdlp: public CPPFImdlp, public testing::Test {
+    class TestFImdlp : public CPPFImdlp, public testing::Test {
    public:
-        precision_t precision = 0.000001;
-        TestFImdlp(): CPPFImdlp() {}
-        void SetUp()
+        precision_t precision = 0.000001f;
+
+        TestFImdlp() : CPPFImdlp() {}
+
+        string data_path;
+
+        void SetUp() override
        {
-            X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
+            X = { 4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
+                 6.0f, 5.1f, 5.9f };
            y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
            fit(X, y);
+            data_path = set_data_path();
        }
+
+        // Return the directory holding the test datasets: "../datasets/" when
+        // iris.arff can be opened there, otherwise "../../tests/datasets/".
+        static string set_data_path()
+        {
+            const string local_dir = "../datasets/";
+            // The temporary stream is closed by its destructor right after the probe.
+            const bool found = ifstream(local_dir + "iris.arff").is_open();
+            if (found) return local_dir;
+            return "../../tests/datasets/";
+        }
+
        void checkSortedVector()
        {
            indices_t testSortedIndices = sortIndices(X, y);
@@ -25,24 +53,17 @@ namespace mdlp {
                prev = X[testSortedIndices[i]];
            }
        }
-        void checkCutPoints(cutPoints_t& expected)
+
+        void checkCutPoints(cutPoints_t& computed, cutPoints_t& expected) const // compares element-wise within `precision`
         {
-            int expectedSize = expected.size();
-            EXPECT_EQ(cutPoints.size(), expectedSize);
-            for (unsigned long i = 0; i < cutPoints.size(); i++) {
-                EXPECT_NEAR(cutPoints[i], expected[i], precision);
+            ASSERT_EQ(computed.size(), expected.size()); // fatal: on a size mismatch the loop below would index expected[] out of range
+            for (unsigned long i = 0; i < computed.size(); i++) {
+                cout << "(" << computed[i] << ", " << expected[i] << ") "; // echo each (computed, expected) pair to the test log
+                EXPECT_NEAR(computed[i], expected[i], precision);
             }
         }
-        template<typename T, typename A>
-        void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
-        {
-            EXPECT_EQ(expected.size(), computed.size());
-            ASSERT_EQ(expected.size(), computed.size());
-            for (auto i = 0; i < expected.size(); i++) {
-                EXPECT_NEAR(expected[i], computed[i], precision);
-            }
-        }
-        bool test_result(samples_t& X_, size_t cut, float midPoint, size_t limit, string title)
+
+        bool test_result(const samples_t& X_, size_t cut, float midPoint, size_t limit, const string& title)
        {
            pair<precision_t, size_t> result;
            labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
@@ -55,37 +76,90 @@ namespace mdlp {
            EXPECT_EQ(result.second, limit);
            return true;
        }
+
+        void test_dataset(CPPFImdlp& test, const string& filename, vector<cutPoints_t>& expected, vector<int>& depths) const
+        {   // Fit `test` on each attribute of <data_path><filename>.arff and check its depth and cut points per feature.
+            ArffFiles file;
+            file.load(data_path + filename + ".arff", true);
+            vector<samples_t>& X = file.getX();
+            labels_t& y = file.getY();
+            auto attributes = file.getAttributes();
+            ASSERT_TRUE(attributes.size() <= expected.size() && attributes.size() <= depths.size()); // never index the expectations past their end
+            for (size_t feature = 0; feature < attributes.size(); feature++) {
+                test.fit(X[feature], y);
+                EXPECT_EQ(test.get_depth(), depths[feature]);
+                auto computed = test.getCutPoints();
+                cout << "Feature " << feature << ": ";
+                checkCutPoints(computed, expected[feature]);
+                cout << endl;
+            }
+        }
    };
+
    TEST_F(TestFImdlp, FitErrorEmptyDataset)
    {
        X = samples_t();
        y = labels_t();
-        EXPECT_THROW(fit(X, y), std::invalid_argument);
+        EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
    }
+
    TEST_F(TestFImdlp, FitErrorDifferentSize)
    {
        X = { 1, 2, 3 };
        y = { 1, 2 };
-        EXPECT_THROW(fit(X, y), std::invalid_argument);
+        EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
    }
+
+    TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth)
+    {
+        auto testLength = CPPFImdlp(2, 10, 0);
+        auto testDepth = CPPFImdlp(3, 0, 0);
+        X = { 1, 2, 3 };
+        y = { 1, 2, 3 };
+        EXPECT_THROW_WITH_MESSAGE(testLength.fit(X, y), invalid_argument, "min_length must be greater than 2");
+        EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
+    }
+
+    TEST_F(TestFImdlp, JoinFit)
+    {
+        samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
+        labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
+        cutPoints_t expected = { 1.5f, 2.5f };
+        fit(X_, y_);
+        auto computed = getCutPoints();
+        EXPECT_EQ(computed.size(), expected.size());
+        checkCutPoints(computed, expected);
+    }
+
+    TEST_F(TestFImdlp, FitErrorMaxCutPoints)
+    {
+        auto testmin = CPPFImdlp(2, 10, -1);
+        auto testmax = CPPFImdlp(3, 0, 200);
+        X = { 1, 2, 3 };
+        y = { 1, 2, 3 };
+        EXPECT_THROW_WITH_MESSAGE(testmin.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
+        EXPECT_THROW_WITH_MESSAGE(testmax.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
+    }
+
    TEST_F(TestFImdlp, SortIndices)
    {
-        X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
+        X = { 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f };
        y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
        indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
        checkSortedVector();
-        X = { 5.77, 5.88, 5.99 };
+        X = { 5.77f, 5.88f, 5.99f };
        y = { 1, 2, 1 };
        indices = { 0, 1, 2 };
        checkSortedVector();
-        X = { 5.33, 5.22, 5.11 };
+        X = { 5.33f, 5.22f, 5.11f };
        y = { 1, 2, 1 };
        indices = { 2, 1, 0 };
        checkSortedVector();
-        X = { 5.33, 5.22, 5.33 };
+        X = { 5.33f, 5.22f, 5.33f };
        y = { 2, 2, 1 };
        indices = { 1, 2, 0 };
    }
+
    TEST_F(TestFImdlp, TestShortDatasets)
    {
        vector<precision_t> computed;
@@ -111,44 +185,31 @@ namespace mdlp {
        EXPECT_EQ(computed.size(), 1);
        EXPECT_NEAR(computed[0], 1.5, precision);
    }
+
    TEST_F(TestFImdlp, TestArtificialDataset)
    {
        fit(X, y);
-        computeCutPoints(0, 20);
-        cutPoints_t expected = { 5.05 };
+        cutPoints_t expected = { 5.05f };
        vector<precision_t> computed = getCutPoints();
-        computed = getCutPoints();
-        int expectedSize = expected.size();
        EXPECT_EQ(computed.size(), expected.size());
        for (unsigned long i = 0; i < computed.size(); i++) {
            EXPECT_NEAR(computed[i], expected[i], precision);
        }
    }
+
    TEST_F(TestFImdlp, TestIris)
    {
-        ArffFiles file;
-        string path = "../datasets/";
-
-        file.load(path + "iris.arff", true);
-        int items = file.getSize();
-        vector<samples_t>& X = file.getX();
        vector<cutPoints_t> expected = {
-            { 5.4499998092651367, 5.75 },
-            { 2.75, 2.85, 2.95, 3.05, 3.35 },
-            { 2.4500000476837158, 4.75, 5.0500001907348633 },
-            { 0.80000001192092896, 1.75 }
+                {5.45f, 5.75f},
+                {2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
+                {2.45f, 4.75f, 5.05f},
+                {0.8f,  1.75f}
        };
-        labels_t& y = file.getY();
-        auto attributes = file.getAttributes();
-        for (auto feature = 0; feature < attributes.size(); feature++) {
-            fit(X[feature], y);
-            vector<precision_t> computed = getCutPoints();
-            EXPECT_EQ(computed.size(), expected[feature].size());
-            for (auto i = 0; i < computed.size(); i++) {
-                EXPECT_NEAR(computed[i], expected[feature][i], precision);
-            }
-        }
+        vector<int> depths = { 3, 5, 4, 3 };
+        auto test = CPPFImdlp();
+        test_dataset(test, "iris", expected, depths);
    }
+
    TEST_F(TestFImdlp, ComputeCutPointsGCase)
    {
        cutPoints_t expected;
@@ -156,26 +217,138 @@ namespace mdlp {
        samples_t X_ = { 0, 1, 2, 2, 2 };
        labels_t y_ = { 1, 1, 1, 2, 2 };
        fit(X_, y_);
-        checkCutPoints(expected);
+        auto computed = getCutPoints();
+        checkCutPoints(computed, expected);
    }
+
    TEST_F(TestFImdlp, ValueCutPoint)
    {
        // Case titles as stated in the doc
-        samples_t X1a{ 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0 };
-        test_result(X1a, 6, 7.3 / 2, 6, "1a");
-        samples_t X2a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
-        test_result(X2a, 6, 7.1 / 2, 4, "2a");
-        samples_t X2b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
-        test_result(X2b, 6, 7.5 / 2, 7, "2b");
-        samples_t X3a = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
-        test_result(X3a, 4, 7.1 / 2, 4, "3a");
-        samples_t X3b = { 3.1, 3.2, 3.3, 3.4, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
-        test_result(X3b, 4, 7.1 / 2, 4, "3b");
-        samples_t X4a = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.9, 4.0 };
-        test_result(X4a, 4, 6.9 / 2, 2, "4a");
-        samples_t X4b = { 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.8, 3.9, 4.0 };
-        test_result(X4b, 4, 7.5 / 2, 7, "4b");
-        samples_t X4c = { 3.1, 3.2, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7 };
-        test_result(X4c, 4, 6.9 / 2, 2, "4c");
+        samples_t X1a{ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f };
+        test_result(X1a, 6, 7.3f / 2, 6, "1a");
+        samples_t X2a = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
+        test_result(X2a, 6, 7.1f / 2, 4, "2a");
+        samples_t X2b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
+        test_result(X2b, 6, 7.5f / 2, 7, "2b");
+        samples_t X3a = { 3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
+        test_result(X3a, 4, 7.1f / 2, 4, "3a");
+        samples_t X3b = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
+        test_result(X3b, 4, 7.1f / 2, 4, "3b");
+        samples_t X4a = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f };
+        test_result(X4a, 4, 6.9f / 2, 2, "4a");
+        samples_t X4b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
+        test_result(X4b, 4, 7.5f / 2, 7, "4b");
+        samples_t X4c = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
+        test_result(X4c, 4, 6.9f / 2, 2, "4c");
+    }
+
+    TEST_F(TestFImdlp, MaxDepth)
+    {
+        // Set max_depth to 1
+        auto test = CPPFImdlp(3, 1, 0);
+        vector<cutPoints_t> expected = {
+                {5.45f},
+                {3.35f},
+                {2.45f},
+                {0.8f}
+        };
+        vector<int> depths = { 1, 1, 1, 1 };
+        test_dataset(test, "iris", expected, depths);
+    }
+
+    TEST_F(TestFImdlp, MinLength)
+    {
+        auto test = CPPFImdlp(75, 100, 0);
+        // Set min_length to 75
+        vector<cutPoints_t> expected = {
+                {5.45f, 5.75f},
+                {2.85f, 3.35f},
+                {2.45f, 4.75f},
+                {0.8f,  1.75f}
+        };
+        vector<int> depths = { 3, 2, 2, 2 };
+        test_dataset(test, "iris", expected, depths);
+    }
+
+    TEST_F(TestFImdlp, MinLengthMaxDepth)
+    {
+        // Set min_length to 75 and max_depth to 2; third argument 0 presumably leaves the number of cuts unrestricted (verify)
+        auto test = CPPFImdlp(75, 2, 0);
+        vector<cutPoints_t> expected = {
+                {5.45f, 5.75f},
+                {2.85f, 3.35f},
+                {2.45f, 4.75f},
+                {0.8f,  1.75f}
+        };
+        vector<int> depths = { 2, 2, 2, 2 }; // expected get_depth() after fitting each feature
+        test_dataset(test, "iris", expected, depths);
+    }
+
+    TEST_F(TestFImdlp, MaxCutPointsInteger)
+    {
+        // Set min_length to 75, max_depth to 2 and the proposed number of cuts to 1: only one cut point per feature is expected
+        auto test = CPPFImdlp(75, 2, 1);
+        vector<cutPoints_t> expected = {
+                {5.45f},
+                {2.85f},
+                {2.45f},
+                {0.8f}
+        };
+        vector<int> depths = { 2, 2, 2, 2 }; // expected get_depth() after fitting each feature
+        test_dataset(test, "iris", expected, depths);
+
+    }
+
+    TEST_F(TestFImdlp, MaxCutPointsFloat)
+    {
+        // Set min_length to 75, max_depth to 2 and a fractional proposed-cuts value of 0.2 (presumably a share of the sample count - verify)
+        auto test = CPPFImdlp(75, 2, 0.2f);
+        vector<cutPoints_t> expected = {
+                {5.45f, 5.75f},
+                {2.85f, 3.35f},
+                {2.45f, 4.75f},
+                {0.8f,  1.75f}
+        };
+        vector<int> depths = { 2, 2, 2, 2 }; // expected get_depth() after fitting each feature
+        test_dataset(test, "iris", expected, depths);
+    }
+
+    TEST_F(TestFImdlp, ProposedCuts)
+    {
+        vector<pair<float, size_t>> proposed_list = { {0.1f,  2}, // each pair: {proposed_cuts value, expected compute_max_num_cut_points() result}
+                                                     {0.5f,  10},
+                                                     {0.07f, 1},
+                                                     {1.0f,  1},
+                                                     {2.0f,  2} };
+        size_t expected;
+        size_t computed;
+        for (auto proposed_item : proposed_list) {
+            tie(proposed_cuts, expected) = proposed_item; // writes the fixture's proposed_cuts member directly
+            computed = compute_max_num_cut_points(); // evaluated against the data fitted in SetUp()
+            ASSERT_EQ(expected, computed);
+        }
+
+    }
+    TEST_F(TestFImdlp, TransformTest)
+    {
+        labels_t expected = {
+            5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
+            5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
+            5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
+            3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
+            3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
+            4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
+            1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
+        };
+        ArffFiles file;
+        file.load(data_path + "iris.arff", true);
+        vector<samples_t>& X = file.getX();
+        labels_t& y = file.getY();
+        fit(X[1], y); // fit on the second iris attribute, then discretize that same attribute
+        auto computed = transform(X[1]);
+        ASSERT_EQ(computed.size(), expected.size()); // fatal: the loop below indexes expected[] with computed's range
+        for (unsigned long i = 0; i < computed.size(); i++) {
+            EXPECT_EQ(computed[i], expected[i]);
+        }
     }
 }
--- a/tests/Metrics_unittest.cpp
+++ b/tests/Metrics_unittest.cpp
@@ -1,23 +1,21 @@
 #include "gtest/gtest.h"
 #include "../Metrics.h"

-
 namespace mdlp {
    class TestMetrics: public Metrics, public testing::Test {
    public:
-        labels_t y;
-        samples_t X;
-        indices_t indices;
-        precision_t precision = 0.000001;
+        labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
+        indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        precision_t precision = 0.000001f;

-        TestMetrics(): Metrics(y, indices) {}
-        void SetUp()
+        TestMetrics(): Metrics(y_, indices_) {};
+
+        void SetUp() override
        {
-            y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
-            indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
-            setData(y, indices);
+            setData(y_, indices_);
        }
    };
+
    TEST_F(TestMetrics, NumClasses)
    {
        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
@@ -25,19 +23,31 @@ namespace mdlp {
        EXPECT_EQ(2, computeNumClasses(0, 10));
        EXPECT_EQ(2, computeNumClasses(8, 10));
    }
+
    TEST_F(TestMetrics, Entropy)
    {
        EXPECT_EQ(1, entropy(0, 10));
        EXPECT_EQ(0, entropy(0, 5));
        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
        setData(y, indices);
-        ASSERT_NEAR(0.468996, entropy(0, 10), precision);
+        ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
    }
+
+    TEST_F(TestMetrics, EntropyDouble)
+    {
+        y = { 0, 0, 1, 2, 3 }; // NOTE(review): unlike the neighbouring tests, setData() is not called after replacing y - confirm this is intended
+        samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 }; // NOTE(review): Shannon entropy of {0,0,1,2,3} is ~1.9219; the last value matches the entropy with label 3 left out - verify
+        for (size_t idx = 0; idx < y.size(); ++idx) { // size_t avoids a signed/unsigned comparison with y.size()
+            ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision); // presumably the entropy of the first idx + 1 labels
+        }
+    }
+
    TEST_F(TestMetrics, InformationGain)
    {
        ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
+        ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
        y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
        setData(y, indices);
-        ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
+        ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
    }
 }
--- a/tests/cover
+++ b/tests/cover
@@ -1,4 +0,0 @@
-rm -fr lcoverage/*
-lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
-genhtml lcoverage/main_coverage.info --output-directory lcoverage
-open lcoverage/index.html
--- a/tests/datasets/diabetes.arff
+++ b/tests/datasets/diabetes.arff
@@ -0,0 +1,863 @@
+% 1. Title: Pima Indians Diabetes Database
+% 
+% 2. Sources:
+%    (a) Original owners: National Institute of Diabetes and Digestive and
+%                         Kidney Diseases
+%    (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)
+%                           Research Center, RMI Group Leader
+%                           Applied Physics Laboratory
+%                           The Johns Hopkins University
+%                           Johns Hopkins Road
+%                           Laurel, MD 20707
+%                           (301) 953-6231
+%    (c) Date received: 9 May 1990
+% 
+% 3. Past Usage:
+%     1. Smith, J. W., Everhart, J. E., Dickson, W. C., Knowler, W. C., &
+%        Johannes, R. S. (1988). Using the ADAP learning algorithm to forecast
+%        the onset of diabetes mellitus.  In Proceedings of the Symposium
+%        on Computer Applications and Medical Care (pp. 261-265).  IEEE
+%        Computer Society Press.
+% 
+%        The diagnostic, binary-valued variable investigated is whether the
+%        patient shows signs of diabetes according to World Health Organization
+%        criteria (i.e., if the 2 hour post-load plasma glucose was at least 
+%        200 mg/dl at any survey  examination or if found during routine medical
+%        care).   The population lives near Phoenix, Arizona, USA.
+% 
+%        Results: Their ADAP algorithm makes a real-valued prediction between
+%        0 and 1.  This was transformed into a binary decision using a cutoff of 
+%        0.448.  Using 576 training instances, the sensitivity and specificity
+%        of their algorithm was 76% on the remaining 192 instances.
+% 
+% 4. Relevant Information:
+%       Several constraints were placed on the selection of these instances from
+%       a larger database.  In particular, all patients here are females at
+%       least 21 years old of Pima Indian heritage.  ADAP is an adaptive learning
+%       routine that generates and executes digital analogs of perceptron-like
+%       devices.  It is a unique algorithm; see the paper for details.
+% 
+% 5. Number of Instances: 768
+% 
+% 6. Number of Attributes: 8 plus class 
+% 
+% 7. For Each Attribute: (all numeric-valued)
+%    1. Number of times pregnant
+%    2. Plasma glucose concentration at 2 hours in an oral glucose tolerance test
+%    3. Diastolic blood pressure (mm Hg)
+%    4. Triceps skin fold thickness (mm)
+%    5. 2-Hour serum insulin (mu U/ml)
+%    6. Body mass index (weight in kg/(height in m)^2)
+%    7. Diabetes pedigree function
+%    8. Age (years)
+%    9. Class variable (0 or 1)
+% 
+% 8. Missing Attribute Values: None
+% 
+% 9. Class Distribution: (class value 1 is interpreted as "tested positive for
+%    diabetes")
+% 
+%    Class Value  Number of instances
+%    0            500
+%    1            268
+% 
+% 10. Brief statistical analysis:
+% 
+%     Attribute number:    Mean:   Standard Deviation:
+%     1.                     3.8     3.4
+%     2.                   120.9    32.0
+%     3.                    69.1    19.4
+%     4.                    20.5    16.0
+%     5.                    79.8   115.2
+%     6.                    32.0     7.9
+%     7.                     0.5     0.3
+%     8.                    33.2    11.8
+% 
+% 
+%
+%
+%
+%
+% Relabeled values in attribute 'class'
+%    From: 0                       To: tested_negative     
+%    From: 1                       To: tested_positive     
+%
+@relation pima_diabetes
+@attribute 'preg' real
+@attribute 'plas' real
+@attribute 'pres' real
+@attribute 'skin' real
+@attribute 'insu' real
+@attribute 'mass' real
+@attribute 'pedi' real
+@attribute 'age' real
+@attribute 'class' { tested_negative, tested_positive}
+@data
+6,148,72,35,0,33.6,0.627,50,tested_positive
+1,85,66,29,0,26.6,0.351,31,tested_negative
+8,183,64,0,0,23.3,0.672,32,tested_positive
+1,89,66,23,94,28.1,0.167,21,tested_negative
+0,137,40,35,168,43.1,2.288,33,tested_positive
+5,116,74,0,0,25.6,0.201,30,tested_negative
+3,78,50,32,88,31,0.248,26,tested_positive
+10,115,0,0,0,35.3,0.134,29,tested_negative
+2,197,70,45,543,30.5,0.158,53,tested_positive
+8,125,96,0,0,0,0.232,54,tested_positive
+4,110,92,0,0,37.6,0.191,30,tested_negative
+10,168,74,0,0,38,0.537,34,tested_positive
+10,139,80,0,0,27.1,1.441,57,tested_negative
+1,189,60,23,846,30.1,0.398,59,tested_positive
+5,166,72,19,175,25.8,0.587,51,tested_positive
+7,100,0,0,0,30,0.484,32,tested_positive
+0,118,84,47,230,45.8,0.551,31,tested_positive
+7,107,74,0,0,29.6,0.254,31,tested_positive
+1,103,30,38,83,43.3,0.183,33,tested_negative
+1,115,70,30,96,34.6,0.529,32,tested_positive
+3,126,88,41,235,39.3,0.704,27,tested_negative
+8,99,84,0,0,35.4,0.388,50,tested_negative
+7,196,90,0,0,39.8,0.451,41,tested_positive
+9,119,80,35,0,29,0.263,29,tested_positive
+11,143,94,33,146,36.6,0.254,51,tested_positive
+10,125,70,26,115,31.1,0.205,41,tested_positive
+7,147,76,0,0,39.4,0.257,43,tested_positive
+1,97,66,15,140,23.2,0.487,22,tested_negative
+13,145,82,19,110,22.2,0.245,57,tested_negative
+5,117,92,0,0,34.1,0.337,38,tested_negative
+5,109,75,26,0,36,0.546,60,tested_negative
+3,158,76,36,245,31.6,0.851,28,tested_positive
+3,88,58,11,54,24.8,0.267,22,tested_negative
+6,92,92,0,0,19.9,0.188,28,tested_negative
+10,122,78,31,0,27.6,0.512,45,tested_negative
+4,103,60,33,192,24,0.966,33,tested_negative
+11,138,76,0,0,33.2,0.42,35,tested_negative
+9,102,76,37,0,32.9,0.665,46,tested_positive
+2,90,68,42,0,38.2,0.503,27,tested_positive
+4,111,72,47,207,37.1,1.39,56,tested_positive
+3,180,64,25,70,34,0.271,26,tested_negative
+7,133,84,0,0,40.2,0.696,37,tested_negative
+7,106,92,18,0,22.7,0.235,48,tested_negative
+9,171,110,24,240,45.4,0.721,54,tested_positive
+7,159,64,0,0,27.4,0.294,40,tested_negative
+0,180,66,39,0,42,1.893,25,tested_positive
+1,146,56,0,0,29.7,0.564,29,tested_negative
+2,71,70,27,0,28,0.586,22,tested_negative
+7,103,66,32,0,39.1,0.344,31,tested_positive
+7,105,0,0,0,0,0.305,24,tested_negative
+1,103,80,11,82,19.4,0.491,22,tested_negative
+1,101,50,15,36,24.2,0.526,26,tested_negative
+5,88,66,21,23,24.4,0.342,30,tested_negative
+8,176,90,34,300,33.7,0.467,58,tested_positive
+7,150,66,42,342,34.7,0.718,42,tested_negative
+1,73,50,10,0,23,0.248,21,tested_negative
+7,187,68,39,304,37.7,0.254,41,tested_positive
+0,100,88,60,110,46.8,0.962,31,tested_negative
+0,146,82,0,0,40.5,1.781,44,tested_negative
+0,105,64,41,142,41.5,0.173,22,tested_negative
+2,84,0,0,0,0,0.304,21,tested_negative
+8,133,72,0,0,32.9,0.27,39,tested_positive
+5,44,62,0,0,25,0.587,36,tested_negative
+2,141,58,34,128,25.4,0.699,24,tested_negative
+7,114,66,0,0,32.8,0.258,42,tested_positive
+5,99,74,27,0,29,0.203,32,tested_negative
+0,109,88,30,0,32.5,0.855,38,tested_positive
+2,109,92,0,0,42.7,0.845,54,tested_negative
+1,95,66,13,38,19.6,0.334,25,tested_negative
+4,146,85,27,100,28.9,0.189,27,tested_negative
+2,100,66,20,90,32.9,0.867,28,tested_positive
+5,139,64,35,140,28.6,0.411,26,tested_negative
+13,126,90,0,0,43.4,0.583,42,tested_positive
+4,129,86,20,270,35.1,0.231,23,tested_negative
+1,79,75,30,0,32,0.396,22,tested_negative
+1,0,48,20,0,24.7,0.14,22,tested_negative
+7,62,78,0,0,32.6,0.391,41,tested_negative
+5,95,72,33,0,37.7,0.37,27,tested_negative
+0,131,0,0,0,43.2,0.27,26,tested_positive
+2,112,66,22,0,25,0.307,24,tested_negative
+3,113,44,13,0,22.4,0.14,22,tested_negative
+2,74,0,0,0,0,0.102,22,tested_negative
+7,83,78,26,71,29.3,0.767,36,tested_negative
+0,101,65,28,0,24.6,0.237,22,tested_negative
+5,137,108,0,0,48.8,0.227,37,tested_positive
+2,110,74,29,125,32.4,0.698,27,tested_negative
+13,106,72,54,0,36.6,0.178,45,tested_negative
+2,100,68,25,71,38.5,0.324,26,tested_negative
+15,136,70,32,110,37.1,0.153,43,tested_positive
+1,107,68,19,0,26.5,0.165,24,tested_negative
+1,80,55,0,0,19.1,0.258,21,tested_negative
+4,123,80,15,176,32,0.443,34,tested_negative
+7,81,78,40,48,46.7,0.261,42,tested_negative
+4,134,72,0,0,23.8,0.277,60,tested_positive
+2,142,82,18,64,24.7,0.761,21,tested_negative
+6,144,72,27,228,33.9,0.255,40,tested_negative
+2,92,62,28,0,31.6,0.13,24,tested_negative
+1,71,48,18,76,20.4,0.323,22,tested_negative
+6,93,50,30,64,28.7,0.356,23,tested_negative
+1,122,90,51,220,49.7,0.325,31,tested_positive
+1,163,72,0,0,39,1.222,33,tested_positive
+1,151,60,0,0,26.1,0.179,22,tested_negative
+0,125,96,0,0,22.5,0.262,21,tested_negative
+1,81,72,18,40,26.6,0.283,24,tested_negative
+2,85,65,0,0,39.6,0.93,27,tested_negative
+1,126,56,29,152,28.7,0.801,21,tested_negative
+1,96,122,0,0,22.4,0.207,27,tested_negative
+4,144,58,28,140,29.5,0.287,37,tested_negative
+3,83,58,31,18,34.3,0.336,25,tested_negative
+0,95,85,25,36,37.4,0.247,24,tested_positive
+3,171,72,33,135,33.3,0.199,24,tested_positive
+8,155,62,26,495,34,0.543,46,tested_positive
+1,89,76,34,37,31.2,0.192,23,tested_negative
+4,76,62,0,0,34,0.391,25,tested_negative
+7,160,54,32,175,30.5,0.588,39,tested_positive
+4,146,92,0,0,31.2,0.539,61,tested_positive
+5,124,74,0,0,34,0.22,38,tested_positive
+5,78,48,0,0,33.7,0.654,25,tested_negative
+4,97,60,23,0,28.2,0.443,22,tested_negative
+4,99,76,15,51,23.2,0.223,21,tested_negative
+0,162,76,56,100,53.2,0.759,25,tested_positive
+6,111,64,39,0,34.2,0.26,24,tested_negative
+2,107,74,30,100,33.6,0.404,23,tested_negative
+5,132,80,0,0,26.8,0.186,69,tested_negative
+0,113,76,0,0,33.3,0.278,23,tested_positive
+1,88,30,42,99,55,0.496,26,tested_positive
+3,120,70,30,135,42.9,0.452,30,tested_negative
+1,118,58,36,94,33.3,0.261,23,tested_negative
+1,117,88,24,145,34.5,0.403,40,tested_positive
+0,105,84,0,0,27.9,0.741,62,tested_positive
+4,173,70,14,168,29.7,0.361,33,tested_positive
+9,122,56,0,0,33.3,1.114,33,tested_positive
+3,170,64,37,225,34.5,0.356,30,tested_positive
+8,84,74,31,0,38.3,0.457,39,tested_negative
+2,96,68,13,49,21.1,0.647,26,tested_negative
+2,125,60,20,140,33.8,0.088,31,tested_negative
+0,100,70,26,50,30.8,0.597,21,tested_negative
+0,93,60,25,92,28.7,0.532,22,tested_negative
+0,129,80,0,0,31.2,0.703,29,tested_negative
+5,105,72,29,325,36.9,0.159,28,tested_negative
+3,128,78,0,0,21.1,0.268,55,tested_negative
+5,106,82,30,0,39.5,0.286,38,tested_negative
+2,108,52,26,63,32.5,0.318,22,tested_negative
+10,108,66,0,0,32.4,0.272,42,tested_positive
+4,154,62,31,284,32.8,0.237,23,tested_negative
+0,102,75,23,0,0,0.572,21,tested_negative
+9,57,80,37,0,32.8,0.096,41,tested_negative
+2,106,64,35,119,30.5,1.4,34,tested_negative
+5,147,78,0,0,33.7,0.218,65,tested_negative
+2,90,70,17,0,27.3,0.085,22,tested_negative
+1,136,74,50,204,37.4,0.399,24,tested_negative
+4,114,65,0,0,21.9,0.432,37,tested_negative
+9,156,86,28,155,34.3,1.189,42,tested_positive
+1,153,82,42,485,40.6,0.687,23,tested_negative
+8,188,78,0,0,47.9,0.137,43,tested_positive
+7,152,88,44,0,50,0.337,36,tested_positive
+2,99,52,15,94,24.6,0.637,21,tested_negative
+1,109,56,21,135,25.2,0.833,23,tested_negative
+2,88,74,19,53,29,0.229,22,tested_negative
+17,163,72,41,114,40.9,0.817,47,tested_positive
+4,151,90,38,0,29.7,0.294,36,tested_negative
+7,102,74,40,105,37.2,0.204,45,tested_negative
+0,114,80,34,285,44.2,0.167,27,tested_negative
+2,100,64,23,0,29.7,0.368,21,tested_negative
+0,131,88,0,0,31.6,0.743,32,tested_positive
+6,104,74,18,156,29.9,0.722,41,tested_positive
+3,148,66,25,0,32.5,0.256,22,tested_negative
+4,120,68,0,0,29.6,0.709,34,tested_negative
+4,110,66,0,0,31.9,0.471,29,tested_negative
+3,111,90,12,78,28.4,0.495,29,tested_negative
+6,102,82,0,0,30.8,0.18,36,tested_positive
+6,134,70,23,130,35.4,0.542,29,tested_positive
+2,87,0,23,0,28.9,0.773,25,tested_negative
+1,79,60,42,48,43.5,0.678,23,tested_negative
+2,75,64,24,55,29.7,0.37,33,tested_negative
+8,179,72,42,130,32.7,0.719,36,tested_positive
+6,85,78,0,0,31.2,0.382,42,tested_negative
+0,129,110,46,130,67.1,0.319,26,tested_positive
+5,143,78,0,0,45,0.19,47,tested_negative
+5,130,82,0,0,39.1,0.956,37,tested_positive
+6,87,80,0,0,23.2,0.084,32,tested_negative
+0,119,64,18,92,34.9,0.725,23,tested_negative
+1,0,74,20,23,27.7,0.299,21,tested_negative
+5,73,60,0,0,26.8,0.268,27,tested_negative
+4,141,74,0,0,27.6,0.244,40,tested_negative
+7,194,68,28,0,35.9,0.745,41,tested_positive
+8,181,68,36,495,30.1,0.615,60,tested_positive
+1,128,98,41,58,32,1.321,33,tested_positive
+8,109,76,39,114,27.9,0.64,31,tested_positive
+5,139,80,35,160,31.6,0.361,25,tested_positive
+3,111,62,0,0,22.6,0.142,21,tested_negative
+9,123,70,44,94,33.1,0.374,40,tested_negative
+7,159,66,0,0,30.4,0.383,36,tested_positive
+11,135,0,0,0,52.3,0.578,40,tested_positive
+8,85,55,20,0,24.4,0.136,42,tested_negative
+5,158,84,41,210,39.4,0.395,29,tested_positive
+1,105,58,0,0,24.3,0.187,21,tested_negative
+3,107,62,13,48,22.9,0.678,23,tested_positive
+4,109,64,44,99,34.8,0.905,26,tested_positive
+4,148,60,27,318,30.9,0.15,29,tested_positive
+0,113,80,16,0,31,0.874,21,tested_negative
+1,138,82,0,0,40.1,0.236,28,tested_negative
+0,108,68,20,0,27.3,0.787,32,tested_negative
+2,99,70,16,44,20.4,0.235,27,tested_negative
+6,103,72,32,190,37.7,0.324,55,tested_negative
+5,111,72,28,0,23.9,0.407,27,tested_negative
+8,196,76,29,280,37.5,0.605,57,tested_positive
+5,162,104,0,0,37.7,0.151,52,tested_positive
+1,96,64,27,87,33.2,0.289,21,tested_negative
+7,184,84,33,0,35.5,0.355,41,tested_positive
+2,81,60,22,0,27.7,0.29,25,tested_negative
+0,147,85,54,0,42.8,0.375,24,tested_negative
+7,179,95,31,0,34.2,0.164,60,tested_negative
+0,140,65,26,130,42.6,0.431,24,tested_positive
+9,112,82,32,175,34.2,0.26,36,tested_positive
+12,151,70,40,271,41.8,0.742,38,tested_positive
+5,109,62,41,129,35.8,0.514,25,tested_positive
+6,125,68,30,120,30,0.464,32,tested_negative
+5,85,74,22,0,29,1.224,32,tested_positive
+5,112,66,0,0,37.8,0.261,41,tested_positive
+0,177,60,29,478,34.6,1.072,21,tested_positive
+2,158,90,0,0,31.6,0.805,66,tested_positive
+7,119,0,0,0,25.2,0.209,37,tested_negative
+7,142,60,33,190,28.8,0.687,61,tested_negative
+1,100,66,15,56,23.6,0.666,26,tested_negative
+1,87,78,27,32,34.6,0.101,22,tested_negative
+0,101,76,0,0,35.7,0.198,26,tested_negative
+3,162,52,38,0,37.2,0.652,24,tested_positive
+4,197,70,39,744,36.7,2.329,31,tested_negative
+0,117,80,31,53,45.2,0.089,24,tested_negative
+4,142,86,0,0,44,0.645,22,tested_positive
+6,134,80,37,370,46.2,0.238,46,tested_positive
+1,79,80,25,37,25.4,0.583,22,tested_negative
+4,122,68,0,0,35,0.394,29,tested_negative
+3,74,68,28,45,29.7,0.293,23,tested_negative
+4,171,72,0,0,43.6,0.479,26,tested_positive
+7,181,84,21,192,35.9,0.586,51,tested_positive
+0,179,90,27,0,44.1,0.686,23,tested_positive
+9,164,84,21,0,30.8,0.831,32,tested_positive
+0,104,76,0,0,18.4,0.582,27,tested_negative
+1,91,64,24,0,29.2,0.192,21,tested_negative
+4,91,70,32,88,33.1,0.446,22,tested_negative
+3,139,54,0,0,25.6,0.402,22,tested_positive
+6,119,50,22,176,27.1,1.318,33,tested_positive
+2,146,76,35,194,38.2,0.329,29,tested_negative
+9,184,85,15,0,30,1.213,49,tested_positive
+10,122,68,0,0,31.2,0.258,41,tested_negative
+0,165,90,33,680,52.3,0.427,23,tested_negative
+9,124,70,33,402,35.4,0.282,34,tested_negative
+1,111,86,19,0,30.1,0.143,23,tested_negative
+9,106,52,0,0,31.2,0.38,42,tested_negative
+2,129,84,0,0,28,0.284,27,tested_negative
+2,90,80,14,55,24.4,0.249,24,tested_negative
+0,86,68,32,0,35.8,0.238,25,tested_negative
+12,92,62,7,258,27.6,0.926,44,tested_positive
+1,113,64,35,0,33.6,0.543,21,tested_positive
+3,111,56,39,0,30.1,0.557,30,tested_negative
+2,114,68,22,0,28.7,0.092,25,tested_negative
+1,193,50,16,375,25.9,0.655,24,tested_negative
+11,155,76,28,150,33.3,1.353,51,tested_positive
+3,191,68,15,130,30.9,0.299,34,tested_negative
+3,141,0,0,0,30,0.761,27,tested_positive
+4,95,70,32,0,32.1,0.612,24,tested_negative
+3,142,80,15,0,32.4,0.2,63,tested_negative
+4,123,62,0,0,32,0.226,35,tested_positive
+5,96,74,18,67,33.6,0.997,43,tested_negative
+0,138,0,0,0,36.3,0.933,25,tested_positive
+2,128,64,42,0,40,1.101,24,tested_negative
+0,102,52,0,0,25.1,0.078,21,tested_negative
+2,146,0,0,0,27.5,0.24,28,tested_positive
+10,101,86,37,0,45.6,1.136,38,tested_positive
+2,108,62,32,56,25.2,0.128,21,tested_negative
+3,122,78,0,0,23,0.254,40,tested_negative
+1,71,78,50,45,33.2,0.422,21,tested_negative
+13,106,70,0,0,34.2,0.251,52,tested_negative
+2,100,70,52,57,40.5,0.677,25,tested_negative
+7,106,60,24,0,26.5,0.296,29,tested_positive
+0,104,64,23,116,27.8,0.454,23,tested_negative
+5,114,74,0,0,24.9,0.744,57,tested_negative
+2,108,62,10,278,25.3,0.881,22,tested_negative
+0,146,70,0,0,37.9,0.334,28,tested_positive
+10,129,76,28,122,35.9,0.28,39,tested_negative
+7,133,88,15,155,32.4,0.262,37,tested_negative
+7,161,86,0,0,30.4,0.165,47,tested_positive
+2,108,80,0,0,27,0.259,52,tested_positive
+7,136,74,26,135,26,0.647,51,tested_negative
+5,155,84,44,545,38.7,0.619,34,tested_negative
+1,119,86,39,220,45.6,0.808,29,tested_positive
+4,96,56,17,49,20.8,0.34,26,tested_negative
+5,108,72,43,75,36.1,0.263,33,tested_negative
+0,78,88,29,40,36.9,0.434,21,tested_negative
+0,107,62,30,74,36.6,0.757,25,tested_positive
+2,128,78,37,182,43.3,1.224,31,tested_positive
+1,128,48,45,194,40.5,0.613,24,tested_positive
+0,161,50,0,0,21.9,0.254,65,tested_negative
+6,151,62,31,120,35.5,0.692,28,tested_negative
+2,146,70,38,360,28,0.337,29,tested_positive
+0,126,84,29,215,30.7,0.52,24,tested_negative
+14,100,78,25,184,36.6,0.412,46,tested_positive
+8,112,72,0,0,23.6,0.84,58,tested_negative
+0,167,0,0,0,32.3,0.839,30,tested_positive
+2,144,58,33,135,31.6,0.422,25,tested_positive
+5,77,82,41,42,35.8,0.156,35,tested_negative
+5,115,98,0,0,52.9,0.209,28,tested_positive
+3,150,76,0,0,21,0.207,37,tested_negative
+2,120,76,37,105,39.7,0.215,29,tested_negative
+10,161,68,23,132,25.5,0.326,47,tested_positive
+0,137,68,14,148,24.8,0.143,21,tested_negative
+0,128,68,19,180,30.5,1.391,25,tested_positive
+2,124,68,28,205,32.9,0.875,30,tested_positive
+6,80,66,30,0,26.2,0.313,41,tested_negative
+0,106,70,37,148,39.4,0.605,22,tested_negative
+2,155,74,17,96,26.6,0.433,27,tested_positive
+3,113,50,10,85,29.5,0.626,25,tested_negative
+7,109,80,31,0,35.9,1.127,43,tested_positive
+2,112,68,22,94,34.1,0.315,26,tested_negative
+3,99,80,11,64,19.3,0.284,30,tested_negative
+3,182,74,0,0,30.5,0.345,29,tested_positive
+3,115,66,39,140,38.1,0.15,28,tested_negative
+6,194,78,0,0,23.5,0.129,59,tested_positive
+4,129,60,12,231,27.5,0.527,31,tested_negative
+3,112,74,30,0,31.6,0.197,25,tested_positive
+0,124,70,20,0,27.4,0.254,36,tested_positive
+13,152,90,33,29,26.8,0.731,43,tested_positive
+2,112,75,32,0,35.7,0.148,21,tested_negative
+1,157,72,21,168,25.6,0.123,24,tested_negative
+1,122,64,32,156,35.1,0.692,30,tested_positive
+10,179,70,0,0,35.1,0.2,37,tested_negative
+2,102,86,36,120,45.5,0.127,23,tested_positive
+6,105,70,32,68,30.8,0.122,37,tested_negative
+8,118,72,19,0,23.1,1.476,46,tested_negative
+2,87,58,16,52,32.7,0.166,25,tested_negative
+1,180,0,0,0,43.3,0.282,41,tested_positive
+12,106,80,0,0,23.6,0.137,44,tested_negative
+1,95,60,18,58,23.9,0.26,22,tested_negative
+0,165,76,43,255,47.9,0.259,26,tested_negative
+0,117,0,0,0,33.8,0.932,44,tested_negative
+5,115,76,0,0,31.2,0.343,44,tested_positive
+9,152,78,34,171,34.2,0.893,33,tested_positive
+7,178,84,0,0,39.9,0.331,41,tested_positive
+1,130,70,13,105,25.9,0.472,22,tested_negative
+1,95,74,21,73,25.9,0.673,36,tested_negative
+1,0,68,35,0,32,0.389,22,tested_negative
+5,122,86,0,0,34.7,0.29,33,tested_negative
+8,95,72,0,0,36.8,0.485,57,tested_negative
+8,126,88,36,108,38.5,0.349,49,tested_negative
+1,139,46,19,83,28.7,0.654,22,tested_negative
+3,116,0,0,0,23.5,0.187,23,tested_negative
+3,99,62,19,74,21.8,0.279,26,tested_negative
+5,0,80,32,0,41,0.346,37,tested_positive
+4,92,80,0,0,42.2,0.237,29,tested_negative
+4,137,84,0,0,31.2,0.252,30,tested_negative
+3,61,82,28,0,34.4,0.243,46,tested_negative
+1,90,62,12,43,27.2,0.58,24,tested_negative
+3,90,78,0,0,42.7,0.559,21,tested_negative
+9,165,88,0,0,30.4,0.302,49,tested_positive
+1,125,50,40,167,33.3,0.962,28,tested_positive
+13,129,0,30,0,39.9,0.569,44,tested_positive
+12,88,74,40,54,35.3,0.378,48,tested_negative
+1,196,76,36,249,36.5,0.875,29,tested_positive
+5,189,64,33,325,31.2,0.583,29,tested_positive
+5,158,70,0,0,29.8,0.207,63,tested_negative
+5,103,108,37,0,39.2,0.305,65,tested_negative
+4,146,78,0,0,38.5,0.52,67,tested_positive
+4,147,74,25,293,34.9,0.385,30,tested_negative
+5,99,54,28,83,34,0.499,30,tested_negative
+6,124,72,0,0,27.6,0.368,29,tested_positive
+0,101,64,17,0,21,0.252,21,tested_negative
+3,81,86,16,66,27.5,0.306,22,tested_negative
+1,133,102,28,140,32.8,0.234,45,tested_positive
+3,173,82,48,465,38.4,2.137,25,tested_positive
+0,118,64,23,89,0,1.731,21,tested_negative
+0,84,64,22,66,35.8,0.545,21,tested_negative
+2,105,58,40,94,34.9,0.225,25,tested_negative
+2,122,52,43,158,36.2,0.816,28,tested_negative
+12,140,82,43,325,39.2,0.528,58,tested_positive
+0,98,82,15,84,25.2,0.299,22,tested_negative
+1,87,60,37,75,37.2,0.509,22,tested_negative
+4,156,75,0,0,48.3,0.238,32,tested_positive
+0,93,100,39,72,43.4,1.021,35,tested_negative
+1,107,72,30,82,30.8,0.821,24,tested_negative
+0,105,68,22,0,20,0.236,22,tested_negative
+1,109,60,8,182,25.4,0.947,21,tested_negative
+1,90,62,18,59,25.1,1.268,25,tested_negative
+1,125,70,24,110,24.3,0.221,25,tested_negative
+1,119,54,13,50,22.3,0.205,24,tested_negative
+5,116,74,29,0,32.3,0.66,35,tested_positive
+8,105,100,36,0,43.3,0.239,45,tested_positive
+5,144,82,26,285,32,0.452,58,tested_positive
+3,100,68,23,81,31.6,0.949,28,tested_negative
+1,100,66,29,196,32,0.444,42,tested_negative
+5,166,76,0,0,45.7,0.34,27,tested_positive
+1,131,64,14,415,23.7,0.389,21,tested_negative
+4,116,72,12,87,22.1,0.463,37,tested_negative
+4,158,78,0,0,32.9,0.803,31,tested_positive
+2,127,58,24,275,27.7,1.6,25,tested_negative
+3,96,56,34,115,24.7,0.944,39,tested_negative
+0,131,66,40,0,34.3,0.196,22,tested_positive
+3,82,70,0,0,21.1,0.389,25,tested_negative
+3,193,70,31,0,34.9,0.241,25,tested_positive
+4,95,64,0,0,32,0.161,31,tested_positive
+6,137,61,0,0,24.2,0.151,55,tested_negative
+5,136,84,41,88,35,0.286,35,tested_positive
+9,72,78,25,0,31.6,0.28,38,tested_negative
+5,168,64,0,0,32.9,0.135,41,tested_positive
+2,123,48,32,165,42.1,0.52,26,tested_negative
+4,115,72,0,0,28.9,0.376,46,tested_positive
+0,101,62,0,0,21.9,0.336,25,tested_negative
+8,197,74,0,0,25.9,1.191,39,tested_positive
+1,172,68,49,579,42.4,0.702,28,tested_positive
+6,102,90,39,0,35.7,0.674,28,tested_negative
+1,112,72,30,176,34.4,0.528,25,tested_negative
+1,143,84,23,310,42.4,1.076,22,tested_negative
+1,143,74,22,61,26.2,0.256,21,tested_negative
+0,138,60,35,167,34.6,0.534,21,tested_positive
+3,173,84,33,474,35.7,0.258,22,tested_positive
+1,97,68,21,0,27.2,1.095,22,tested_negative
+4,144,82,32,0,38.5,0.554,37,tested_positive
+1,83,68,0,0,18.2,0.624,27,tested_negative
+3,129,64,29,115,26.4,0.219,28,tested_positive
+1,119,88,41,170,45.3,0.507,26,tested_negative
+2,94,68,18,76,26,0.561,21,tested_negative
+0,102,64,46,78,40.6,0.496,21,tested_negative
+2,115,64,22,0,30.8,0.421,21,tested_negative
+8,151,78,32,210,42.9,0.516,36,tested_positive
+4,184,78,39,277,37,0.264,31,tested_positive
+0,94,0,0,0,0,0.256,25,tested_negative
+1,181,64,30,180,34.1,0.328,38,tested_positive
+0,135,94,46,145,40.6,0.284,26,tested_negative
+1,95,82,25,180,35,0.233,43,tested_positive
+2,99,0,0,0,22.2,0.108,23,tested_negative
+3,89,74,16,85,30.4,0.551,38,tested_negative
+1,80,74,11,60,30,0.527,22,tested_negative
+2,139,75,0,0,25.6,0.167,29,tested_negative
+1,90,68,8,0,24.5,1.138,36,tested_negative
+0,141,0,0,0,42.4,0.205,29,tested_positive
+12,140,85,33,0,37.4,0.244,41,tested_negative
+5,147,75,0,0,29.9,0.434,28,tested_negative
+1,97,70,15,0,18.2,0.147,21,tested_negative
+6,107,88,0,0,36.8,0.727,31,tested_negative
+0,189,104,25,0,34.3,0.435,41,tested_positive
+2,83,66,23,50,32.2,0.497,22,tested_negative
+4,117,64,27,120,33.2,0.23,24,tested_negative
+8,108,70,0,0,30.5,0.955,33,tested_positive
+4,117,62,12,0,29.7,0.38,30,tested_positive
+0,180,78,63,14,59.4,2.42,25,tested_positive
+1,100,72,12,70,25.3,0.658,28,tested_negative
+0,95,80,45,92,36.5,0.33,26,tested_negative
+0,104,64,37,64,33.6,0.51,22,tested_positive
+0,120,74,18,63,30.5,0.285,26,tested_negative
+1,82,64,13,95,21.2,0.415,23,tested_negative
+2,134,70,0,0,28.9,0.542,23,tested_positive
+0,91,68,32,210,39.9,0.381,25,tested_negative
+2,119,0,0,0,19.6,0.832,72,tested_negative
+2,100,54,28,105,37.8,0.498,24,tested_negative
+14,175,62,30,0,33.6,0.212,38,tested_positive
+1,135,54,0,0,26.7,0.687,62,tested_negative
+5,86,68,28,71,30.2,0.364,24,tested_negative
+10,148,84,48,237,37.6,1.001,51,tested_positive
+9,134,74,33,60,25.9,0.46,81,tested_negative
+9,120,72,22,56,20.8,0.733,48,tested_negative
+1,71,62,0,0,21.8,0.416,26,tested_negative
+8,74,70,40,49,35.3,0.705,39,tested_negative
+5,88,78,30,0,27.6,0.258,37,tested_negative
+10,115,98,0,0,24,1.022,34,tested_negative
+0,124,56,13,105,21.8,0.452,21,tested_negative
+0,74,52,10,36,27.8,0.269,22,tested_negative
+0,97,64,36,100,36.8,0.6,25,tested_negative
+8,120,0,0,0,30,0.183,38,tested_positive
+6,154,78,41,140,46.1,0.571,27,tested_negative
+1,144,82,40,0,41.3,0.607,28,tested_negative
+0,137,70,38,0,33.2,0.17,22,tested_negative
+0,119,66,27,0,38.8,0.259,22,tested_negative
+7,136,90,0,0,29.9,0.21,50,tested_negative
+4,114,64,0,0,28.9,0.126,24,tested_negative
+0,137,84,27,0,27.3,0.231,59,tested_negative
+2,105,80,45,191,33.7,0.711,29,tested_positive
+7,114,76,17,110,23.8,0.466,31,tested_negative
+8,126,74,38,75,25.9,0.162,39,tested_negative
+4,132,86,31,0,28,0.419,63,tested_negative
+3,158,70,30,328,35.5,0.344,35,tested_positive
+0,123,88,37,0,35.2,0.197,29,tested_negative
+4,85,58,22,49,27.8,0.306,28,tested_negative
+0,84,82,31,125,38.2,0.233,23,tested_negative
+0,145,0,0,0,44.2,0.63,31,tested_positive
+0,135,68,42,250,42.3,0.365,24,tested_positive
+1,139,62,41,480,40.7,0.536,21,tested_negative
+0,173,78,32,265,46.5,1.159,58,tested_negative
+4,99,72,17,0,25.6,0.294,28,tested_negative
+8,194,80,0,0,26.1,0.551,67,tested_negative
+2,83,65,28,66,36.8,0.629,24,tested_negative
+2,89,90,30,0,33.5,0.292,42,tested_negative
+4,99,68,38,0,32.8,0.145,33,tested_negative
+4,125,70,18,122,28.9,1.144,45,tested_positive
+3,80,0,0,0,0,0.174,22,tested_negative
+6,166,74,0,0,26.6,0.304,66,tested_negative
+5,110,68,0,0,26,0.292,30,tested_negative
+2,81,72,15,76,30.1,0.547,25,tested_negative
+7,195,70,33,145,25.1,0.163,55,tested_positive
+6,154,74,32,193,29.3,0.839,39,tested_negative
+2,117,90,19,71,25.2,0.313,21,tested_negative
+3,84,72,32,0,37.2,0.267,28,tested_negative
+6,0,68,41,0,39,0.727,41,tested_positive
+7,94,64,25,79,33.3,0.738,41,tested_negative
+3,96,78,39,0,37.3,0.238,40,tested_negative
+10,75,82,0,0,33.3,0.263,38,tested_negative
+0,180,90,26,90,36.5,0.314,35,tested_positive
+1,130,60,23,170,28.6,0.692,21,tested_negative
+2,84,50,23,76,30.4,0.968,21,tested_negative
+8,120,78,0,0,25,0.409,64,tested_negative
+12,84,72,31,0,29.7,0.297,46,tested_positive
+0,139,62,17,210,22.1,0.207,21,tested_negative
+9,91,68,0,0,24.2,0.2,58,tested_negative
+2,91,62,0,0,27.3,0.525,22,tested_negative
+3,99,54,19,86,25.6,0.154,24,tested_negative
+3,163,70,18,105,31.6,0.268,28,tested_positive
+9,145,88,34,165,30.3,0.771,53,tested_positive
+7,125,86,0,0,37.6,0.304,51,tested_negative
+13,76,60,0,0,32.8,0.18,41,tested_negative
+6,129,90,7,326,19.6,0.582,60,tested_negative
+2,68,70,32,66,25,0.187,25,tested_negative
+3,124,80,33,130,33.2,0.305,26,tested_negative
+6,114,0,0,0,0,0.189,26,tested_negative
+9,130,70,0,0,34.2,0.652,45,tested_positive
+3,125,58,0,0,31.6,0.151,24,tested_negative
+3,87,60,18,0,21.8,0.444,21,tested_negative
+1,97,64,19,82,18.2,0.299,21,tested_negative
+3,116,74,15,105,26.3,0.107,24,tested_negative
+0,117,66,31,188,30.8,0.493,22,tested_negative
+0,111,65,0,0,24.6,0.66,31,tested_negative
+2,122,60,18,106,29.8,0.717,22,tested_negative
+0,107,76,0,0,45.3,0.686,24,tested_negative
+1,86,66,52,65,41.3,0.917,29,tested_negative
+6,91,0,0,0,29.8,0.501,31,tested_negative
+1,77,56,30,56,33.3,1.251,24,tested_negative
+4,132,0,0,0,32.9,0.302,23,tested_positive
+0,105,90,0,0,29.6,0.197,46,tested_negative
+0,57,60,0,0,21.7,0.735,67,tested_negative
+0,127,80,37,210,36.3,0.804,23,tested_negative
+3,129,92,49,155,36.4,0.968,32,tested_positive
+8,100,74,40,215,39.4,0.661,43,tested_positive
+3,128,72,25,190,32.4,0.549,27,tested_positive
+10,90,85,32,0,34.9,0.825,56,tested_positive
+4,84,90,23,56,39.5,0.159,25,tested_negative
+1,88,78,29,76,32,0.365,29,tested_negative
+8,186,90,35,225,34.5,0.423,37,tested_positive
+5,187,76,27,207,43.6,1.034,53,tested_positive
+4,131,68,21,166,33.1,0.16,28,tested_negative
+1,164,82,43,67,32.8,0.341,50,tested_negative
+4,189,110,31,0,28.5,0.68,37,tested_negative
+1,116,70,28,0,27.4,0.204,21,tested_negative
+3,84,68,30,106,31.9,0.591,25,tested_negative
+6,114,88,0,0,27.8,0.247,66,tested_negative
+1,88,62,24,44,29.9,0.422,23,tested_negative
+1,84,64,23,115,36.9,0.471,28,tested_negative
+7,124,70,33,215,25.5,0.161,37,tested_negative
+1,97,70,40,0,38.1,0.218,30,tested_negative
+8,110,76,0,0,27.8,0.237,58,tested_negative
+11,103,68,40,0,46.2,0.126,42,tested_negative
+11,85,74,0,0,30.1,0.3,35,tested_negative
+6,125,76,0,0,33.8,0.121,54,tested_positive
+0,198,66,32,274,41.3,0.502,28,tested_positive
+1,87,68,34,77,37.6,0.401,24,tested_negative
+6,99,60,19,54,26.9,0.497,32,tested_negative
+0,91,80,0,0,32.4,0.601,27,tested_negative
+2,95,54,14,88,26.1,0.748,22,tested_negative
+1,99,72,30,18,38.6,0.412,21,tested_negative
+6,92,62,32,126,32,0.085,46,tested_negative
+4,154,72,29,126,31.3,0.338,37,tested_negative
+0,121,66,30,165,34.3,0.203,33,tested_positive
+3,78,70,0,0,32.5,0.27,39,tested_negative
+2,130,96,0,0,22.6,0.268,21,tested_negative
+3,111,58,31,44,29.5,0.43,22,tested_negative
+2,98,60,17,120,34.7,0.198,22,tested_negative
+1,143,86,30,330,30.1,0.892,23,tested_negative
+1,119,44,47,63,35.5,0.28,25,tested_negative
+6,108,44,20,130,24,0.813,35,tested_negative
+2,118,80,0,0,42.9,0.693,21,tested_positive
+10,133,68,0,0,27,0.245,36,tested_negative
+2,197,70,99,0,34.7,0.575,62,tested_positive
+0,151,90,46,0,42.1,0.371,21,tested_positive
+6,109,60,27,0,25,0.206,27,tested_negative
+12,121,78,17,0,26.5,0.259,62,tested_negative
+8,100,76,0,0,38.7,0.19,42,tested_negative
+8,124,76,24,600,28.7,0.687,52,tested_positive
+1,93,56,11,0,22.5,0.417,22,tested_negative
+8,143,66,0,0,34.9,0.129,41,tested_positive
+6,103,66,0,0,24.3,0.249,29,tested_negative
+3,176,86,27,156,33.3,1.154,52,tested_positive
+0,73,0,0,0,21.1,0.342,25,tested_negative
+11,111,84,40,0,46.8,0.925,45,tested_positive
+2,112,78,50,140,39.4,0.175,24,tested_negative
+3,132,80,0,0,34.4,0.402,44,tested_positive
+2,82,52,22,115,28.5,1.699,25,tested_negative
+6,123,72,45,230,33.6,0.733,34,tested_negative
+0,188,82,14,185,32,0.682,22,tested_positive
+0,67,76,0,0,45.3,0.194,46,tested_negative
+1,89,24,19,25,27.8,0.559,21,tested_negative
+1,173,74,0,0,36.8,0.088,38,tested_positive
+1,109,38,18,120,23.1,0.407,26,tested_negative
+1,108,88,19,0,27.1,0.4,24,tested_negative
+6,96,0,0,0,23.7,0.19,28,tested_negative
+1,124,74,36,0,27.8,0.1,30,tested_negative
+7,150,78,29,126,35.2,0.692,54,tested_positive
+4,183,0,0,0,28.4,0.212,36,tested_positive
+1,124,60,32,0,35.8,0.514,21,tested_negative
+1,181,78,42,293,40,1.258,22,tested_positive
+1,92,62,25,41,19.5,0.482,25,tested_negative
+0,152,82,39,272,41.5,0.27,27,tested_negative
+1,111,62,13,182,24,0.138,23,tested_negative
+3,106,54,21,158,30.9,0.292,24,tested_negative
+3,174,58,22,194,32.9,0.593,36,tested_positive
+7,168,88,42,321,38.2,0.787,40,tested_positive
+6,105,80,28,0,32.5,0.878,26,tested_negative
+11,138,74,26,144,36.1,0.557,50,tested_positive
+3,106,72,0,0,25.8,0.207,27,tested_negative
+6,117,96,0,0,28.7,0.157,30,tested_negative
+2,68,62,13,15,20.1,0.257,23,tested_negative
+9,112,82,24,0,28.2,1.282,50,tested_positive
+0,119,0,0,0,32.4,0.141,24,tested_positive
+2,112,86,42,160,38.4,0.246,28,tested_negative
+2,92,76,20,0,24.2,1.698,28,tested_negative
+6,183,94,0,0,40.8,1.461,45,tested_negative
+0,94,70,27,115,43.5,0.347,21,tested_negative
+2,108,64,0,0,30.8,0.158,21,tested_negative
+4,90,88,47,54,37.7,0.362,29,tested_negative
+0,125,68,0,0,24.7,0.206,21,tested_negative
+0,132,78,0,0,32.4,0.393,21,tested_negative
+5,128,80,0,0,34.6,0.144,45,tested_negative
+4,94,65,22,0,24.7,0.148,21,tested_negative
+7,114,64,0,0,27.4,0.732,34,tested_positive
+0,102,78,40,90,34.5,0.238,24,tested_negative
+2,111,60,0,0,26.2,0.343,23,tested_negative
+1,128,82,17,183,27.5,0.115,22,tested_negative
+10,92,62,0,0,25.9,0.167,31,tested_negative
+13,104,72,0,0,31.2,0.465,38,tested_positive
+5,104,74,0,0,28.8,0.153,48,tested_negative
+2,94,76,18,66,31.6,0.649,23,tested_negative
+7,97,76,32,91,40.9,0.871,32,tested_positive
+1,100,74,12,46,19.5,0.149,28,tested_negative
+0,102,86,17,105,29.3,0.695,27,tested_negative
+4,128,70,0,0,34.3,0.303,24,tested_negative
+6,147,80,0,0,29.5,0.178,50,tested_positive
+4,90,0,0,0,28,0.61,31,tested_negative
+3,103,72,30,152,27.6,0.73,27,tested_negative
+2,157,74,35,440,39.4,0.134,30,tested_negative
+1,167,74,17,144,23.4,0.447,33,tested_positive
+0,179,50,36,159,37.8,0.455,22,tested_positive
+11,136,84,35,130,28.3,0.26,42,tested_positive
+0,107,60,25,0,26.4,0.133,23,tested_negative
+1,91,54,25,100,25.2,0.234,23,tested_negative
+1,117,60,23,106,33.8,0.466,27,tested_negative
+5,123,74,40,77,34.1,0.269,28,tested_negative
+2,120,54,0,0,26.8,0.455,27,tested_negative
+1,106,70,28,135,34.2,0.142,22,tested_negative
+2,155,52,27,540,38.7,0.24,25,tested_positive
+2,101,58,35,90,21.8,0.155,22,tested_negative
+1,120,80,48,200,38.9,1.162,41,tested_negative
+11,127,106,0,0,39,0.19,51,tested_negative
+3,80,82,31,70,34.2,1.292,27,tested_positive
+10,162,84,0,0,27.7,0.182,54,tested_negative
+1,199,76,43,0,42.9,1.394,22,tested_positive
+8,167,106,46,231,37.6,0.165,43,tested_positive
+9,145,80,46,130,37.9,0.637,40,tested_positive
+6,115,60,39,0,33.7,0.245,40,tested_positive
+1,112,80,45,132,34.8,0.217,24,tested_negative
+4,145,82,18,0,32.5,0.235,70,tested_positive
+10,111,70,27,0,27.5,0.141,40,tested_positive
+6,98,58,33,190,34,0.43,43,tested_negative
+9,154,78,30,100,30.9,0.164,45,tested_negative
+6,165,68,26,168,33.6,0.631,49,tested_negative
+1,99,58,10,0,25.4,0.551,21,tested_negative
+10,68,106,23,49,35.5,0.285,47,tested_negative
+3,123,100,35,240,57.3,0.88,22,tested_negative
+8,91,82,0,0,35.6,0.587,68,tested_negative
+6,195,70,0,0,30.9,0.328,31,tested_positive
+9,156,86,0,0,24.8,0.23,53,tested_positive
+0,93,60,0,0,35.3,0.263,25,tested_negative
+3,121,52,0,0,36,0.127,25,tested_positive
+2,101,58,17,265,24.2,0.614,23,tested_negative
+2,56,56,28,45,24.2,0.332,22,tested_negative
+0,162,76,36,0,49.6,0.364,26,tested_positive
+0,95,64,39,105,44.6,0.366,22,tested_negative
+4,125,80,0,0,32.3,0.536,27,tested_positive
+5,136,82,0,0,0,0.64,69,tested_negative
+2,129,74,26,205,33.2,0.591,25,tested_negative
+3,130,64,0,0,23.1,0.314,22,tested_negative
+1,107,50,19,0,28.3,0.181,29,tested_negative
+1,140,74,26,180,24.1,0.828,23,tested_negative
+1,144,82,46,180,46.1,0.335,46,tested_positive
+8,107,80,0,0,24.6,0.856,34,tested_negative
+13,158,114,0,0,42.3,0.257,44,tested_positive
+2,121,70,32,95,39.1,0.886,23,tested_negative
+7,129,68,49,125,38.5,0.439,43,tested_positive
+2,90,60,0,0,23.5,0.191,25,tested_negative
+7,142,90,24,480,30.4,0.128,43,tested_positive
+3,169,74,19,125,29.9,0.268,31,tested_positive
+0,99,0,0,0,25,0.253,22,tested_negative
+4,127,88,11,155,34.5,0.598,28,tested_negative
+4,118,70,0,0,44.5,0.904,26,tested_negative
+2,122,76,27,200,35.9,0.483,26,tested_negative
+6,125,78,31,0,27.6,0.565,49,tested_positive
+1,168,88,29,0,35,0.905,52,tested_positive
+2,129,0,0,0,38.5,0.304,41,tested_negative
+4,110,76,20,100,28.4,0.118,27,tested_negative
+6,80,80,36,0,39.8,0.177,28,tested_negative
+10,115,0,0,0,0,0.261,30,tested_positive
+2,127,46,21,335,34.4,0.176,22,tested_negative
+9,164,78,0,0,32.8,0.148,45,tested_positive
+2,93,64,32,160,38,0.674,23,tested_positive
+3,158,64,13,387,31.2,0.295,24,tested_negative
+5,126,78,27,22,29.6,0.439,40,tested_negative
+10,129,62,36,0,41.2,0.441,38,tested_positive
+0,134,58,20,291,26.4,0.352,21,tested_negative
+3,102,74,0,0,29.5,0.121,32,tested_negative
+7,187,50,33,392,33.9,0.826,34,tested_positive
+3,173,78,39,185,33.8,0.97,31,tested_positive
+10,94,72,18,0,23.1,0.595,56,tested_negative
+1,108,60,46,178,35.5,0.415,24,tested_negative
+5,97,76,27,0,35.6,0.378,52,tested_positive
+4,83,86,19,0,29.3,0.317,34,tested_negative
+1,114,66,36,200,38.1,0.289,21,tested_negative
+1,149,68,29,127,29.3,0.349,42,tested_positive
+5,117,86,30,105,39.1,0.251,42,tested_negative
+1,111,94,0,0,32.8,0.265,45,tested_negative
+4,112,78,40,0,39.4,0.236,38,tested_negative
+1,116,78,29,180,36.1,0.496,25,tested_negative
+0,141,84,26,0,32.4,0.433,22,tested_negative
+2,175,88,0,0,22.9,0.326,22,tested_negative
+2,92,52,0,0,30.1,0.141,22,tested_negative
+3,130,78,23,79,28.4,0.323,34,tested_positive
+8,120,86,0,0,28.4,0.259,22,tested_positive
+2,174,88,37,120,44.5,0.646,24,tested_positive
+2,106,56,27,165,29,0.426,22,tested_negative
+2,105,75,0,0,23.3,0.56,53,tested_negative
+4,95,60,32,0,35.4,0.284,28,tested_negative
+0,126,86,27,120,27.4,0.515,21,tested_negative
+8,65,72,23,0,32,0.6,42,tested_negative
+2,99,60,17,160,36.6,0.453,21,tested_negative
+1,102,74,0,0,39.5,0.293,42,tested_positive
+11,120,80,37,150,42.3,0.785,48,tested_positive
+3,102,44,20,94,30.8,0.4,26,tested_negative
+1,109,58,18,116,28.5,0.219,22,tested_negative
+9,140,94,0,0,32.7,0.734,45,tested_positive
+13,153,88,37,140,40.6,1.174,39,tested_negative
+12,100,84,33,105,30,0.488,46,tested_negative
+1,147,94,41,0,49.3,0.358,27,tested_positive
+1,81,74,41,57,46.3,1.096,32,tested_negative
+3,187,70,22,200,36.4,0.408,36,tested_positive
+6,162,62,0,0,24.3,0.178,50,tested_positive
+4,136,70,0,0,31.2,1.182,22,tested_positive
+1,121,78,39,74,39,0.261,28,tested_negative
+3,108,62,24,0,26,0.223,25,tested_negative
+0,181,88,44,510,43.3,0.222,26,tested_positive
+8,154,78,32,0,32.4,0.443,45,tested_positive
+1,128,88,39,110,36.5,1.057,37,tested_positive
+7,137,90,41,0,32,0.391,39,tested_negative
+0,123,72,0,0,36.3,0.258,52,tested_positive
+1,106,76,0,0,37.5,0.197,26,tested_negative
+6,190,92,0,0,35.5,0.278,66,tested_positive
+2,88,58,26,16,28.4,0.766,22,tested_negative
+9,170,74,31,0,44,0.403,43,tested_positive
+9,89,62,0,0,22.5,0.142,33,tested_negative
+10,101,76,48,180,32.9,0.171,63,tested_negative
+2,122,70,27,0,36.8,0.34,27,tested_negative
+5,121,72,23,112,26.2,0.245,30,tested_negative
+1,126,60,0,0,30.1,0.349,47,tested_positive
+1,93,70,31,0,30.4,0.315,23,tested_negative
--- a/tests/datasets/glass.arff
+++ b/tests/datasets/glass.arff
@@ -114,7 +114,7 @@
@attribute 'Ca' real
@attribute 'Ba' real
@attribute 'Fe' real
-@attribute 'Type' { 'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
+@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@data
 1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
 1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
--- a/tests/datasets/liver-disorders.arff
+++ b/tests/datasets/liver-disorders.arff
@@ -0,0 +1,399 @@
+% 1. Title: BUPA liver disorders
+% 
+% 2. Source information:
+%    -- Creators: BUPA Medical Research Ltd.
+%    -- Donor: Richard S. Forsyth
+%              8 Grosvenor Avenue
+%              Mapperley Park
+%              Nottingham NG3 5DX
+%              0602-621676
+%    -- Date: 5/15/1990
+% 
+% 3. Past usage: 
+%    -- None known other than what is shown in the PC/BEAGLE User's Guide
+%       (written by Richard S. Forsyth).
+% 
+% 4. Relevant information:
+%    -- The first 5 variables are all blood tests which are thought
+%       to be sensitive to liver disorders that might arise from
+%       excessive alcohol consumption.  Each line in the bupa.data file
+%       constitutes the record of a single male individual.
+%    -- It appears that drinks>5 is some sort of a selector on this database.
+%       See the PC/BEAGLE User's Guide for more information.
+% 
+% 5. Number of instances: 345
+% 
+% 6. Number of attributes: 7 overall
+% 
+% 7. Attribute information:
+%    1. mcv	mean corpuscular volume
+%    2. alkphos	alkaline phosphatase
+%    3. sgpt	alanine aminotransferase
+%    4. sgot 	aspartate aminotransferase
+%    5. gammagt	gamma-glutamyl transpeptidase
+%    6. drinks	number of half-pint equivalents of alcoholic beverages
+%                 drunk per day
+%    7. selector  field used to split data into two sets
+% 
+% 8. Missing values: none
+% Information about the dataset
+% CLASSTYPE: nominal
+% CLASSINDEX: last
+%
+
+@relation liver-disorders
+
+@attribute mcv INTEGER
+@attribute alkphos INTEGER
+@attribute sgpt INTEGER
+@attribute sgot INTEGER
+@attribute gammagt INTEGER
+@attribute drinks REAL
+@attribute selector {1,2}
+
+@data
+85,92,45,27,31,0.0,1
+85,64,59,32,23,0.0,2
+86,54,33,16,54,0.0,2
+91,78,34,24,36,0.0,2
+87,70,12,28,10,0.0,2
+98,55,13,17,17,0.0,2
+88,62,20,17,9,0.5,1
+88,67,21,11,11,0.5,1
+92,54,22,20,7,0.5,1
+90,60,25,19,5,0.5,1
+89,52,13,24,15,0.5,1
+82,62,17,17,15,0.5,1
+90,64,61,32,13,0.5,1
+86,77,25,19,18,0.5,1
+96,67,29,20,11,0.5,1
+91,78,20,31,18,0.5,1
+89,67,23,16,10,0.5,1
+89,79,17,17,16,0.5,1
+91,107,20,20,56,0.5,1
+94,116,11,33,11,0.5,1
+92,59,35,13,19,0.5,1
+93,23,35,20,20,0.5,1
+90,60,23,27,5,0.5,1
+96,68,18,19,19,0.5,1
+84,80,47,33,97,0.5,1
+92,70,24,13,26,0.5,1
+90,47,28,15,18,0.5,1
+88,66,20,21,10,0.5,1
+91,102,17,13,19,0.5,1
+87,41,31,19,16,0.5,1
+86,79,28,16,17,0.5,1
+91,57,31,23,42,0.5,1
+93,77,32,18,29,0.5,1
+88,96,28,21,40,0.5,1
+94,65,22,18,11,0.5,1
+91,72,155,68,82,0.5,2
+85,54,47,33,22,0.5,2
+79,39,14,19,9,0.5,2
+85,85,25,26,30,0.5,2
+89,63,24,20,38,0.5,2
+84,92,68,37,44,0.5,2
+89,68,26,39,42,0.5,2
+89,101,18,25,13,0.5,2
+86,84,18,14,16,0.5,2
+85,65,25,14,18,0.5,2
+88,61,19,21,13,0.5,2
+92,56,14,16,10,0.5,2
+95,50,29,25,50,0.5,2
+91,75,24,22,11,0.5,2
+83,40,29,25,38,0.5,2
+89,74,19,23,16,0.5,2
+85,64,24,22,11,0.5,2
+92,57,64,36,90,0.5,2
+94,48,11,23,43,0.5,2
+87,52,21,19,30,0.5,2
+85,65,23,29,15,0.5,2
+84,82,21,21,19,0.5,2
+88,49,20,22,19,0.5,2
+96,67,26,26,36,0.5,2
+90,63,24,24,24,0.5,2
+90,45,33,34,27,0.5,2
+90,72,14,15,18,0.5,2
+91,55,4,8,13,0.5,2
+91,52,15,22,11,0.5,2
+87,71,32,19,27,1.0,1
+89,77,26,20,19,1.0,1
+89,67,5,17,14,1.0,2
+85,51,26,24,23,1.0,2
+103,75,19,30,13,1.0,2
+90,63,16,21,14,1.0,2
+90,63,29,23,57,2.0,1
+90,67,35,19,35,2.0,1
+87,66,27,22,9,2.0,1
+90,73,34,21,22,2.0,1
+86,54,20,21,16,2.0,1
+90,80,19,14,42,2.0,1
+87,90,43,28,156,2.0,2
+96,72,28,19,30,2.0,2
+91,55,9,25,16,2.0,2
+95,78,27,25,30,2.0,2
+92,101,34,30,64,2.0,2
+89,51,41,22,48,2.0,2
+91,99,42,33,16,2.0,2
+94,58,21,18,26,2.0,2
+92,60,30,27,297,2.0,2
+94,58,21,18,26,2.0,2
+88,47,33,26,29,2.0,2
+92,65,17,25,9,2.0,2
+92,79,22,20,11,3.0,1
+84,83,20,25,7,3.0,1
+88,68,27,21,26,3.0,1
+86,48,20,20,6,3.0,1
+99,69,45,32,30,3.0,1
+88,66,23,12,15,3.0,1
+89,62,42,30,20,3.0,1
+90,51,23,17,27,3.0,1
+81,61,32,37,53,3.0,2
+89,89,23,18,104,3.0,2
+89,65,26,18,36,3.0,2
+92,75,26,26,24,3.0,2
+85,59,25,20,25,3.0,2
+92,61,18,13,81,3.0,2
+89,63,22,27,10,4.0,1
+90,84,18,23,13,4.0,1
+88,95,25,19,14,4.0,1
+89,35,27,29,17,4.0,1
+91,80,37,23,27,4.0,1
+91,109,33,15,18,4.0,1
+91,65,17,5,7,4.0,1
+88,107,29,20,50,4.0,2
+87,76,22,55,9,4.0,2
+87,86,28,23,21,4.0,2
+87,42,26,23,17,4.0,2
+88,80,24,25,17,4.0,2
+90,96,34,49,169,4.0,2
+86,67,11,15,8,4.0,2
+92,40,19,20,21,4.0,2
+85,60,17,21,14,4.0,2
+89,90,15,17,25,4.0,2
+91,57,15,16,16,4.0,2
+96,55,48,39,42,4.0,2
+79,101,17,27,23,4.0,2
+90,134,14,20,14,4.0,2
+89,76,14,21,24,4.0,2
+88,93,29,27,31,4.0,2
+90,67,10,16,16,4.0,2
+92,73,24,21,48,4.0,2
+91,55,28,28,82,4.0,2
+83,45,19,21,13,4.0,2
+90,74,19,14,22,4.0,2
+92,66,21,16,33,5.0,1
+93,63,26,18,18,5.0,1
+86,78,47,39,107,5.0,2
+97,44,113,45,150,5.0,2
+87,59,15,19,12,5.0,2
+86,44,21,11,15,5.0,2
+87,64,16,20,24,5.0,2
+92,57,21,23,22,5.0,2
+90,70,25,23,112,5.0,2
+99,59,17,19,11,5.0,2
+92,80,10,26,20,6.0,1
+95,60,26,22,28,6.0,1
+91,63,25,26,15,6.0,1
+92,62,37,21,36,6.0,1
+95,50,13,14,15,6.0,1
+90,76,37,19,50,6.0,1
+96,70,70,26,36,6.0,1
+95,62,64,42,76,6.0,1
+92,62,20,23,20,6.0,1
+91,63,25,26,15,6.0,1
+82,56,67,38,92,6.0,2
+92,82,27,24,37,6.0,2
+90,63,12,26,21,6.0,2
+88,37,9,15,16,6.0,2
+100,60,29,23,76,6.0,2
+98,43,35,23,69,6.0,2
+91,74,87,50,67,6.0,2
+92,87,57,25,44,6.0,2
+93,99,36,34,48,6.0,2
+90,72,17,19,19,6.0,2
+97,93,21,20,68,6.0,2
+93,50,18,25,17,6.0,2
+90,57,20,26,33,6.0,2
+92,76,31,28,41,6.0,2
+88,55,19,17,14,6.0,2
+89,63,24,29,29,6.0,2
+92,79,70,32,84,7.0,1
+92,93,58,35,120,7.0,1
+93,84,58,47,62,7.0,2
+97,71,29,22,52,8.0,1
+84,99,33,19,26,8.0,1
+96,44,42,23,73,8.0,1
+90,62,22,21,21,8.0,1
+92,94,18,17,6,8.0,1
+90,67,77,39,114,8.0,1
+97,71,29,22,52,8.0,1
+91,69,25,25,66,8.0,2
+93,59,17,20,14,8.0,2
+92,95,85,48,200,8.0,2
+90,50,26,22,53,8.0,2
+91,62,59,47,60,8.0,2
+92,93,22,28,123,9.0,1
+92,77,86,41,31,10.0,1
+86,66,22,24,26,10.0,2
+98,57,31,34,73,10.0,2
+95,80,50,64,55,10.0,2
+92,108,53,33,94,12.0,2
+97,92,22,28,49,12.0,2
+93,77,39,37,108,16.0,1
+94,83,81,34,201,20.0,1
+87,75,25,21,14,0.0,1
+88,56,23,18,12,0.0,1
+84,97,41,20,32,0.0,2
+94,91,27,20,15,0.5,1
+97,62,17,13,5,0.5,1
+92,85,25,20,12,0.5,1
+82,48,27,15,12,0.5,1
+88,74,31,25,15,0.5,1
+95,77,30,14,21,0.5,1
+88,94,26,18,8,0.5,1
+91,70,19,19,22,0.5,1
+83,54,27,15,12,0.5,1
+91,105,40,26,56,0.5,1
+86,79,37,28,14,0.5,1
+91,96,35,22,135,0.5,1
+89,82,23,14,35,0.5,1
+90,73,24,23,11,0.5,1
+90,87,19,25,19,0.5,1
+89,82,33,32,18,0.5,1
+85,79,17,8,9,0.5,1
+85,119,30,26,17,0.5,1
+78,69,24,18,31,0.5,1
+88,107,34,21,27,0.5,1
+89,115,17,27,7,0.5,1
+92,67,23,15,12,0.5,1
+89,101,27,34,14,0.5,1
+91,84,11,12,10,0.5,1
+94,101,41,20,53,0.5,2
+88,46,29,22,18,0.5,2
+88,122,35,29,42,0.5,2
+84,88,28,25,35,0.5,2
+90,79,18,15,24,0.5,2
+87,69,22,26,11,0.5,2
+65,63,19,20,14,0.5,2
+90,64,12,17,14,0.5,2
+85,58,18,24,16,0.5,2
+88,81,41,27,36,0.5,2
+86,78,52,29,62,0.5,2
+82,74,38,28,48,0.5,2
+86,58,36,27,59,0.5,2
+94,56,30,18,27,0.5,2
+87,57,30,30,22,0.5,2
+98,74,148,75,159,0.5,2
+94,75,20,25,38,0.5,2
+83,68,17,20,71,0.5,2
+93,56,25,21,33,0.5,2
+101,65,18,21,22,0.5,2
+92,65,25,20,31,0.5,2
+92,58,14,16,13,0.5,2
+86,58,16,23,23,0.5,2
+85,62,15,13,22,0.5,2
+86,57,13,20,13,0.5,2
+86,54,26,30,13,0.5,2
+81,41,33,27,34,1.0,1
+91,67,32,26,13,1.0,1
+91,80,21,19,14,1.0,1
+92,60,23,15,19,1.0,1
+91,60,32,14,8,1.0,1
+93,65,28,22,10,1.0,1
+90,63,45,24,85,1.0,2
+87,92,21,22,37,1.0,2
+83,78,31,19,115,1.0,2
+95,62,24,23,14,1.0,2
+93,59,41,30,48,1.0,2
+84,82,43,32,38,2.0,1
+87,71,33,20,22,2.0,1
+86,44,24,15,18,2.0,1
+86,66,28,24,21,2.0,1
+88,58,31,17,17,2.0,1
+90,61,28,29,31,2.0,1
+88,69,70,24,64,2.0,1
+93,87,18,17,26,2.0,1
+98,58,33,21,28,2.0,1
+91,44,18,18,23,2.0,2
+87,75,37,19,70,2.0,2
+94,91,30,26,25,2.0,2
+88,85,14,15,10,2.0,2
+89,109,26,25,27,2.0,2
+87,59,37,27,34,2.0,2
+93,58,20,23,18,2.0,2
+88,57,9,15,16,2.0,2
+94,65,38,27,17,3.0,1
+91,71,12,22,11,3.0,1
+90,55,20,20,16,3.0,1
+91,64,21,17,26,3.0,2
+88,47,35,26,33,3.0,2
+82,72,31,20,84,3.0,2
+85,58,83,49,51,3.0,2
+91,54,25,22,35,4.0,1
+98,50,27,25,53,4.0,2
+86,62,29,21,26,4.0,2
+89,48,32,22,14,4.0,2
+82,68,20,22,9,4.0,2
+83,70,17,19,23,4.0,2
+96,70,21,26,21,4.0,2
+94,117,77,56,52,4.0,2
+93,45,11,14,21,4.0,2
+93,49,27,21,29,4.0,2
+84,73,46,32,39,4.0,2
+91,63,17,17,46,4.0,2
+90,57,31,18,37,4.0,2
+87,45,19,13,16,4.0,2
+91,68,14,20,19,4.0,2
+86,55,29,35,108,4.0,2
+91,86,52,47,52,4.0,2
+88,46,15,33,55,4.0,2
+85,52,22,23,34,4.0,2
+89,72,33,27,55,4.0,2
+95,59,23,18,19,4.0,2
+94,43,154,82,121,4.0,2
+96,56,38,26,23,5.0,2
+90,52,10,17,12,5.0,2
+94,45,20,16,12,5.0,2
+99,42,14,21,49,5.0,2
+93,102,47,23,37,5.0,2
+94,71,25,26,31,5.0,2
+92,73,33,34,115,5.0,2
+87,54,41,29,23,6.0,1
+92,67,15,14,14,6.0,1
+98,101,31,26,32,6.0,1
+92,53,51,33,92,6.0,1
+97,94,43,43,82,6.0,1
+93,43,11,16,54,6.0,1
+93,68,24,18,19,6.0,1
+95,36,38,19,15,6.0,1
+99,86,58,42,203,6.0,1
+98,66,103,57,114,6.0,1
+92,80,10,26,20,6.0,1
+96,74,27,25,43,6.0,2
+95,93,21,27,47,6.0,2
+86,109,16,22,28,6.0,2
+91,46,30,24,39,7.0,2
+102,82,34,78,203,7.0,2
+85,50,12,18,14,7.0,2
+91,57,33,23,12,8.0,1
+91,52,76,32,24,8.0,1
+93,70,46,30,33,8.0,1
+87,55,36,19,25,8.0,1
+98,123,28,24,31,8.0,1
+82,55,18,23,44,8.0,2
+95,73,20,25,225,8.0,2
+97,80,17,20,53,8.0,2
+100,83,25,24,28,8.0,2
+88,91,56,35,126,9.0,2
+91,138,45,21,48,10.0,1
+92,41,37,22,37,10.0,1
+86,123,20,25,23,10.0,2
+91,93,35,34,37,10.0,2
+87,87,15,23,11,10.0,2
+87,56,52,43,55,10.0,2
+99,75,26,24,41,12.0,1
+96,69,53,43,203,12.0,2
+98,77,55,35,89,15.0,1
+91,68,27,26,14,16.0,1
+98,99,57,45,65,20.0,1
--- a/tests/test
+++ b/tests/test
@@ -1,12 +1,18 @@
+#!/bin/bash
+if [ -d build ] ; then
+	rm -fr build
+fi
+if [ -d gcovr-report ] ; then
+	rm -fr gcovr-report
+fi
 cmake -S . -B build -Wno-dev 
-if test $? -ne 0; then
-   echo "Error in creating build commands."
-   exit 1
-fi
 cmake --build build
-if test $? -ne 0; then
-   echo "Error in build command."
-   exit 1
-fi
 cd build
 ctest --output-on-failure
+cd ..  # leave build/ again
+mkdir gcovr-report
+#lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
+#lcov --remove lcoverage/main_coverage.info 'v1/*' '/Applications/*' '*/tests/*' --output-file lcoverage/main_coverage.info -q
+#lcov --list lcoverage/main_coverage.info
+cd ..  # go up one more level; the tests/gcovr-report path below suggests the script is started from tests/ (TODO confirm)
+gcovr  --gcov-filter "CPPFImdlp.cpp" --gcov-filter "Metrics.cpp" --gcov-filter "BinDisc.cpp" --txt --sonarqube=tests/gcovr-report/coverage.xml --exclude-noncode-lines
--- a/tests/testKbins.py
+++ b/tests/testKbins.py
@@ -0,0 +1,404 @@
+from scipy.io.arff import loadarff
+from sklearn.preprocessing import KBinsDiscretizer
+
+
+def test(clf, X, expected, title):  # fit clf on the 1-D samples X and compare its output with expected
+    X = [[x] for x in X]  # wrap every sample in its own single-element row
+    clf.fit(X)
+    computed = [int(x[0]) for x in clf.transform(X)]  # first column of each transformed row, as int
+    print(f"{title}")
+    print(f"{computed=}")
+    print(f"{expected=}")
+    assert computed == expected  # abort the script on the first mismatch
+    print("-" * 80)
+
+
+# Discretizers under test: uniform and quantile strategies, with 3 and 4 bins
+clf3u = KBinsDiscretizer(
+    n_bins=3, encode="ordinal", strategy="uniform", subsample=200_000
+)
+clf3q = KBinsDiscretizer(
+    n_bins=3, encode="ordinal", strategy="quantile", subsample=200_000
+)
+clf4u = KBinsDiscretizer(
+    n_bins=4, encode="ordinal", strategy="uniform", subsample=200_000
+)
+clf4q = KBinsDiscretizer(
+    n_bins=4, encode="ordinal", strategy="quantile", subsample=200_000
+)
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
+labels = [0, 0, 0, 1, 1, 1, 2, 2, 2]
+test(clf3u, X, labels, title="Easy3BinsUniform")
+test(clf3q, X, labels, title="Easy3BinsQuantile")
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
+labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
+# In C++ both strategies give the same result, unlike here
+labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
+test(clf3u, X, labels, title="X10BinsUniform")
+test(clf3q, X, labels2, title="X10BinsQuantile")
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
+labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2]
+# In C++ both strategies give the same result, unlike here
+# labels2 = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]
+test(clf3u, X, labels, title="X11BinsUniform")
+test(clf3q, X, labels, title="X11BinsQuantile")
+#
+X = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+labels = [0, 0, 0, 0, 0, 0]
+test(clf3u, X, labels, title="ConstantUniform")
+test(clf3q, X, labels, title="ConstantQuantile")
+#
+X = [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
+labels = [2, 0, 0, 2, 0, 0, 2, 0, 0]
+labels2 = [1, 0, 0, 1, 0, 0, 1, 0, 0]  # same as in C++
+test(clf3u, X, labels, title="EasyRepeatedUniform")
+test(clf3q, X, labels2, title="EasyRepeatedQuantile")
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
+labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
+test(clf4u, X, labels, title="Easy4BinsUniform")
+test(clf4q, X, labels, title="Easy4BinsQuantile")
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
+labels = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
+test(clf4u, X, labels, title="X13BinsUniform")
+test(clf4q, X, labels, title="X13BinsQuantile")
+#
+X = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
+labels = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
+test(clf4u, X, labels, title="X14BinsUniform")
+test(clf4q, X, labels, title="X14BinsQuantile")
+#
+X1 = [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
+X2 = [15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
+labels1 = [3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0]
+labels2 = [3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0]
+test(clf4u, X1, labels1, title="X15BinsUniform")
+test(clf4q, X2, labels2, title="X15BinsQuantile")
+#
+X = [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
+labels = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3]
+test(clf4u, X, labels, title="RepeatedValuesUniform")
+test(clf4q, X, labels, title="RepeatedValuesQuantile")
+
+print(f"Uniform   {clf4u.bin_edges_=}")  # bin edges left on each 4-bin discretizer by its last fit above
+print(f"Quantile  {clf4q.bin_edges_=}")  # label typo fixed (was "Quaintile"); padded to keep the columns aligned
+print("-" * 80)
+#
+data, meta = loadarff("tests/datasets/iris.arff")
+labelsu = [
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    3,
+    2,
+    2,
+    1,
+    2,
+    1,
+    2,
+    0,
+    2,
+    1,
+    0,
+    1,
+    1,
+    2,
+    1,
+    2,
+    1,
+    1,
+    2,
+    1,
+    1,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    2,
+    2,
+    1,
+    1,
+    1,
+    2,
+    1,
+    0,
+    1,
+    1,
+    1,
+    2,
+    0,
+    1,
+    2,
+    1,
+    3,
+    2,
+    2,
+    3,
+    0,
+    3,
+    2,
+    3,
+    2,
+    2,
+    2,
+    1,
+    1,
+    2,
+    2,
+    3,
+    3,
+    1,
+    2,
+    1,
+    3,
+    2,
+    2,
+    3,
+    2,
+    2,
+    2,
+    3,
+    3,
+    3,
+    2,
+    2,
+    2,
+    3,
+    2,
+    2,
+    1,
+    2,
+    2,
+    2,
+    1,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    1,
+]
+labelsq = [
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    2,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    1,
+    1,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    3,
+    3,
+    3,
+    1,
+    3,
+    1,
+    2,
+    0,
+    3,
+    1,
+    0,
+    2,
+    2,
+    2,
+    1,
+    3,
+    1,
+    2,
+    2,
+    1,
+    2,
+    2,
+    2,
+    2,
+    3,
+    3,
+    3,
+    3,
+    2,
+    1,
+    1,
+    1,
+    2,
+    2,
+    1,
+    2,
+    3,
+    2,
+    1,
+    1,
+    1,
+    2,
+    2,
+    0,
+    1,
+    1,
+    1,
+    2,
+    1,
+    1,
+    2,
+    2,
+    3,
+    2,
+    3,
+    3,
+    0,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    1,
+    2,
+    3,
+    3,
+    3,
+    3,
+    2,
+    3,
+    1,
+    3,
+    2,
+    3,
+    3,
+    2,
+    2,
+    3,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2,
+    3,
+    2,
+    3,
+    2,
+    3,
+    3,
+    3,
+    2,
+    3,
+    3,
+    3,
+    2,
+    3,
+    2,
+    2,
+]
+test(clf4u, data["sepallength"], labelsu, title="IrisUniform")
+test(clf4q, data["sepallength"], labelsq, title="IrisQuantile")
+# print("Labels")
+# print(labels)
+# print("Expected")
+# print(expected)
+# for i in range(len(labels)):
+#     if labels[i] != expected[i]:
+#         print(f"Error at {i} {labels[i]} != {expected[i]}")
--- a/typesFImdlp.h
+++ b/typesFImdlp.h
@@ -1,5 +1,6 @@
 #ifndef TYPES_H
 #define TYPES_H
+
 #include <vector>
 #include <map>
 #include <stdexcept>
@@ -7,11 +8,11 @@
 using namespace std;
 namespace mdlp {
    typedef float precision_t;
-    typedef vector<precision_t> samples_t;
-    typedef vector<int> labels_t;
-    typedef vector<size_t> indices_t;
-    typedef vector<precision_t> cutPoints_t;
-    typedef map<pair<int, int>, precision_t> cacheEnt_t;
-    typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
+    typedef std::vector<precision_t> samples_t;
+    typedef std::vector<int> labels_t;
+    typedef std::vector<size_t> indices_t;
+    typedef std::vector<precision_t> cutPoints_t;
+    typedef std::map<std::pair<int, int>, precision_t> cacheEnt_t;
+    typedef std::map<std::tuple<int, int, int>, precision_t> cacheIg_t;
 }
 #endif
Author	SHA1	Message	Date
Ricardo Montañana Gómez	f258fc220f	Merge pull request #7 from rmontanana/BinDisc Implement BinDisc and tests	2024-06-05 11:08:56 +02:00
Ricardo Montañana Gómez	0beeda320d	Update workflow build	2024-06-05 10:56:49 +02:00
Ricardo Montañana Gómez	6b68a41c42	Implement BinDisc and tests	2024-06-05 10:45:11 +02:00
Ricardo Montañana	236d1b2f8b	Update sonarcloud github action	2024-05-02 12:51:40 +02:00
Ricardo Montañana	52ee93178f	Update dockerfile	2024-05-02 10:46:29 +00:00
Ricardo Montañana	eeda4347e9	Add logo to README	2024-05-02 11:56:18 +02:00
Ricardo Montañana	5708dc3de9	Fix initialization mistake in transform	2023-08-01 17:30:37 +02:00
Ricardo Montañana	fbffc3a9c4	Remove sample from library binary file	2023-07-20 18:42:46 +02:00
Ricardo Montañana	ab3786e2a2	Add transform test	2023-07-06 16:40:58 +02:00
Ricardo Montañana	be1917d05b	Fix attribute name extraction in ArffFiles	2023-07-06 16:15:23 +02:00
Ricardo Montañana	5679d607e5	Add transform method to discretize values using CutPoints	2023-07-06 16:06:52 +02:00
Ricardo Montañana	e8559faf1f	Add diabetes dataset to sample	2023-06-12 12:34:04 +02:00
Ricardo Montañana	b21e85f5e8	Add devcontainer and tasks config for docker dev	2023-05-02 14:33:22 +02:00
Ricardo Montañana Gómez	db76afc4e2	Merge pull request #6 from rmontanana/max_cut_points_entropy Max cut points entropy	2023-04-25 10:52:00 +02:00
Ricardo Montañana	a1f26a257c	Reformat code and update version	2023-04-25 10:48:59 +02:00
Ricardo Montañana	22997f5d69	Add debug option in CMakeLists sample	2023-04-25 10:20:12 +02:00
Ricardo Montañana	ef16488ffa	Remove vscode config files	2023-04-14 11:55:26 +02:00
Ricardo Montañana	449bf3a67e	refactor sample	2023-04-14 11:53:25 +02:00
Ricardo Montañana	e689d1f69c	refactor computing max_cuts	2023-04-14 11:53:16 +02:00
Ricardo Montañana	d77d27459b	refactor system types in library Add new test taken from join_fit in FImdlp python Update instructions in README	2023-04-11 19:24:31 +02:00
Ricardo Montañana	49c08bfe12	Change some types in sample	2023-04-02 18:52:25 +02:00
Ricardo Montañana	62e9276fbf	Refactor sonar properties	2023-04-02 14:24:24 +02:00
Ricardo Montañana	c52c7d0828	Refactor sonar properties	2023-04-02 11:59:34 +02:00
Ricardo Montañana Gómez	0b35a15d62	Merge pull request #5 from rmontanana/hiperparameters -Fix a big mistake in sortIndices method (removed unneeded loop) -Add three hyperparameters to algorithm: * max_depth: maximum level of recursion when looking for cut point candidates. * min_length: minimum length of the interval of samples to be searched for candidates. * max_cut: Maximum number of cutpoints. This could be achieved in two ways: a natural number meaning the maximum number of cut points in each feature of the dataset, or this number could be a number in the range (0, 1) meaning a proportion of the number of samples.	2023-04-01 19:05:12 +02:00
Ricardo Montañana	c662a96da8	Refactor github build action	2023-04-01 17:59:46 +02:00
Ricardo Montañana	0ead15be7c	Refactor github build action	2023-04-01 17:53:37 +02:00
Ricardo Montañana	da41a9317d	Refactor github build action	2023-04-01 17:53:00 +02:00
Ricardo Montañana	42e83b3d26	move limits include to CPPFImdlp header	2023-03-22 18:17:11 +01:00
Ricardo Montañana	77135739cf	Reformat some test files	2023-03-21 09:55:40 +01:00
Ricardo Montañana	27ea3bf338	Refactor tests	2023-03-21 00:53:18 +01:00
Ricardo Montañana	12222f7903	Remove trailing space in attribute type of Arff	2023-03-20 20:24:32 +01:00
Ricardo Montañana	cfade7a556	Remove unneeded loop in sortIndices Add some static casts	2023-03-19 19:13:37 +01:00
Ricardo Montañana	f0845c5bd1	Fix mistake in class type of ArffFiles Add some type casting to CPPFImdlp Add additional path to datasets in tests Fix some smells in sample Join CMakeLists	2023-03-18 18:40:10 +01:00
Ricardo Montañana	1f4abade2c	Add launch.json for debugging sample in vscode	2023-03-17 00:14:28 +01:00
Ricardo Montañana	770502c8e5	Update sample	2023-03-14 11:36:38 +01:00
Ricardo Montañana	ed7433672d	Add checked strings in exceptions	2023-03-13 17:45:06 +01:00
Ricardo Montañana	14860ea0b9	Fix smell and add new test	2023-03-13 17:17:31 +01:00
Ricardo Montañana	d9a6f528f6	Fix 2 code smell	2023-03-13 16:56:09 +01:00
Ricardo Montañana	7551b0d669	Refactor constructor	2023-03-13 01:36:29 +01:00
Ricardo Montañana	ffb8df4d1c	Add max_cutpoints Hyperparameter	2023-03-13 01:17:04 +01:00
Ricardo Montañana	ed784736ca	update build	2023-03-12 11:39:35 +01:00
Ricardo Montañana	49e9dd3e12	Update build	2023-03-12 11:30:43 +01:00
Ricardo Montañana	083a56b311	Change seconds for milliseconds in sample change path of coverage report in build	2023-03-12 11:27:02 +01:00
Ricardo Montañana	4492252729	Add headers needed in sample.cpp	2023-03-11 22:45:34 +01:00
Ricardo Montañana	c00b7a613c	Add path argument to command line	2023-02-28 10:52:26 +01:00
Ricardo Montañana	200015000c	Add all datasets to sample	2023-02-28 10:28:23 +01:00
Ricardo Montañana	ce9ddb3be3	Cosmetic refactor in unittest	2023-02-28 00:50:12 +01:00
Ricardo Montañana	90428218c2	Add dataset to test and add hyperparameters to sample	2023-02-28 00:43:37 +01:00
Ricardo Montañana	0b63d9ace0	Update build	2023-02-27 01:18:46 +01:00
Ricardo Montañana	6875127394	Update Test coverage and build	2023-02-27 01:01:24 +01:00
Ricardo Montañana	747f610ce9	Remove unneeded code in CPPFImdlp	2023-02-27 00:53:00 +01:00
Ricardo Montañana	a7d13f602d	set min_length as protected	2023-02-26 12:07:52 +01:00
Ricardo Montañana	552b03afc9	make public min_length for tests	2023-02-26 11:33:10 +01:00
Ricardo Montañana	4a9664c4aa	Fix depth init in fit	2023-02-26 11:26:37 +01:00
Ricardo Montañana	964555de20	Add echo total of cut points in sample	2023-02-25 18:31:57 +01:00
Ricardo Montañana	d6cece1006	Add max_depth and min_length as hyperparams	2023-02-25 18:16:20 +01:00
Ricardo Montañana Gómez	e25ca378f0	Merge pull request #4 from rmontanana/test Add tests to GH action	2023-02-24 11:45:14 +01:00
Ricardo Montañana	71c1dc2928	Build project and tests in action	2023-02-24 11:41:50 +01:00
Ricardo Montañana	ebea31afd1	Build tests in action	2023-02-24 11:39:38 +01:00
Ricardo Montañana	89d675eb1f	Action to execute tests	2023-02-24 11:36:48 +01:00
Ricardo Montañana	e8fcc20a32	Fix mistake in build action	2023-02-24 11:33:26 +01:00
Ricardo Montañana	848ee7ba24	Try tests in build action	2023-02-24 11:32:10 +01:00
Ricardo Montañana Gómez	32a6fd9ba0	Update version number	2023-02-22 12:05:47 +01:00
Ricardo Montañana	cd04f97fd0	Remove exception of valuecutpoint it's not needed	2023-02-22 11:55:31 +01:00
Ricardo Montañana	458a313aee	Add limits header	2023-02-22 11:32:37 +01:00
Ricardo Montañana	e97aea2a4d	try new build	2023-02-22 01:33:49 +01:00
Ricardo Montañana	4707bc0b7f	update sonar project key	2023-02-22 01:16:39 +01:00
Ricardo Montañana	8c868981e8	New build.yml version	2023-02-22 00:46:54 +01:00
Ricardo Montañana	e812e91540	Update gha to sonarcloud	2023-02-22 00:30:14 +01:00
Ricardo Montañana	dddeea4024	fix token name mistake in github action	2023-02-21 20:38:10 +01:00
Ricardo Montañana	5b7d66d922	Update sonar configuration	2023-02-21 20:27:54 +01:00