mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-17 16:35:57 +00:00
Compare commits
116 Commits
v1.0.0.0
...
059fd33b4e
Author | SHA1 | Date | |
---|---|---|---|
059fd33b4e
|
|||
e068bf0a54
|
|||
|
cfb993f5ec | ||
7d62d6af4a
|
|||
ea70535984
|
|||
2d8b949abd
|
|||
ab12622009
|
|||
248a511972
|
|||
d9bd0126f9
|
|||
210af46a88
|
|||
2db60e007d
|
|||
1cf245fa49
|
|||
|
e36d9af8f9 | ||
7b0673fd4b
|
|||
a1346e1943
|
|||
b3fc598c29
|
|||
cc1efa0b4e
|
|||
90965877eb
|
|||
c4e6c041fe
|
|||
7938df7f0f
|
|||
7ee9896734
|
|||
8f7f605670
|
|||
2f55b27691
|
|||
378fbd51ef
|
|||
402d0da878
|
|||
f34bcc2ed7
|
|||
c9ba35fb58
|
|||
e205668906
|
|||
633aa52849
|
|||
61de687476
|
|||
7ff88c8e4b
|
|||
|
638bb2a59e | ||
|
f258fc220f | ||
0beeda320d
|
|||
6b68a41c42
|
|||
236d1b2f8b
|
|||
52ee93178f
|
|||
eeda4347e9
|
|||
5708dc3de9
|
|||
fbffc3a9c4
|
|||
ab3786e2a2
|
|||
be1917d05b
|
|||
5679d607e5
|
|||
e8559faf1f
|
|||
b21e85f5e8
|
|||
|
db76afc4e2 | ||
a1f26a257c
|
|||
22997f5d69
|
|||
ef16488ffa
|
|||
449bf3a67e
|
|||
e689d1f69c
|
|||
d77d27459b
|
|||
49c08bfe12
|
|||
62e9276fbf
|
|||
c52c7d0828
|
|||
|
0b35a15d62 | ||
c662a96da8
|
|||
0ead15be7c
|
|||
da41a9317d
|
|||
42e83b3d26
|
|||
77135739cf
|
|||
27ea3bf338
|
|||
12222f7903
|
|||
cfade7a556
|
|||
f0845c5bd1
|
|||
1f4abade2c
|
|||
770502c8e5
|
|||
ed7433672d
|
|||
14860ea0b9
|
|||
d9a6f528f6
|
|||
7551b0d669
|
|||
ffb8df4d1c
|
|||
ed784736ca
|
|||
49e9dd3e12
|
|||
083a56b311
|
|||
4492252729
|
|||
c00b7a613c
|
|||
200015000c
|
|||
ce9ddb3be3
|
|||
90428218c2
|
|||
0b63d9ace0
|
|||
6875127394
|
|||
747f610ce9
|
|||
a7d13f602d
|
|||
552b03afc9
|
|||
4a9664c4aa
|
|||
964555de20
|
|||
d6cece1006
|
|||
|
e25ca378f0 | ||
71c1dc2928
|
|||
ebea31afd1
|
|||
89d675eb1f
|
|||
e8fcc20a32
|
|||
848ee7ba24
|
|||
|
32a6fd9ba0 | ||
cd04f97fd0
|
|||
458a313aee
|
|||
e97aea2a4d
|
|||
4707bc0b7f
|
|||
8c868981e8
|
|||
e812e91540
|
|||
dddeea4024
|
|||
5b7d66d922
|
|||
|
5d5eb98afc | ||
a44f01460a
|
|||
de25ba78bd
|
|||
79c029832a
|
|||
5bb0e1e6ca
|
|||
dec1295933
|
|||
04c1772019
|
|||
e37702dcb0
|
|||
1c7492d3b6
|
|||
2ab828b400
|
|||
|
1b89f5927c
|
||
7b20bde428
|
|||
13ebb43bf3
|
16
.devcontainer/Dockerfile
Normal file
16
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
FROM mcr.microsoft.com/devcontainers/cpp:0-ubuntu-22.04
|
||||
|
||||
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||
&& apt-get -y install --no-install-recommends \
|
||||
python3 \
|
||||
python3-pip \
|
||||
lcov \
|
||||
cmake \
|
||||
&& apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip3 install --no-cache-dir \
|
||||
cpplint \
|
||||
cmake-format\
|
||||
gcovr
|
||||
# [Optional] Uncomment this section to install additional vcpkg ports.
|
||||
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
|
32
.devcontainer/devcontainer.json
Normal file
32
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,32 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
|
||||
{
|
||||
"name": "C++",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
// "postCreateCommand": "gcc -v",
|
||||
// Configure tool-specific properties.
|
||||
"customizations": {
|
||||
// Configure properties specific to VS Code.
|
||||
"vscode": {
|
||||
"settings": {},
|
||||
"extensions": [
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.cpptools-extension-pack",
|
||||
"ms-vscode.cpptools-themes",
|
||||
"jbenden.c-cpp-flylint",
|
||||
"matepek.vscode-catch2-test-adapter",
|
||||
"ms-vscode.cmake-tools",
|
||||
"GitHub.copilot"
|
||||
]
|
||||
}
|
||||
}
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
59
.devcontainer/reinstall-cmake.sh
Normal file
59
.devcontainer/reinstall-cmake.sh
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
set -e
|
||||
|
||||
CMAKE_VERSION=${1:-"none"}
|
||||
|
||||
if [ "${CMAKE_VERSION}" = "none" ]; then
|
||||
echo "No CMake version specified, skipping CMake reinstallation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Cleanup temporary directory and associated files when exiting the script.
|
||||
cleanup() {
|
||||
EXIT_CODE=$?
|
||||
set +e
|
||||
if [[ -n "${TMP_DIR}" ]]; then
|
||||
echo "Executing cleanup of tmp files"
|
||||
rm -Rf "${TMP_DIR}"
|
||||
fi
|
||||
exit $EXIT_CODE
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
|
||||
echo "Installing CMake..."
|
||||
apt-get -y purge --auto-remove cmake
|
||||
mkdir -p /opt/cmake
|
||||
|
||||
architecture=$(dpkg --print-architecture)
|
||||
case "${architecture}" in
|
||||
arm64)
|
||||
ARCH=aarch64 ;;
|
||||
amd64)
|
||||
ARCH=x86_64 ;;
|
||||
*)
|
||||
echo "Unsupported architecture ${architecture}."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
|
||||
CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
|
||||
TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
|
||||
|
||||
echo "${TMP_DIR}"
|
||||
cd "${TMP_DIR}"
|
||||
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
|
||||
|
||||
sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
|
||||
sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
|
||||
|
||||
ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
|
||||
ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest
|
44
.github/workflows/build.yml
vendored
Normal file
44
.github/workflows/build.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Build
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- "*"
|
||||
pull_request:
|
||||
types: [ opened, synchronize, reopened ]
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
submodules: recursive
|
||||
- name: Install sonar-scanner and build-wrapper
|
||||
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
||||
- name: Install lcov & gcovr
|
||||
run: |
|
||||
sudo apt-get -y install lcov
|
||||
sudo apt-get -y install gcovr
|
||||
- name: Install Libtorch
|
||||
run: |
|
||||
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
|
||||
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
||||
- name: Tests & build-wrapper
|
||||
run: |
|
||||
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON
|
||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Debug
|
||||
cmake --build build -j 4
|
||||
cd build
|
||||
ctest -C Debug --output-on-failure -j 4
|
||||
gcovr -f ../src/CPPFImdlp.cpp -f ../src/Metrics.cpp -f ../src/BinDisc.cpp -f ../src/Discretizer.cpp --txt --sonarqube=coverage.xml
|
||||
- name: Run sonar-scanner
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
run: |
|
||||
sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||
--define sonar.coverageReportPaths=build/coverage.xml
|
5
.gitignore
vendored
5
.gitignore
vendored
@@ -31,7 +31,12 @@
|
||||
*.out
|
||||
*.app
|
||||
**/build
|
||||
build_Debug
|
||||
build_Release
|
||||
build_debug
|
||||
build_release
|
||||
**/lcoverage
|
||||
.idea
|
||||
cmake-*
|
||||
**/CMakeFiles
|
||||
**/gcovr-report
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "tests/lib/Files"]
|
||||
path = tests/lib/Files
|
||||
url = https://github.com/rmontanana/ArffFiles.git
|
42
.vscode/launch.json
vendored
Normal file
42
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "C++ Launch config",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/tests/build/BinDisc_unittest",
|
||||
"cwd": "${workspaceFolder}/tests/build",
|
||||
"args": [],
|
||||
"launchCompleteCommand": "exec-run",
|
||||
"stopAtEntry": false,
|
||||
"linux": {
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "/usr/bin/gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Enable pretty-printing for gdb",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
},
|
||||
{
|
||||
"description": "Auto load symbols when loading an .so file",
|
||||
"text": "set auto-solib-add",
|
||||
"ignoreFailures": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"osx": {
|
||||
"type": "lldb",
|
||||
"MIMode": "lldb"
|
||||
},
|
||||
"windows": {
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "C:\\MinGw\\bin\\gdb.exe"
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
109
.vscode/settings.json
vendored
Normal file
109
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"sonarlint.connectedMode.project": {
|
||||
"connectionId": "rmontanana",
|
||||
"projectKey": "rmontanana_mdlp"
|
||||
},
|
||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||
"cmake.configureOnOpen": true,
|
||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
|
||||
"files.associations": {
|
||||
"*.rmd": "markdown",
|
||||
"*.py": "python",
|
||||
"vector": "cpp",
|
||||
"__bit_reference": "cpp",
|
||||
"__bits": "cpp",
|
||||
"__config": "cpp",
|
||||
"__debug": "cpp",
|
||||
"__errc": "cpp",
|
||||
"__hash_table": "cpp",
|
||||
"__locale": "cpp",
|
||||
"__mutex_base": "cpp",
|
||||
"__node_handle": "cpp",
|
||||
"__nullptr": "cpp",
|
||||
"__split_buffer": "cpp",
|
||||
"__string": "cpp",
|
||||
"__threading_support": "cpp",
|
||||
"__tuple": "cpp",
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bitset": "cpp",
|
||||
"cctype": "cpp",
|
||||
"chrono": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"compare": "cpp",
|
||||
"complex": "cpp",
|
||||
"concepts": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"exception": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"ios": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"locale": "cpp",
|
||||
"memory": "cpp",
|
||||
"mutex": "cpp",
|
||||
"new": "cpp",
|
||||
"optional": "cpp",
|
||||
"ostream": "cpp",
|
||||
"ratio": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"string": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"variant": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"iostream": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"numeric": "cpp",
|
||||
"set": "cpp",
|
||||
"__tree": "cpp",
|
||||
"deque": "cpp",
|
||||
"list": "cpp",
|
||||
"map": "cpp",
|
||||
"unordered_set": "cpp",
|
||||
"any": "cpp",
|
||||
"condition_variable": "cpp",
|
||||
"forward_list": "cpp",
|
||||
"fstream": "cpp",
|
||||
"stack": "cpp",
|
||||
"thread": "cpp",
|
||||
"__memory": "cpp",
|
||||
"filesystem": "cpp",
|
||||
"*.toml": "toml",
|
||||
"utility": "cpp",
|
||||
"span": "cpp",
|
||||
"*.tcc": "cpp",
|
||||
"bit": "cpp",
|
||||
"charconv": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"codecvt": "cpp",
|
||||
"functional": "cpp",
|
||||
"iterator": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"random": "cpp",
|
||||
"source_location": "cpp",
|
||||
"format": "cpp",
|
||||
"numbers": "cpp",
|
||||
"semaphore": "cpp",
|
||||
"stop_token": "cpp",
|
||||
"text_encoding": "cpp",
|
||||
"typeindex": "cpp",
|
||||
"valarray": "cpp"
|
||||
}
|
||||
}
|
26
.vscode/tasks.json
vendored
Normal file
26
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "cmake",
|
||||
"label": "CMake: build",
|
||||
"command": "build",
|
||||
"targets": [
|
||||
"all"
|
||||
],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"problemMatcher": [],
|
||||
"detail": "CMake template build task"
|
||||
},
|
||||
{
|
||||
"type": "cmake",
|
||||
"label": "CMake: configure",
|
||||
"command": "configure",
|
||||
"problemMatcher": [],
|
||||
"detail": "CMake template configure task"
|
||||
}
|
||||
]
|
||||
}
|
77
CLAUDE.md
Normal file
77
CLAUDE.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
This is a C++ implementation of the MDLP (Minimum Description Length Principle) discretization algorithm based on Fayyad & Irani's paper. The library provides discretization methods for continuous-valued attributes in classification learning.
|
||||
|
||||
## Build System
|
||||
|
||||
The project uses CMake with a Makefile wrapper for common tasks:
|
||||
|
||||
### Common Commands
|
||||
- `make build` - Build release version with sample program
|
||||
- `make test` - Run full test suite with coverage report
|
||||
- `make install` - Install the library
|
||||
|
||||
### Build Configurations
|
||||
- **Release**: Built in `build_release/` directory
|
||||
- **Debug**: Built in `build_debug/` directory (for testing)
|
||||
|
||||
### Dependencies
|
||||
- PyTorch (libtorch) - Required dependency
|
||||
- GoogleTest - Fetched automatically for testing
|
||||
- Coverage tools: lcov, genhtml
|
||||
|
||||
## Code Architecture
|
||||
|
||||
### Core Components
|
||||
|
||||
1. **Discretizer** (`src/Discretizer.h/cpp`) - Abstract base class for all discretizers
|
||||
2. **CPPFImdlp** (`src/CPPFImdlp.h/cpp`) - Main MDLP algorithm implementation
|
||||
3. **BinDisc** (`src/BinDisc.h/cpp`) - K-bins discretization (quantile/uniform strategies)
|
||||
4. **Metrics** (`src/Metrics.h/cpp`) - Entropy and information gain calculations
|
||||
|
||||
### Key Data Types
|
||||
- `samples_t` - Input data samples
|
||||
- `labels_t` - Classification labels
|
||||
- `indices_t` - Index arrays for sorting/processing
|
||||
- `precision_t` - Floating-point precision type
|
||||
|
||||
### Algorithm Flow
|
||||
1. Data is sorted using labels as tie-breakers for identical values
|
||||
2. MDLP recursively finds optimal cut points using entropy-based criteria
|
||||
3. Cut points are validated to ensure meaningful splits
|
||||
4. Transform method maps continuous values to discrete bins
|
||||
|
||||
## Testing
|
||||
|
||||
Tests are built with GoogleTest and include:
|
||||
- `Metrics_unittest` - Entropy/information gain tests
|
||||
- `FImdlp_unittest` - Core MDLP algorithm tests
|
||||
- `BinDisc_unittest` - K-bins discretization tests
|
||||
- `Discretizer_unittest` - Base class functionality tests
|
||||
|
||||
### Running Tests
|
||||
```bash
|
||||
make test # Runs all tests and generates coverage report
|
||||
cd build_debug/tests && ctest # Run tests directly
|
||||
```
|
||||
|
||||
Coverage reports are generated at `build_debug/tests/coverage/index.html`.
|
||||
|
||||
## Sample Usage
|
||||
|
||||
The sample program demonstrates basic usage:
|
||||
```bash
|
||||
build_release/sample/sample -f iris -m 2
|
||||
```
|
||||
|
||||
## Development Notes
|
||||
|
||||
- The library uses PyTorch tensors for efficient numerical operations
|
||||
- Code follows C++17 standards
|
||||
- Coverage is maintained at 100%
|
||||
- The implementation handles edge cases like duplicate values and small intervals
|
||||
- Conan package manager support is available via `conanfile.py`
|
@@ -1,7 +1,78 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
project(fimdlp
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/mdlp"
|
||||
VERSION 2.0.1
|
||||
)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
# Options
|
||||
# -------
|
||||
option(ENABLE_TESTING OFF)
|
||||
option(ENABLE_SAMPLE OFF)
|
||||
option(COVERAGE OFF)
|
||||
|
||||
add_subdirectory(config)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-elide-constructors")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
|
||||
endif()
|
||||
|
||||
if (ENABLE_TESTING)
|
||||
message("Debug mode")
|
||||
enable_testing()
|
||||
set(CODE_COVERAGE ON)
|
||||
set(GCC_COVERAGE_LINK_FLAGS "${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
add_subdirectory(tests)
|
||||
else()
|
||||
message("Release mode")
|
||||
endif()
|
||||
|
||||
if (ENABLE_SAMPLE)
|
||||
message("Building sample")
|
||||
add_subdirectory(sample)
|
||||
endif()
|
||||
|
||||
include_directories(
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
add_library(fimdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp)
|
||||
target_link_libraries(fimdlp torch::torch)
|
||||
|
||||
# Installation
|
||||
# ------------
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
install(TARGETS fimdlp
|
||||
EXPORT fimdlpTargets
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib
|
||||
CONFIGURATIONS Release)
|
||||
|
||||
install(DIRECTORY src/ DESTINATION include/fimdlp FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/config.h DESTINATION include/fimdlp CONFIGURATIONS Release)
|
||||
|
||||
install(EXPORT fimdlpTargets
|
||||
FILE fimdlpTargets.cmake
|
||||
NAMESPACE fimdlp::
|
||||
DESTINATION lib/cmake/fimdlp)
|
||||
|
||||
configure_file(fimdlpConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfig.cmake" @ONLY)
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfigVersion.cmake"
|
||||
DESTINATION lib/cmake/fimdlp)
|
||||
|
||||
|
9
CMakeUserPresets.json
Normal file
9
CMakeUserPresets.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/Release/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
163
CPPFImdlp.cpp
163
CPPFImdlp.cpp
@@ -1,163 +0,0 @@
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
namespace mdlp {
|
||||
CPPFImdlp::CPPFImdlp(int algorithm):algorithm(algorithm), indices(indices_t()), X(samples_t()), y(labels_t()), metrics(Metrics(y, indices))
|
||||
{
|
||||
}
|
||||
CPPFImdlp::~CPPFImdlp()
|
||||
= default;
|
||||
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
X = X_;
|
||||
y = y_;
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
if (X.size() == 0 || y.size() == 0) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
switch (algorithm) {
|
||||
case 0:
|
||||
computeCutPoints(0, X.size());
|
||||
break;
|
||||
case 1:
|
||||
computeCutPointsAlternative(0, X.size());
|
||||
break;
|
||||
default:
|
||||
throw invalid_argument("algorithm must be 0 or 1");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx)
|
||||
{
|
||||
size_t idxPrev = idx - 1;
|
||||
precision_t previous = X[indices[idxPrev]], actual = X[indices[idx]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
}
|
||||
return (previous + actual) / 2;
|
||||
}
|
||||
tuple<precision_t, size_t> CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t idxPrev = cut - 1;
|
||||
precision_t previous, next, actual;
|
||||
previous = X[indices[idxPrev]];
|
||||
next = actual = X[indices[cut]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
}
|
||||
// get the last equal value of X in the interval
|
||||
while (actual == X[indices[cut++]] && cut < end);
|
||||
if (previous == actual && cut < end)
|
||||
actual = X[indices[cut]];
|
||||
cut--;
|
||||
return make_tuple((previous + actual) / 2, cut);
|
||||
}
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
tuple<precision_t, size_t> result;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = completeValueCutPoint(start, cut, end);
|
||||
cut = get<1>(result);
|
||||
cutPoints.push_back(get<0>(result));
|
||||
computeCutPoints(start, cut);
|
||||
computeCutPoints(cut, end);
|
||||
}
|
||||
}
|
||||
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
|
||||
{
|
||||
size_t cut;
|
||||
if (end - start < 2)
|
||||
return;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
cutPoints.push_back(halfWayValueCutPoint(start, cut));
|
||||
computeCutPointsAlternative(start, cut);
|
||||
computeCutPointsAlternative(cut, end);
|
||||
}
|
||||
}
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amogst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
|
||||
precision_t entropy_left, entropy_right, minEntropy;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (auto idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / elements * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / elements * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k, k1, k2;
|
||||
precision_t ig, delta;
|
||||
precision_t ent, ent1, ent2;
|
||||
auto N = precision_t(end - start);
|
||||
if (N < 2) {
|
||||
return false;
|
||||
}
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
for (size_t i = 0; i < X_.size(); i++)
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
|
||||
{
|
||||
if (X_[i1] == X_[i2]) return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
cutPoints_t CPPFImdlp::getCutPoints()
|
||||
{
|
||||
// Remove duplicates and sort
|
||||
cutPoints_t output(cutPoints.size());
|
||||
set<precision_t> s;
|
||||
unsigned size = cutPoints.size();
|
||||
for (unsigned i = 0; i < size; i++)
|
||||
s.insert(cutPoints[i]);
|
||||
output.assign(s.begin(), s.end());
|
||||
sort(output.begin(), output.end());
|
||||
return output;
|
||||
}
|
||||
}
|
33
CPPFImdlp.h
33
CPPFImdlp.h
@@ -1,33 +0,0 @@
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <utility>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
int algorithm;
|
||||
indices_t indices;
|
||||
samples_t X;
|
||||
labels_t y;
|
||||
Metrics metrics;
|
||||
cutPoints_t cutPoints;
|
||||
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
void computeCutPoints(size_t, size_t);
|
||||
void computeCutPointsAlternative(size_t, size_t);
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
precision_t halfWayValueCutPoint(size_t, size_t);
|
||||
tuple<precision_t, size_t> completeValueCutPoint(size_t, size_t, size_t);
|
||||
public:
|
||||
CPPFImdlp(int algorithm = 0);
|
||||
~CPPFImdlp();
|
||||
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
samples_t getCutPoints();
|
||||
inline string version() { return "1.0.0"; };
|
||||
};
|
||||
}
|
||||
#endif
|
35
Makefile
Normal file
35
Makefile
Normal file
@@ -0,0 +1,35 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := build
|
||||
.PHONY: build test
|
||||
lcov := lcov
|
||||
|
||||
build:
|
||||
@if [ -d build_release ]; then rm -fr build_release; fi
|
||||
@mkdir build_release
|
||||
@cmake -B build_release -S . -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=ON
|
||||
@cmake --build build_release -j 8
|
||||
|
||||
install:
|
||||
@cmake --build build_release --target install -j 8
|
||||
|
||||
test:
|
||||
@if [ -d build_debug ]; then rm -fr build_debug; fi
|
||||
@mkdir build_debug
|
||||
@cmake -B build_debug -S . -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON -DENABLE_SAMPLE=ON
|
||||
@cmake --build build_debug -j 8
|
||||
@cd build_debug/tests && ctest --output-on-failure -j 8
|
||||
@cd build_debug/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'gtest/*' --output-file coverage.info >/dev/null 2>&1;
|
||||
@genhtml build_debug/tests/coverage.info --demangle-cpp --output-directory build_debug/tests/coverage --title "Discretizer mdlp Coverage Report" -s -k -f --legend
|
||||
@echo "* Coverage report is generated at build_debug/tests/coverage/index.html"
|
||||
@which python || (echo ">>> Please install python"; exit 1)
|
||||
@if [ ! -f build_debug/tests/coverage.info ]; then \
|
||||
echo ">>> No coverage.info file found!"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo ">>> Updating coverage badge..."
|
||||
@env python update_coverage.py build_debug/tests
|
20
Metrics.h
20
Metrics.h
@@ -1,20 +0,0 @@
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
#include "typesFImdlp.h"
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache;
|
||||
cacheIg_t igCache;
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(labels_t&, indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
38
README.md
38
README.md
@@ -1,4 +1,11 @@
|
||||
# mdlp
|
||||
[](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[](html/index.html)
|
||||
[](https://deepwiki.com/rmontanana/mdlp)
|
||||
[](https://doi.org/10.5281/zenodo.14245443)
|
||||
|
||||
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
||||
|
||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||
|
||||
@@ -7,26 +14,35 @@ The implementation tries to mitigate the problem of different label values with
|
||||
- Sorts the values of the variable using the label values as a tie-breaker
|
||||
- Once a valid candidate for the split is found, it checks whether the previous value is the same as the current one, and tries to use the previous one, or the next one if the former is not possible.
|
||||
|
||||
The algorithm returns the cut points for the variable.
|
||||
Other features:
|
||||
|
||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||
- Intervals must contain more than two examples to be evaluated (mdlp).
|
||||
|
||||
- The algorithm returns the cut points for the variable.
|
||||
|
||||
- The transform method uses the cut points to return, for each value `x`, the index `i` of the interval that contains it, in the following way:
|
||||
|
||||
cut[i - 1] <= x < cut[i]
|
||||
|
||||
using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) method
|
||||
|
||||
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
|
||||
|
||||
## Sample
|
||||
|
||||
To run the sample, just execute the following commands:
|
||||
|
||||
```bash
|
||||
cd sample
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
./sample iris
|
||||
make build
|
||||
build_release/sample/sample -f iris -m 2
|
||||
build_release/sample/sample -h
|
||||
```
|
||||
|
||||
## Test
|
||||
|
||||
To run the tests, execute the following commands:
|
||||
To run the tests and see coverage (llvm with lcov and genhtml have to be installed), execute the following commands:
|
||||
|
||||
```bash
|
||||
cd tests
|
||||
./test
|
||||
make test
|
||||
```
|
||||
|
525
TECHNICAL_ANALYSIS_REPORT.md
Normal file
525
TECHNICAL_ANALYSIS_REPORT.md
Normal file
@@ -0,0 +1,525 @@
|
||||
# Technical Analysis Report: MDLP Discretization Library
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This document presents a comprehensive technical analysis of the MDLP (Minimum Description Length Principle) discretization library. The analysis covers project structure, code quality, architecture, testing methodology, documentation, and security assessment.
|
||||
|
||||
**Overall Rating: B+ (Good with Notable Issues)**
|
||||
|
||||
The library demonstrates solid software engineering practices with excellent test coverage and clean architectural design, but contains several security vulnerabilities and code quality issues that require attention before production deployment.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Project Overview](#project-overview)
|
||||
2. [Architecture & Design Analysis](#architecture--design-analysis)
|
||||
3. [Code Quality Assessment](#code-quality-assessment)
|
||||
4. [Testing Framework Analysis](#testing-framework-analysis)
|
||||
5. [Security Analysis](#security-analysis)
|
||||
6. [Documentation & Maintainability](#documentation--maintainability)
|
||||
7. [Build System Evaluation](#build-system-evaluation)
|
||||
8. [Strengths & Weaknesses Summary](#strengths--weaknesses-summary)
|
||||
9. [Recommendations](#recommendations)
|
||||
10. [Risk Assessment](#risk-assessment)
|
||||
|
||||
---
|
||||
|
||||
## Project Overview
|
||||
|
||||
### Description
|
||||
The MDLP discretization library is a C++ implementation of Fayyad & Irani's Multi-Interval Discretization algorithm for continuous-valued attributes in classification learning. The library provides both traditional binning strategies and advanced MDLP-based discretization.
|
||||
|
||||
### Key Features
|
||||
- **MDLP Algorithm**: Implementation of information-theoretic discretization
|
||||
- **Multiple Strategies**: Uniform and quantile-based binning options
|
||||
- **PyTorch Integration**: Native support for PyTorch tensors
|
||||
- **High Performance**: Optimized algorithms with caching mechanisms
|
||||
- **Complete Testing**: 100% code coverage with comprehensive test suite
|
||||
|
||||
### Technology Stack
|
||||
- **Language**: C++17
|
||||
- **Build System**: CMake 3.20+
|
||||
- **Dependencies**: PyTorch (libtorch 2.7.0)
|
||||
- **Testing**: Google Test (GTest)
|
||||
- **Coverage**: lcov/genhtml
|
||||
- **Package Manager**: Conan
|
||||
|
||||
---
|
||||
|
||||
## Architecture & Design Analysis
|
||||
|
||||
### Class Hierarchy
|
||||
|
||||
```
|
||||
Discretizer (Abstract Base Class)
|
||||
├── CPPFImdlp (MDLP Implementation)
|
||||
└── BinDisc (Simple Binning)
|
||||
|
||||
Metrics (Standalone Utility Class)
|
||||
```
|
||||
|
||||
### Design Patterns Identified
|
||||
|
||||
#### ✅ **Well-Implemented Patterns**
|
||||
- **Template Method Pattern**: Base class provides `fit_transform()` while derived classes implement `fit()`
|
||||
- **Facade Pattern**: Unified interface for both C++ vectors and PyTorch tensors
|
||||
- **Composition**: `CPPFImdlp` composes `Metrics` for statistical calculations
|
||||
|
||||
#### ⚠️ **Pattern Issues**
|
||||
- **Strategy Pattern**: `BinDisc` uses enum-based strategy instead of proper object-oriented strategy pattern
|
||||
- **Interface Segregation**: `BinDisc.fit()` ignores `y` parameter, violating interface contract
|
||||
|
||||
### SOLID Principles Adherence
|
||||
|
||||
| Principle | Rating | Notes |
|
||||
|-----------|--------|-------|
|
||||
| **Single Responsibility** | ✅ Good | Each class has clear, focused responsibility |
|
||||
| **Open/Closed** | ✅ Good | Easy to extend with new discretization algorithms |
|
||||
| **Liskov Substitution** | ⚠️ Issues | `BinDisc` doesn't properly handle supervised interface |
|
||||
| **Interface Segregation** | ✅ Good | Focused interfaces, not overly broad |
|
||||
| **Dependency Inversion** | ✅ Good | Depends on abstractions, not implementations |
|
||||
|
||||
### Architectural Strengths
|
||||
- **Clean Separation**: Algorithm logic, metrics, and data handling well-separated
|
||||
- **Extensible Design**: Easy to add new discretization methods
|
||||
- **Multi-Interface Support**: Both C++ native and PyTorch integration
|
||||
- **Performance Optimized**: Caching and efficient data structures
|
||||
|
||||
### Architectural Weaknesses
|
||||
- **Interface Inconsistency**: Mixed supervised/unsupervised interface handling
|
||||
- **Complex Single Methods**: `computeCutPoints()` handles too many responsibilities
|
||||
- **Tight Coupling**: Direct access to internal data structures
|
||||
- **Limited Configuration**: Algorithm parameters scattered across classes
|
||||
|
||||
---
|
||||
|
||||
## Code Quality Assessment
|
||||
|
||||
### Code Style & Standards
|
||||
- **Consistent Naming**: Good use of camelCase and snake_case conventions
|
||||
- **Header Organization**: Proper SPDX licensing and copyright headers
|
||||
- **Type Safety**: Centralized type definitions in `typesFImdlp.h`
|
||||
- **Modern C++**: Good use of C++17 features
|
||||
|
||||
### Critical Code Issues
|
||||
|
||||
#### 🔴 **High Priority Issues**
|
||||
|
||||
**Memory Safety - Unsafe Pointer Operations**
|
||||
```cpp
|
||||
// Location: Discretizer.cpp:35-36
|
||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||
```
|
||||
- **Issue**: Direct pointer arithmetic without bounds checking
|
||||
- **Risk**: Buffer overflow if tensor data is malformed
|
||||
- **Fix**: Add tensor validation before pointer operations
|
||||
|
||||
#### 🟡 **Medium Priority Issues**
|
||||
|
||||
**Integer Underflow Risk**
|
||||
```cpp
|
||||
// Location: CPPFImdlp.cpp:98-100
|
||||
n = cut - 1 - idxPrev; // Could underflow if cut <= idxPrev
|
||||
m = idxNext - cut - 1; // Could underflow if idxNext <= cut
|
||||
```
|
||||
- **Issue**: Size arithmetic without underflow protection
|
||||
- **Risk**: Extremely large values from underflow
|
||||
- **Fix**: Add underflow validation
|
||||
|
||||
**Vector Access Without Bounds Checking**
|
||||
```cpp
|
||||
// Location: Multiple locations
|
||||
X[indices[idx]] // No bounds validation
|
||||
```
|
||||
- **Issue**: Direct vector access using potentially invalid indices
|
||||
- **Risk**: Out-of-bounds memory access
|
||||
- **Fix**: Use `at()` method or add explicit bounds checking
|
||||
|
||||
### Performance Considerations
|
||||
- **Caching Strategy**: Good use of entropy and information gain caching
|
||||
- **Memory Efficiency**: Smart use of indices to avoid data copying
|
||||
- **Algorithmic Complexity**: Efficient O(n log n) sorting with optimized cutpoint selection
|
||||
|
||||
---
|
||||
|
||||
## Testing Framework Analysis
|
||||
|
||||
### Test Organization
|
||||
|
||||
| Test File | Focus Area | Key Features |
|
||||
|-----------|------------|-------------|
|
||||
| `BinDisc_unittest.cpp` | Binning strategies | Parametric testing, multiple bin counts |
|
||||
| `Discretizer_unittest.cpp` | Base interface | PyTorch integration, transform methods |
|
||||
| `FImdlp_unittest.cpp` | MDLP algorithm | Real datasets, comprehensive scenarios |
|
||||
| `Metrics_unittest.cpp` | Statistical calculations | Entropy, information gain validation |
|
||||
|
||||
### Testing Strengths
|
||||
- **100% Code Coverage**: Complete line and branch coverage
|
||||
- **Real Dataset Testing**: Uses Iris, Diabetes, Glass datasets from ARFF files
|
||||
- **Edge Case Coverage**: Empty datasets, constant values, single elements
|
||||
- **Parametric Testing**: Multiple configurations and strategies
|
||||
- **Data-Driven Approach**: Systematic test generation with `tests.txt`
|
||||
- **Multiple APIs**: Tests both C++ vectors and PyTorch tensors
|
||||
|
||||
### Testing Methodology
|
||||
- **Framework**: Google Test with proper fixture usage
|
||||
- **Precision Testing**: Consistent floating-point comparison margins
|
||||
- **Exception Testing**: Proper error condition validation
|
||||
- **Integration Testing**: End-to-end algorithm validation
|
||||
|
||||
### Testing Gaps
|
||||
- **Performance Testing**: No benchmarks or performance regression tests
|
||||
- **Memory Testing**: Limited memory pressure or leak testing
|
||||
- **Thread Safety**: No concurrent access testing
|
||||
- **Fuzzing**: No randomized input testing
|
||||
|
||||
---
|
||||
|
||||
## Security Analysis
|
||||
|
||||
### Overall Security Risk: **MEDIUM**
|
||||
|
||||
### Critical Security Vulnerabilities
|
||||
|
||||
#### 🔴 **HIGH RISK - Memory Safety**
|
||||
|
||||
**Unsafe PyTorch Tensor Operations**
|
||||
- **Location**: `Discretizer.cpp:35-36, 42, 49-50`
|
||||
- **Vulnerability**: Direct pointer arithmetic without validation
|
||||
- **Impact**: Buffer overflow, memory corruption
|
||||
- **Exploit Scenario**: Malformed tensor data causing out-of-bounds access
|
||||
- **Mitigation**:
|
||||
```cpp
|
||||
if (!X_.is_contiguous() || !y_.is_contiguous()) {
|
||||
throw std::invalid_argument("Tensors must be contiguous");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32 || y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("Invalid tensor types");
|
||||
}
|
||||
```
|
||||
|
||||
#### 🟡 **MEDIUM RISK - Input Validation**
|
||||
|
||||
**Insufficient Parameter Validation**
|
||||
- **Location**: Multiple entry points
|
||||
- **Vulnerability**: Missing bounds checking on user inputs
|
||||
- **Impact**: Integer overflow, out-of-bounds access
|
||||
- **Examples**:
|
||||
- `proposed_cuts` parameter without overflow protection
|
||||
- Tensor dimensions not validated
|
||||
- Array indices not bounds-checked
|
||||
|
||||
**Thread Safety Issues**
|
||||
- **Location**: `Metrics` class cache containers
|
||||
- **Vulnerability**: Shared state without synchronization
|
||||
- **Impact**: Race conditions, data corruption
|
||||
- **Mitigation**: Add mutex protection or document thread requirements
|
||||
|
||||
#### 🟢 **LOW RISK - Information Disclosure**
|
||||
|
||||
**Debug Information Leakage**
|
||||
- **Location**: Sample code and test files
|
||||
- **Vulnerability**: Detailed internal data exposure
|
||||
- **Impact**: Minor information disclosure
|
||||
- **Mitigation**: Remove or conditionalize debug output
|
||||
|
||||
### Security Recommendations
|
||||
|
||||
#### Immediate Actions
|
||||
1. **Add Tensor Validation**: Comprehensive validation before pointer operations
|
||||
2. **Implement Bounds Checking**: Explicit validation for all array access
|
||||
3. **Add Overflow Protection**: Safe arithmetic operations
|
||||
|
||||
#### Short-term Actions
|
||||
1. **Enhance Input Validation**: Parameter validation at all public interfaces
|
||||
2. **Add Thread Safety**: Documentation or synchronization mechanisms
|
||||
3. **Update Dependencies**: Ensure PyTorch is current and secure
|
||||
|
||||
---
|
||||
|
||||
## Documentation & Maintainability
|
||||
|
||||
### Current Documentation Status
|
||||
|
||||
#### ✅ **Available Documentation**
|
||||
- **README.md**: Basic usage instructions and build commands
|
||||
- **Code Comments**: SPDX headers and licensing information
|
||||
- **Build Instructions**: CMake configuration and make targets
|
||||
|
||||
#### ❌ **Missing Documentation**
|
||||
- **API Documentation**: No comprehensive API reference
|
||||
- **Algorithm Documentation**: Limited explanation of MDLP implementation
|
||||
- **Usage Examples**: Minimal code examples beyond basic sample
|
||||
- **Configuration Guide**: No detailed parameter explanation
|
||||
- **Architecture Documentation**: No design document or UML diagrams
|
||||
|
||||
### Maintainability Assessment
|
||||
|
||||
#### Strengths
|
||||
- **Clear Code Structure**: Well-organized class hierarchy
|
||||
- **Consistent Style**: Uniform naming and formatting conventions
|
||||
- **Separation of Concerns**: Clear module boundaries
|
||||
- **Version Control**: Proper git repository with meaningful commits
|
||||
|
||||
#### Weaknesses
|
||||
- **Complex Methods**: Some functions handle multiple responsibilities
|
||||
- **Magic Numbers**: Hardcoded values without explanation
|
||||
- **Limited Comments**: Algorithm logic lacks explanatory comments
|
||||
- **Configuration Scattered**: Parameters spread across multiple classes
|
||||
|
||||
### Documentation Recommendations
|
||||
1. **Generate API Documentation**: Use Doxygen for comprehensive API docs
|
||||
2. **Add Algorithm Explanation**: Document MDLP implementation details
|
||||
3. **Create Usage Guide**: Comprehensive examples and tutorials
|
||||
4. **Architecture Document**: High-level design documentation
|
||||
5. **Configuration Reference**: Centralized parameter documentation
|
||||
|
||||
---
|
||||
|
||||
## Build System Evaluation
|
||||
|
||||
### CMake Configuration Analysis
|
||||
|
||||
#### Strengths
|
||||
- **Modern CMake**: Uses version 3.20+ with current best practices
|
||||
- **Multi-Configuration**: Separate debug/release builds
|
||||
- **Dependency Management**: Proper PyTorch integration
|
||||
- **Installation Support**: Complete install targets and package config
|
||||
- **Testing Integration**: CTest integration with coverage
|
||||
|
||||
#### Build Features
|
||||
```cmake
|
||||
# Key configurations
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
option(ENABLE_TESTING OFF)
|
||||
option(ENABLE_SAMPLE OFF)
|
||||
option(COVERAGE OFF)
|
||||
```
|
||||
|
||||
### Build System Issues
|
||||
|
||||
#### Security Concerns
|
||||
- **Debug Flags**: May affect release builds
|
||||
- **Dependency Versions**: Fixed PyTorch version without security updates
|
||||
|
||||
#### Usability Issues
|
||||
- **Complex Makefile**: Manual build directory management
|
||||
- **Coverage Complexity**: Complex lcov command chain
|
||||
|
||||
### Build Recommendations
|
||||
1. **Simplify Build Process**: Use CMake presets for common configurations
|
||||
2. **Improve Dependency Management**: Flexible version constraints
|
||||
3. **Add Build Validation**: Compiler and platform checks
|
||||
4. **Enhance Documentation**: Detailed build instructions
|
||||
|
||||
---
|
||||
|
||||
## Strengths & Weaknesses Summary
|
||||
|
||||
### 🏆 **Key Strengths**
|
||||
|
||||
#### Technical Excellence
|
||||
- **Algorithmic Correctness**: Faithful implementation of Fayyad & Irani algorithm
|
||||
- **Performance Optimization**: Efficient caching and data structures
|
||||
- **Code Coverage**: 100% test coverage with comprehensive edge cases
|
||||
- **Modern C++**: Good use of C++17 features and best practices
|
||||
|
||||
#### Software Engineering
|
||||
- **Clean Architecture**: Well-structured OOP design with clear separation
|
||||
- **SOLID Principles**: Generally good adherence to design principles
|
||||
- **Multi-Platform**: CMake-based build system for cross-platform support
|
||||
- **Professional Quality**: Proper licensing, version control, CI/CD integration
|
||||
|
||||
#### API Design
|
||||
- **Multiple Interfaces**: Both C++ native and PyTorch tensor support
|
||||
- **Sklearn-like API**: Familiar `fit()`/`transform()`/`fit_transform()` pattern
|
||||
- **Extensible**: Easy to add new discretization algorithms
|
||||
|
||||
### ⚠️ **Critical Weaknesses**
|
||||
|
||||
#### Security Issues
|
||||
- **Memory Safety**: Unsafe pointer operations in PyTorch integration
|
||||
- **Input Validation**: Insufficient bounds checking and parameter validation
|
||||
- **Thread Safety**: Shared state without proper synchronization
|
||||
|
||||
#### Code Quality
|
||||
- **Interface Consistency**: LSP violation in `BinDisc` class
|
||||
- **Method Complexity**: Some functions handle too many responsibilities
|
||||
- **Error Handling**: Inconsistent exception handling patterns
|
||||
|
||||
#### Documentation
|
||||
- **API Documentation**: Minimal inline documentation
|
||||
- **Usage Examples**: Limited practical examples
|
||||
- **Architecture Documentation**: No high-level design documentation
|
||||
|
||||
---
|
||||
|
||||
## Recommendations
|
||||
|
||||
### 🚨 **Immediate Actions (HIGH Priority)**
|
||||
|
||||
#### Security Fixes
|
||||
```cpp
|
||||
// 1. Add tensor validation in Discretizer::fit_t()
|
||||
void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_) {
|
||||
// Validate tensor properties
|
||||
if (!X_.is_contiguous() || !y_.is_contiguous()) {
|
||||
throw std::invalid_argument("Tensors must be contiguous");
|
||||
}
|
||||
if (X_.sizes().size() != 1 || y_.sizes().size() != 1) {
|
||||
throw std::invalid_argument("Only 1D tensors supported");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32 || y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("Invalid tensor types");
|
||||
}
|
||||
// ... rest of implementation
|
||||
}
|
||||
```
|
||||
|
||||
```cpp
|
||||
// 2. Add bounds checking for vector access
|
||||
inline precision_t safe_vector_access(const samples_t& vec, size_t idx) {
|
||||
if (idx >= vec.size()) {
|
||||
throw std::out_of_range("Vector index out of bounds");
|
||||
}
|
||||
return vec[idx];
|
||||
}
|
||||
```
|
||||
|
||||
```cpp
|
||||
// 3. Add underflow protection in arithmetic operations
|
||||
size_t safe_subtract(size_t a, size_t b) {
|
||||
if (b > a) {
|
||||
throw std::underflow_error("Subtraction would cause underflow");
|
||||
}
|
||||
return a - b;
|
||||
}
|
||||
```
|
||||
|
||||
### 📋 **Short-term Actions (MEDIUM Priority)**
|
||||
|
||||
#### Code Quality Improvements
|
||||
1. **Fix Interface Consistency**: Separate supervised/unsupervised interfaces
|
||||
2. **Refactor Complex Methods**: Break down `computeCutPoints()` function
|
||||
3. **Standardize Error Handling**: Consistent exception types and messages
|
||||
4. **Add Input Validation**: Comprehensive parameter checking
|
||||
|
||||
#### Thread Safety
|
||||
```cpp
|
||||
// Add thread safety to Metrics class
|
||||
class Metrics {
|
||||
private:
|
||||
mutable std::mutex cache_mutex;
|
||||
cacheEnt_t entropyCache;
|
||||
cacheIg_t igCache;
|
||||
|
||||
public:
|
||||
precision_t entropy(size_t start, size_t end) const {
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
// ... implementation
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### 📚 **Long-term Actions (LOW Priority)**
|
||||
|
||||
#### Documentation & Usability
|
||||
1. **API Documentation**: Generate comprehensive Doxygen documentation
|
||||
2. **Usage Examples**: Create detailed tutorial and example repository
|
||||
3. **Performance Testing**: Add benchmarking and regression tests
|
||||
4. **Architecture Documentation**: Create design documents and UML diagrams
|
||||
|
||||
#### Code Modernization
|
||||
1. **Strategy Pattern**: Proper implementation for `BinDisc` strategies
|
||||
2. **Configuration Management**: Centralized parameter handling
|
||||
3. **Factory Pattern**: Discretizer creation factory
|
||||
4. **Resource Management**: RAII patterns for memory safety
|
||||
|
||||
---
|
||||
|
||||
## Risk Assessment
|
||||
|
||||
### Risk Priority Matrix
|
||||
|
||||
| Risk Category | High | Medium | Low | Total |
|
||||
|---------------|------|--------|-----|-------|
|
||||
| **Security** | 1 | 7 | 2 | 10 |
|
||||
| **Code Quality** | 2 | 5 | 3 | 10 |
|
||||
| **Maintainability** | 0 | 3 | 4 | 7 |
|
||||
| **Performance** | 0 | 1 | 2 | 3 |
|
||||
| **Total** | **3** | **16** | **11** | **30** |
|
||||
|
||||
### Risk Impact Assessment
|
||||
|
||||
#### Critical Risks (Immediate Attention Required)
|
||||
1. **Memory Safety Vulnerabilities**: Could lead to crashes or security exploits
|
||||
2. **Interface Consistency Issues**: Violates expected behavior contracts
|
||||
3. **Input Validation Gaps**: Potential for crashes with malformed input
|
||||
|
||||
#### Moderate Risks (Address in Next Release)
|
||||
1. **Thread Safety Issues**: Problems in multi-threaded environments
|
||||
2. **Complex Method Design**: Maintenance and debugging difficulties
|
||||
3. **Documentation Gaps**: Reduced adoption and maintainability
|
||||
|
||||
#### Low Risks (Future Improvements)
|
||||
1. **Performance Optimization**: Minor efficiency improvements
|
||||
2. **Code Style Consistency**: Enhanced readability
|
||||
3. **Build System Enhancements**: Improved developer experience
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
The MDLP discretization library represents a solid implementation of an important machine learning algorithm with excellent test coverage and clean architectural design. However, it requires attention to security vulnerabilities and code quality issues before production deployment.
|
||||
|
||||
### Final Verdict
|
||||
|
||||
**Rating: B+ (Good with Notable Issues)**
|
||||
|
||||
- **Core Algorithm**: Excellent implementation of MDLP with proper mathematical foundations
|
||||
- **Software Engineering**: Good OOP design following most best practices
|
||||
- **Testing**: Exemplary test coverage and methodology
|
||||
- **Security**: Notable vulnerabilities requiring immediate attention
|
||||
- **Documentation**: Adequate but could be significantly improved
|
||||
|
||||
### Deployment Recommendation
|
||||
|
||||
**Not Ready for Production** without addressing HIGH priority security issues, particularly around memory safety and input validation. Once these are resolved, the library would be suitable for production use in most contexts.
|
||||
|
||||
### Next Steps
|
||||
|
||||
1. **Security Audit**: Address all HIGH and MEDIUM priority security issues
|
||||
2. **Code Review**: Implement fixes for interface consistency and method complexity
|
||||
3. **Documentation**: Create comprehensive API documentation and usage guides
|
||||
4. **Testing**: Add performance benchmarks and stress testing
|
||||
5. **Release**: Prepare version 2.1.0 with security and quality improvements
|
||||
|
||||
---
|
||||
|
||||
## Appendix
|
||||
|
||||
### Files Analyzed
|
||||
- `src/CPPFImdlp.h` & `src/CPPFImdlp.cpp` - MDLP algorithm implementation
|
||||
- `src/Discretizer.h` & `src/Discretizer.cpp` - Base class and PyTorch integration
|
||||
- `src/BinDisc.h` & `src/BinDisc.cpp` - Simple binning strategies
|
||||
- `src/Metrics.h` & `src/Metrics.cpp` - Statistical calculations
|
||||
- `src/typesFImdlp.h` - Type definitions
|
||||
- `CMakeLists.txt` - Build configuration
|
||||
- `conanfile.py` - Dependency management
|
||||
- `tests/*` - Comprehensive test suite
|
||||
|
||||
### Analysis Date
|
||||
**Report Generated**: June 27, 2025
|
||||
|
||||
### Tools Used
|
||||
- **Static Analysis**: Manual code review with security focus
|
||||
- **Architecture Analysis**: SOLID principles and design pattern evaluation
|
||||
- **Test Analysis**: Coverage and methodology assessment
|
||||
- **Security Analysis**: Vulnerability assessment with risk prioritization
|
||||
|
||||
---
|
||||
|
||||
*This report provides a comprehensive technical analysis of the MDLP discretization library. For questions or clarifications, please refer to the project repository or contact the development team.*
|
55
conanfile.py
Normal file
55
conanfile.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import re
|
||||
import os
|
||||
from conan import ConanFile
|
||||
from conan.tools.cmake import CMake, CMakeToolchain, cmake_layout, CMakeDeps
|
||||
from conan.tools.files import save, load
|
||||
|
||||
class FimdlpConan(ConanFile):
    """Conan recipe that builds and packages the fimdlp library with CMake."""

    name = "fimdlp"
    # Placeholder; the real value is assigned in set_version().
    version = "X.X.X"
    license = "MIT"
    author = "Ricardo Montañana <rmontanana@gmail.com>"
    url = "https://github.com/rmontanana/mdlp"
    description = "Discretization algorithm based on the paper by Fayyad & Irani."
    topics = ("discretization", "classification", "machine learning")
    settings = "os", "compiler", "build_type", "arch"
    exports_sources = "src/*", "CMakeLists.txt", "README.md", "config/*", "fimdlpConfig.cmake.in"

    def set_version(self):
        """Set the package version from the VERSION found in CMakeLists.txt.

        The fallback version is used both when the file cannot be read and
        when it contains no ``VERSION x.y.z`` declaration, so the "X.X.X"
        placeholder never leaks out as the package version.
        """
        fallback_version = "2.0.1"
        try:
            content = load(self, "CMakeLists.txt")
        except Exception:
            # Deliberate best effort: any failure to read the file must not
            # abort the recipe, it only selects the fallback version.
            self.version = fallback_version
            return
        match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", content)
        self.version = match.group(1) if match else fallback_version

    def requirements(self):
        """Declare the libtorch dependency."""
        self.requires("libtorch/2.7.0")

    def layout(self):
        """Use the standard CMake folder layout."""
        cmake_layout(self)

    def generate(self):
        """Generate the CMake dependency files and the toolchain file."""
        deps = CMakeDeps(self)
        deps.generate()
        tc = CMakeToolchain(self)
        tc.generate()

    def build(self):
        """Configure and build the project with CMake."""
        cmake = CMake(self)
        cmake.configure()
        cmake.build()

    def package(self):
        """Run the CMake install step to populate the package folder."""
        cmake = CMake(self)
        cmake.install()

    def package_info(self):
        """Describe the packaged library and its CMake target/file names."""
        self.cpp_info.libs = ["fimdlp"]
        self.cpp_info.includedirs = ["include"]
        self.cpp_info.libdirs = ["lib"]
        self.cpp_info.set_property("cmake_find_mode", "both")
        self.cpp_info.set_property("cmake_target_name", "fimdlp::fimdlp")
        self.cpp_info.set_property("cmake_file_name", "fimdlp")
|
4
config/CMakeLists.txt
Normal file
4
config/CMakeLists.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
configure_file(
|
||||
"config.h.in"
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
|
||||
)
|
13
config/config.h.in
Normal file
13
config/config.h.in
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
// Filled in by CMake configure_file(). A substitution marker must not contain
// whitespace between its at-sign delimiters, otherwise it is left unexpanded.
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
|
||||
|
||||
static constexpr std::string_view project_mdlp_name = "@PROJECT_NAME@";
|
||||
static constexpr std::string_view project_mdlp_version = "@PROJECT_VERSION@";
|
||||
static constexpr std::string_view project_mdlp_description = "@PROJECT_DESCRIPTION@";
|
||||
static constexpr std::string_view git_mdlp_sha = "@GIT_SHA@";
|
2
fimdlpConfig.cmake.in
Normal file
2
fimdlpConfig.cmake.in
Normal file
@@ -0,0 +1,2 @@
|
||||
@PACKAGE_INIT@
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/fimdlpTargets.cmake")
|
47
getversion.py
Normal file
47
getversion.py
Normal file
@@ -0,0 +1,47 @@
|
||||
|
||||
# read the version from the CMakeLists.txt file
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def get_version_from_cmakelists(cmakelists_path):
    """Return the ``x.y.z`` version declared in a CMakeLists.txt file.

    Args:
        cmakelists_path: Path of the CMakeLists.txt file to read.

    Returns:
        The version string captured from the ``project(... VERSION x.y.z``
        declaration, or ``None`` when no such declaration is present.

    Raises:
        SystemExit: With status 1 when the file cannot be read.
    """
    try:
        # Explicit encoding so the result does not depend on the locale
        # (the project files contain non-ASCII characters).
        with open(cmakelists_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except OSError as e:
        print(f"Error reading {cmakelists_path}: {e}", file=sys.stderr)
        sys.exit(1)
    # Find a `project(` call (case-insensitive, may span several lines) and
    # capture the x.y.z number that follows the VERSION keyword inside it.
    # The match is not anchored to the start of a line.
    version_pattern = re.compile(
        r'project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)', re.IGNORECASE | re.DOTALL
    )
    match = version_pattern.search(content)
    return match.group(1) if match else None
|
||||
|
||||
def main():
    """Print the project version found in the CMakeLists.txt next to this script.

    Writes ``Version: x.y.z`` to stdout on success. Error messages go to
    stderr and the process exits with status 1 when the file is missing or
    declares no version.
    """
    # The CMakeLists.txt file is expected in the same folder as this script.
    cmakelists_path = Path(__file__).parent / "CMakeLists.txt"

    if not cmakelists_path.exists():
        print(f"Error: {cmakelists_path} does not exist.", file=sys.stderr)
        sys.exit(1)

    version = get_version_from_cmakelists(cmakelists_path)

    if version:
        print(f"Version: {version}")
    else:
        print("Version not found in CMakeLists.txt.", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,6 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(main)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
|
||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
include_directories(
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${fimdlp_SOURCE_DIR}/tests/lib/Files
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
add_executable(sample sample.cpp )
|
||||
target_link_libraries(sample fimdlp "${TORCH_LIBRARIES}")
|
||||
|
@@ -1,55 +1,226 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../CPPFImdlp.h"
|
||||
#include "../tests/ArffFiles.h"
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <getopt.h>
|
||||
#include <torch/torch.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "Discretizer.h"
|
||||
#include "CPPFImdlp.h"
|
||||
#include "BinDisc.h"
|
||||
|
||||
const string PATH = "tests/datasets/";
|
||||
|
||||
/* print a description of all supported options */
|
||||
void usage(const char* path)
|
||||
{
|
||||
/* take only the last portion of the path */
|
||||
const char* basename = strrchr(path, '/');
|
||||
basename = basename ? basename + 1 : path;
|
||||
|
||||
std::cout << "usage: " << basename << "[OPTION]" << std::endl;
|
||||
std::cout << " -h, --help\t\t Print this help and exit." << std::endl;
|
||||
std::cout
|
||||
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||
<< std::endl;
|
||||
std::cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << std::endl;
|
||||
std::cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << std::endl;
|
||||
std::cout
|
||||
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
||||
<< std::endl;
|
||||
std::cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << std::endl;
|
||||
}
|
||||
|
||||
/*
 * Parse the command line options of the sample program.
 *
 * Returns the tuple (file_name, path, max_depth, min_length, max_cutpoints).
 * Prints the usage text and terminates the process when -h is given (exit
 * status 0), or when an unknown option is found or no file name is supplied
 * (exit status 1).
 *
 * NOTE: stoi/stof throw std::invalid_argument or std::out_of_range on
 * malformed numeric values; those exceptions are not handled here.
 */
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
{
    string file_name;
    string path = PATH;
    int max_depth = numeric_limits<int>::max();
    int min_length = 3;
    float max_cutpoints = 0;
    const vector<struct option> long_options = {
        {"help", no_argument, nullptr, 'h'},
        {"file", required_argument, nullptr, 'f'},
        {"path", required_argument, nullptr, 'p'},
        {"max_depth", required_argument, nullptr, 'm'},
        {"max_cutpoints", required_argument, nullptr, 'c'},
        {"min_length", required_argument, nullptr, 'n'},
        {nullptr, no_argument, nullptr, 0}
    };
    while (true) {
        const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options.data(), nullptr);
        if (c == -1)
            break;
        switch (c) {
            case 'h':
                usage(argv[0]);
                exit(0);
            case 'f':
                file_name = string(optarg);
                break;
            case 'm':
                max_depth = stoi(optarg);
                break;
            case 'n':
                min_length = stoi(optarg);
                break;
            case 'c':
                max_cutpoints = stof(optarg);
                break;
            case 'p':
                path = optarg;
                // back() on an empty string is undefined behaviour, so the
                // separator is only appended to a non-empty path.
                if (!path.empty() && path.back() != '/')
                    path += '/';
                break;
            case '?':
                usage(argv[0]);
                exit(1);
            default:
                abort();
        }
    }
    // The dataset name is mandatory.
    if (file_name.empty()) {
        usage(argv[0]);
        exit(1);
    }
    return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
}
|
||||
|
||||
void process_file(const string& path, const string& file_name, bool class_last, int max_depth, int min_length,
|
||||
float max_cutpoints)
|
||||
{
|
||||
ArffFiles file;
|
||||
|
||||
file.load(path + file_name + ".arff", class_last);
|
||||
const auto attributes = file.getAttributes();
|
||||
const auto items = file.getSize();
|
||||
std::cout << "Number of lines: " << items << std::endl;
|
||||
std::cout << "Attributes: " << std::endl;
|
||||
for (auto attribute : attributes) {
|
||||
std::cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << std::endl;
|
||||
}
|
||||
std::cout << "Class name: " << file.getClassName() << std::endl;
|
||||
std::cout << "Class type: " << file.getClassType() << std::endl;
|
||||
std::cout << "Data: " << std::endl;
|
||||
std::vector<mdlp::samples_t>& X = file.getX();
|
||||
mdlp::labels_t& y = file.getY();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (auto feature : X) {
|
||||
std::cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
std::cout << y[i] << std::endl;
|
||||
}
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
size_t total = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||
std::cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
||||
test.fit(X[i], y);
|
||||
auto cut_points = test.getCutPoints();
|
||||
for (auto item : cut_points) {
|
||||
std::cout << item;
|
||||
if (item != cut_points.back())
|
||||
std::cout << ", ";
|
||||
}
|
||||
total += test.getCutPoints().size();
|
||||
std::cout << "]" << std::endl;
|
||||
std::cout << "Min: " << *min_max.first << " Max: " << *min_max.second << std::endl;
|
||||
std::cout << "--------------------------" << std::endl;
|
||||
}
|
||||
std::cout << "Total cut points ...: " << total << std::endl;
|
||||
std::cout << "Total feature states: " << total + attributes.size() << std::endl;
|
||||
std::cout << "Version ............: " << test.version() << std::endl;
|
||||
std::cout << "Transformed data (vector)..: " << std::endl;
|
||||
test.fit(X[0], y);
|
||||
auto data = test.transform(X[0]);
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
||||
}
|
||||
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
||||
auto yt = torch::tensor(y, torch::kInt32);
|
||||
//test.fit_t(Xt, yt);
|
||||
auto result = test.fit_transform_t(Xt, yt);
|
||||
std::cout << "Transformed data (torch)...: " << std::endl;
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << result[i].item<int>() << std::endl;
|
||||
}
|
||||
auto disc = mdlp::BinDisc(3);
|
||||
auto res_v = disc.fit_transform(X[0], y);
|
||||
disc.fit_t(Xt, yt);
|
||||
auto res_t = disc.transform_t(Xt);
|
||||
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
||||
float max_cutpoints)
|
||||
{
|
||||
std::cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||
<< max_cutpoints << std::endl << std::endl;
|
||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||
printf("==================== ==== ==== ========\n");
|
||||
for (const auto& dataset : datasets) {
|
||||
ArffFiles file;
|
||||
file.load(path + dataset.first + ".arff", dataset.second);
|
||||
auto attributes = file.getAttributes();
|
||||
std::vector<mdlp::samples_t>& X = file.getX();
|
||||
mdlp::labels_t& y = file.getY();
|
||||
size_t timing = 0;
|
||||
size_t cut_points = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||
test.fit(X[i], y);
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
|
||||
cut_points += test.getCutPoints().size();
|
||||
}
|
||||
printf("%-20s %4lu %4zu %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
|
||||
}
|
||||
}
|
||||
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../../tests/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
std::map<std::string, bool> datasets = {
|
||||
{"diabetes", true},
|
||||
{"glass", true},
|
||||
{"iris", true},
|
||||
{"kdd_JapaneseVowels", false},
|
||||
{"letter", true},
|
||||
{"liver-disorders", true},
|
||||
{"mfeat-factors", true},
|
||||
{"test", true}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
std::string file_name;
|
||||
std::string path;
|
||||
int max_depth;
|
||||
int min_length;
|
||||
float max_cutpoints;
|
||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||
std::cout << "Invalid file name: " << file_name << std::endl;
|
||||
usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
if (file_name == "all")
|
||||
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
||||
else {
|
||||
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
||||
std::cout << "File name ....: " << file_name << std::endl;
|
||||
std::cout << "Max depth ....: " << max_depth << std::endl;
|
||||
std::cout << "Min length ...: " << min_length << std::endl;
|
||||
std::cout << "Max cutpoints : " << max_cutpoints << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
14
sonar-project.properties
Normal file
14
sonar-project.properties
Normal file
@@ -0,0 +1,14 @@
|
||||
sonar.projectKey=rmontanana_mdlp
|
||||
sonar.organization=rmontanana
|
||||
|
||||
# This is the name and version displayed in the SonarCloud UI.
|
||||
sonar.projectName=mdlp
|
||||
sonar.projectVersion=2.0.1
|
||||
# sonar.test.exclusions=tests/**
|
||||
# sonar.tests=tests/
|
||||
# sonar.coverage.exclusions=tests/**,sample/**
|
||||
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
|
||||
#sonar.sources=.
|
||||
|
||||
# Encoding of the source code. Default is default system encoding
|
||||
sonar.sourceEncoding=UTF-8
|
98
src/BinDisc.cpp
Normal file
98
src/BinDisc.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include "BinDisc.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
BinDisc::BinDisc(int n_bins, strategy_t strategy) :
|
||||
Discretizer(), n_bins{ n_bins }, strategy{ strategy }
|
||||
{
|
||||
if (n_bins < 3) {
|
||||
throw std::invalid_argument("n_bins must be greater than 2");
|
||||
}
|
||||
}
|
||||
BinDisc::~BinDisc() = default;
|
||||
// Compute the cut points for X according to the configured strategy.
// Empty input yields two placeholder cut points (0, 0); in that case the
// bound direction is left untouched.
void BinDisc::fit(samples_t& X)
{
    cutPoints.clear();
    if (X.empty()) {
        cutPoints.assign(2, precision_t(0.0));
        return;
    }
    switch (strategy) {
        case strategy_t::QUANTILE:
            direction = bound_dir_t::RIGHT;
            fit_quantile(X);
            break;
        case strategy_t::UNIFORM:
            direction = bound_dir_t::RIGHT;
            fit_uniform(X);
            break;
    }
}
|
||||
// Overload required by the Discretizer interface; the labels are not used
// and the call is forwarded to the single-argument fit.
void BinDisc::fit(samples_t& X, labels_t& y)
{
    (void)y;
    this->fit(X);
}
|
||||
// Return `num` evenly spaced values starting at `start` with a step of
// (end - start) / (num - 1).
//
// Special cases:
//  - start == end: returns the two-element vector { start, end } whatever
//    `num` is (callers rely on getting at least two cut points).
//  - num == 1: returns { start } (the step would otherwise divide by zero).
//  - num <= 0: returns an empty vector.
std::vector<precision_t> linspace(precision_t start, precision_t end, int num)
{
    if (start == end) {
        return { start, end };
    }
    std::vector<precision_t> linspc;
    if (num <= 0) {
        return linspc;
    }
    if (num == 1) {
        linspc.push_back(start);
        return linspc;
    }
    const precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
    linspc.reserve(static_cast<size_t>(num));
    // A signed index avoids the signed/unsigned comparison against `num`.
    for (int i = 0; i < num; ++i) {
        linspc.push_back(start + delta * static_cast<precision_t>(i));
    }
    return linspc;
}
|
||||
// Limit n to the range [lower, upper]: the upper bound is applied first and
// the lower bound second, so `lower` wins when the bounds are inverted.
size_t clip(const size_t n, const size_t lower, const size_t upper)
{
    const size_t capped = upper < n ? upper : n;
    return capped < lower ? lower : capped;
}
|
||||
// Compute the requested percentiles of `data` by linear interpolation between
// neighbouring samples. A value equal to the previously stored result is
// skipped, so the output may be shorter than `percentiles`.
//
// Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
//
// NOTE(review): `data` is indexed as if it were sorted in ascending order
// (the visible caller sorts it first) - confirm for other callers.
std::vector<precision_t> percentile(samples_t& data, const std::vector<precision_t>& percentiles)
{
    std::vector<precision_t> results;
    results.reserve(percentiles.size());
    if (data.empty()) {
        return results;
    }
    if (data.size() == 1) {
        // Interpolation needs two samples; with one sample every percentile
        // is that sample (and data.size() - 2 below would underflow).
        if (!percentiles.empty()) {
            results.push_back(data.front());
        }
        return results;
    }
    const auto lastIndex = static_cast<precision_t>(data.size() - 1);
    for (auto pct : percentiles) {
        const auto i = static_cast<size_t>(std::floor(lastIndex * pct / 100.));
        const auto indexLower = clip(i, 0, data.size() - 2);
        const precision_t percentI = static_cast<precision_t>(indexLower) / lastIndex;
        const precision_t fraction =
            (pct / 100.0 - percentI) /
            (static_cast<precision_t>(indexLower + 1) / lastIndex - percentI);
        const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
        // Test emptiness first: back() on an empty vector is undefined behaviour.
        if (results.empty() || value != results.back())
            results.push_back(value);
    }
    return results;
}
|
||||
void BinDisc::fit_quantile(const samples_t& X)
|
||||
{
|
||||
auto quantiles = linspace(0.0, 100.0, n_bins + 1);
|
||||
auto data = X;
|
||||
std::sort(data.begin(), data.end());
|
||||
if (data.front() == data.back() || data.size() == 1) {
|
||||
// if X is constant, pass any two given points that shall be ignored in transform
|
||||
cutPoints.push_back(data.front());
|
||||
cutPoints.push_back(data.front());
|
||||
return;
|
||||
}
|
||||
cutPoints = percentile(data, quantiles);
|
||||
}
|
||||
void BinDisc::fit_uniform(const samples_t& X)
|
||||
{
|
||||
auto [vmin, vmax] = std::minmax_element(X.begin(), X.end());
|
||||
cutPoints = linspace(*vmin, *vmax, n_bins + 1);
|
||||
}
|
||||
}
|
33
src/BinDisc.h
Normal file
33
src/BinDisc.h
Normal file
@@ -0,0 +1,33 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef BINDISC_H
|
||||
#define BINDISC_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include "Discretizer.h"
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
enum class strategy_t {
|
||||
UNIFORM,
|
||||
QUANTILE
|
||||
};
|
||||
class BinDisc : public Discretizer {
|
||||
public:
|
||||
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
||||
~BinDisc();
|
||||
// y is included for compatibility with the Discretizer interface
|
||||
void fit(samples_t& X_, labels_t& y) override;
|
||||
void fit(samples_t& X);
|
||||
private:
|
||||
void fit_uniform(const samples_t&);
|
||||
void fit_quantile(const samples_t&);
|
||||
int n_bins;
|
||||
strategy_t strategy;
|
||||
};
|
||||
}
|
||||
#endif
|
221
src/CPPFImdlp.cpp
Normal file
221
src/CPPFImdlp.cpp
Normal file
@@ -0,0 +1,221 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
|
||||
Discretizer(),
|
||||
min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed)
|
||||
{
|
||||
direction = bound_dir_t::RIGHT;
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||
{
|
||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||
if (proposed_cuts == 0) {
|
||||
return numeric_limits<size_t>::max();
|
||||
}
|
||||
if (proposed_cuts < 0 || proposed_cuts > static_cast<precision_t>(X.size())) {
|
||||
throw invalid_argument("wrong proposed num_cuts value");
|
||||
}
|
||||
if (proposed_cuts < 1)
|
||||
return static_cast<size_t>(round(static_cast<precision_t>(X.size()) * proposed_cuts));
|
||||
return static_cast<size_t>(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added
|
||||
}
|
||||
|
||||
// Compute the MDLP cut points of feature X_ with class labels y_.
//
// The inputs are copied into the object, the previous results are cleared and
// the arguments/hyperparameters are validated (invalid_argument is thrown on
// failure). The resulting cut points are sorted, optionally reduced to the
// configured maximum, and finally enclosed by the minimum and maximum of X.
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{
    X = X_;
    y = y_;
    num_cut_points = compute_max_num_cut_points();
    depth = 0;
    discretizedData.clear();
    cutPoints.clear();
    const bool sameSize = X.size() == y.size();
    if (!sameSize) {
        throw invalid_argument("X and y must have the same size");
    }
    if (X.empty() || y.empty()) {
        throw invalid_argument("X and y must have at least one element");
    }
    if (min_length < 3) {
        throw invalid_argument("min_length must be greater than 2");
    }
    if (max_depth < 1) {
        throw invalid_argument("max_depth must be greater than 0");
    }
    indices = sortIndices(X_, y_);
    metrics.setData(y, indices);
    computeCutPoints(0, X.size(), 1);
    std::sort(cutPoints.begin(), cutPoints.end());
    // Select the best (with lower entropy) cut points; a limit of 0 disables
    // the reduction.
    while (num_cut_points > 0 && cutPoints.size() > num_cut_points) {
        resizeCutPoints();
    }
    // Insert first & last X value to the cutpoints as they shall be ignored in transform
    const auto extremes = std::minmax_element(X.begin(), X.end());
    cutPoints.push_back(*extremes.second);
    cutPoints.insert(cutPoints.begin(), *extremes.first);
}
|
||||
|
||||
// Turn a candidate position into an actual cut value.
//
// The candidate is moved to the edge of the run of samples that share its X
// value, so a cut never falls between equal values; the cut value is the
// midpoint between the sample at the adjusted position and the closest
// different (previous) value found.
// Returns { cut value, adjusted cut position }.
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
{
    // Neighbours of the candidate, clamped to the interval
    size_t lower = cut - 1 >= start ? cut - 1 : cut;
    size_t upper = cut + 1 < end ? cut + 1 : cut;
    precision_t previous = X[indices[lower]];
    precision_t actual = X[indices[cut]];
    precision_t next = X[indices[upper]];
    // definition 2 of the paper => X[t-1] < X[t]
    // get the first equal value of X in the interval
    while (lower > start && actual == previous) {
        --lower;
        previous = X[indices[lower]];
    }
    // true if duplicates reach beginning of the interval
    const bool backWall = lower == start && actual == previous;
    // get the last equal value of X in the interval
    while (upper < end - 1 && actual == next) {
        ++upper;
        next = X[indices[upper]];
    }
    // # of duplicates before / after the cutpoint (unsigned arithmetic)
    const size_t duplicatesBefore = cut - 1 - lower;
    const size_t duplicatesAfter = upper - cut - 1;
    // Decide which values to use
    cut = backWall ? cut + duplicatesAfter + 1 : cut - duplicatesBefore;
    actual = X[indices[cut]];
    return { (actual + previous) / 2, cut };
}
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
|
||||
{
|
||||
size_t cut;
|
||||
pair<precision_t, size_t> result;
|
||||
// Check if the interval length and the depth are Ok
|
||||
if (end - start < min_length || depth_ > max_depth)
|
||||
return;
|
||||
depth = depth_ > depth ? depth_ : depth;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut, depth_ + 1);
|
||||
computeCutPoints(cut, end, depth_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max();
|
||||
size_t elements = end - start;
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left;
|
||||
precision_t entropy_right;
|
||||
precision_t minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (X[indices[idx]] != X[indices[start]]) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k;
|
||||
int k1;
|
||||
int k2;
|
||||
precision_t ig;
|
||||
precision_t delta;
|
||||
precision_t ent;
|
||||
precision_t ent1;
|
||||
precision_t ent2;
|
||||
auto N = precision_t(end - start);
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
// Return the permutation of 0..X_.size()-1 that orders the samples by X
// value; samples with equal X are ordered by their label.
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
{
    indices_t order(X_.size());
    std::iota(order.begin(), order.end(), 0);
    const auto byValueThenLabel = [&X_, &y_](size_t lhs, size_t rhs) {
        return X_[lhs] == X_[rhs] ? y_[lhs] < y_[rhs] : X_[lhs] < X_[rhs];
    };
    stable_sort(order.begin(), order.end(), byValueThenLabel);
    return order;
}
|
||||
|
||||
void CPPFImdlp::resizeCutPoints()
|
||||
{
|
||||
//Compute entropy of each of the whole cutpoint set and discards the biggest value
|
||||
precision_t maxEntropy = 0;
|
||||
precision_t entropy;
|
||||
size_t maxEntropyIdx = 0;
|
||||
size_t begin = 0;
|
||||
size_t end;
|
||||
for (size_t idx = 0; idx < cutPoints.size(); idx++) {
|
||||
end = begin;
|
||||
while (X[indices[end]] < cutPoints[idx] && end < X.size())
|
||||
end++;
|
||||
entropy = metrics.entropy(begin, end);
|
||||
if (entropy > maxEntropy) {
|
||||
maxEntropy = entropy;
|
||||
maxEntropyIdx = idx;
|
||||
}
|
||||
begin = end;
|
||||
}
|
||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||
}
|
||||
|
||||
}
|
44
src/CPPFImdlp.h
Normal file
44
src/CPPFImdlp.h
Normal file
@@ -0,0 +1,44 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "Metrics.h"
|
||||
#include "Discretizer.h"
|
||||
|
||||
namespace mdlp {
|
||||
class CPPFImdlp : public Discretizer {
|
||||
public:
|
||||
CPPFImdlp() = default;
|
||||
CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
|
||||
virtual ~CPPFImdlp() = default;
|
||||
void fit(samples_t& X_, labels_t& y_) override;
|
||||
inline int get_depth() const { return depth; };
|
||||
protected:
|
||||
size_t min_length = 3;
|
||||
int depth = 0;
|
||||
int max_depth = numeric_limits<int>::max();
|
||||
float proposed_cuts = 0;
|
||||
indices_t indices = indices_t();
|
||||
samples_t X = samples_t();
|
||||
labels_t y = labels_t();
|
||||
Metrics metrics = Metrics(y, indices);
|
||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
void computeCutPoints(size_t, size_t, int);
|
||||
void resizeCutPoints();
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
size_t compute_max_num_cut_points() const;
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
54
src/Discretizer.cpp
Normal file
54
src/Discretizer.cpp
Normal file
@@ -0,0 +1,54 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "Discretizer.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
// Map every value of `data` to the index of the interval it falls in.
//
// The first and last cut points are ignored: only the inner cut points
// delimit the intervals. `direction` selects whether a value equal to a cut
// point goes to the interval on its left (LEFT, lower_bound) or on its right
// (RIGHT, upper_bound).
//
// Returns a reference to the internal buffer, which is overwritten by the
// next call. Throws std::runtime_error when fewer than two cut points are
// available (the discretizer has not been fitted).
labels_t& Discretizer::transform(const samples_t& data)
{
    // begin() + 1 / end() - 1 below are only valid with at least two items
    if (cutPoints.size() < 2) {
        throw std::runtime_error("Discretizer not fitted: at least two cut points are required");
    }
    discretizedData.clear();
    discretizedData.reserve(data.size());
    // Have to ignore first and last cut points provided
    auto first = cutPoints.begin() + 1;
    auto last = cutPoints.end() - 1;
    // Calling the algorithms directly avoids taking the address of standard
    // library function templates.
    const bool leftBound = direction == bound_dir_t::LEFT;
    for (const precision_t& item : data) {
        auto pos = leftBound ? std::lower_bound(first, last, item) : std::upper_bound(first, last, item);
        auto number = pos - first;
        discretizedData.push_back(static_cast<label_t>(number));
    }
    return discretizedData;
}
|
||||
// Fit the discretizer with (X_, y_) and return the discretized X_.
// The returned reference points to the internal buffer filled by transform.
labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_)
{
    this->fit(X_, y_);
    labels_t& discretized = this->transform(X_);
    return discretized;
}
|
||||
// Tensor front end of fit: copies the tensors into vectors and calls fit.
//
// X_ is read through data_ptr<precision_t>() and y_ through data_ptr<int>().
// Each tensor is made contiguous first, because the raw pointer is walked
// linearly, and each vector is sized from its own tensor so a shorter y_ is
// not read past its end.
void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_)
{
    const auto Xc = X_.contiguous();
    const auto yc = y_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    const auto* y_begin = yc.data_ptr<int>();
    samples_t X(x_begin, x_begin + Xc.numel());
    labels_t y(y_begin, y_begin + yc.numel());
    fit(X, y);
}
|
||||
// Tensor front end of transform: copies X_ into a vector, discretizes it and
// returns the labels as a tensor of type torch_label_t.
//
// X_ is made contiguous first because the raw pointer obtained from
// data_ptr<precision_t>() is walked linearly.
torch::Tensor Discretizer::transform_t(const torch::Tensor& X_)
{
    const auto Xc = X_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    samples_t X(x_begin, x_begin + Xc.numel());
    const labels_t& result = transform(X);
    return torch::tensor(result, torch_label_t);
}
|
||||
// Tensor front end of fit_transform: copies the tensors into vectors, fits
// the discretizer and returns the discretized X_ as a tensor of type
// torch_label_t.
//
// Each tensor is made contiguous before its raw pointer is walked linearly,
// and each vector is sized from its own tensor so a shorter y_ is not read
// past its end.
torch::Tensor Discretizer::fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_)
{
    const auto Xc = X_.contiguous();
    const auto yc = y_.contiguous();
    const auto* x_begin = Xc.data_ptr<precision_t>();
    const auto* y_begin = yc.data_ptr<int>();
    samples_t X(x_begin, x_begin + Xc.numel());
    labels_t y(y_begin, y_begin + yc.numel());
    const labels_t& result = fit_transform(X, y);
    return torch::tensor(result, torch_label_t);
}
|
||||
}
|
40
src/Discretizer.h
Normal file
40
src/Discretizer.h
Normal file
@@ -0,0 +1,40 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef DISCRETIZER_H
|
||||
#define DISCRETIZER_H
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "typesFImdlp.h"
|
||||
#include <torch/torch.h>
|
||||
#include "config.h"
|
||||
|
||||
namespace mdlp {
|
||||
enum class bound_dir_t {
|
||||
LEFT,
|
||||
RIGHT
|
||||
};
|
||||
const auto torch_label_t = torch::kInt32;
|
||||
class Discretizer {
|
||||
public:
|
||||
Discretizer() = default;
|
||||
virtual ~Discretizer() = default;
|
||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||
virtual void fit(samples_t& X_, labels_t& y_) = 0;
|
||||
labels_t& transform(const samples_t& data);
|
||||
labels_t& fit_transform(samples_t& X_, labels_t& y_);
|
||||
void fit_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||
torch::Tensor transform_t(const torch::Tensor& X_);
|
||||
torch::Tensor fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||
static inline std::string version() { return { project_mdlp_version.begin(), project_mdlp_version.end() }; };
|
||||
protected:
|
||||
labels_t discretizedData = labels_t();
|
||||
cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform
|
||||
bound_dir_t direction; // used in transform
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,20 +1,30 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_), numClasses(computeNumClasses(0, indices.size())), entropyCache(cacheEnt_t()), igCache(cacheIg_t())
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_) : y(y_), indices(indices_),
|
||||
numClasses(computeNumClasses(0, indices_.size()))
|
||||
{
|
||||
}
|
||||
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return nClasses.size();
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
void Metrics::setData(labels_t& y_, indices_t& indices_)
|
||||
|
||||
void Metrics::setData(const labels_t& y_, const indices_t& indices_)
|
||||
{
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
@@ -22,15 +32,17 @@ namespace mdlp {
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
precision_t p, ventropy = 0;
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
if (entropyCache.find(make_tuple(start, end)) != entropyCache.end()) {
|
||||
return entropyCache[make_tuple(start, end)];
|
||||
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
@@ -38,26 +50,33 @@ namespace mdlp {
|
||||
}
|
||||
for (auto count : counts) {
|
||||
if (count > 0) {
|
||||
p = (precision_t)count / nElements;
|
||||
p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
entropyCache[make_tuple(start, end)] = ventropy;
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
return ventropy;
|
||||
}
|
||||
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
precision_t iGain;
|
||||
precision_t entropyInterval, entropyLeft, entropyRight;
|
||||
int nElementsLeft = cut - start, nElementsRight = end - cut;
|
||||
int nElements = end - start;
|
||||
precision_t entropyInterval;
|
||||
precision_t entropyLeft;
|
||||
precision_t entropyRight;
|
||||
size_t nElementsLeft = cut - start;
|
||||
size_t nElementsRight = end - cut;
|
||||
size_t nElements = end - start;
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
entropyInterval = entropy(start, end);
|
||||
entropyLeft = entropy(start, cut);
|
||||
entropyRight = entropy(cut, end);
|
||||
iGain = entropyInterval - ((precision_t)nElementsLeft * entropyLeft + (precision_t)nElementsRight * entropyRight) / nElements;
|
||||
iGain = entropyInterval -
|
||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||
static_cast<precision_t>(nElements);
|
||||
igCache[make_tuple(start, cut, end)] = iGain;
|
||||
return iGain;
|
||||
}
|
28
src/Metrics.h
Normal file
28
src/Metrics.h
Normal file
@@ -0,0 +1,28 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache = cacheEnt_t();
|
||||
cacheIg_t igCache = cacheIg_t();
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(const labels_t&, const indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
25
src/typesFImdlp.h
Normal file
25
src/typesFImdlp.h
Normal file
@@ -0,0 +1,25 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef TYPES_H
#define TYPES_H

#include <cstddef>
#include <map>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>

// NOTE(review): a using-directive in a header leaks into every includer. It is
// kept because code that includes this header relies on unqualified std names.
using namespace std;
namespace mdlp {
    // Floating point type used for samples, cut points and metrics.
    using precision_t = float;
    // Integer type used for class labels.
    using label_t = int;
    using samples_t = std::vector<precision_t>;
    using labels_t = std::vector<label_t>;
    using indices_t = std::vector<size_t>;
    using cutPoints_t = std::vector<precision_t>;
    // Memoization tables keyed by interval bounds: (start, end) and (start, cut, end).
    using cacheEnt_t = std::map<std::pair<int, int>, precision_t>;
    using cacheIg_t = std::map<std::tuple<int, int, int>, precision_t>;
}
#endif
|
9
test_consumer/CMakeLists.txt
Normal file
9
test_consumer/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
cmake_minimum_required(VERSION 3.20)
project(test_fimdlp)

# Fail at configure time instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(fimdlp REQUIRED)

add_executable(test_fimdlp test_fimdlp.cpp)
target_link_libraries(test_fimdlp PRIVATE fimdlp::fimdlp)
|
9
test_consumer/CMakeUserPresets.json
Normal file
9
test_consumer/CMakeUserPresets.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/Release/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
9
test_consumer/conanfile.txt
Normal file
9
test_consumer/conanfile.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
[requires]
|
||||
fimdlp/2.0.1
|
||||
|
||||
[generators]
|
||||
CMakeDeps
|
||||
CMakeToolchain
|
||||
|
||||
[layout]
|
||||
cmake_layout
|
39
test_consumer/test_fimdlp.cpp
Normal file
39
test_consumer/test_fimdlp.cpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <fimdlp/BinDisc.h>
|
||||
|
||||
// Smoke test for the installed package: fits both discretizers on a small
// dataset and prints the cut points each one reports.
int main() {
    std::cout << "Testing FIMDLP package..." << std::endl;

    // Test data - simple continuous values with binary classification
    mdlp::samples_t data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
    mdlp::labels_t labels = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1};
    std::cout << "Created test data with " << data.size() << " samples" << std::endl;

    // MDLP discretizer
    mdlp::CPPFImdlp discretizer;
    discretizer.fit(data, labels);
    auto cut_points = discretizer.getCutPoints();
    std::cout << "MDLP found " << cut_points.size() << " cut points" << std::endl;
    size_t position = 0;
    for (const auto& cut : cut_points) {
        std::cout << "Cut point " << position++ << ": " << cut << std::endl;
    }

    // BinDisc discretizer: 3 bins, uniform strategy
    mdlp::BinDisc bin_discretizer(3, mdlp::strategy_t::UNIFORM);
    bin_discretizer.fit(data, labels);
    auto bin_cut_points = bin_discretizer.getCutPoints();
    std::cout << "BinDisc found " << bin_cut_points.size() << " cut points" << std::endl;
    position = 0;
    for (const auto& cut : bin_cut_points) {
        std::cout << "Bin cut point " << position++ << ": " << cut << std::endl;
    }

    std::cout << "FIMDLP package test completed successfully!" << std::endl;
    return 0;
}
|
@@ -1,116 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Nothing to set up: every member is default-constructed.
ArffFiles::ArffFiles() = default;
|
||||
// Returns a copy of the raw data lines collected by load().
vector<string> ArffFiles::getLines()
{
    vector<string> copy(lines);
    return copy;
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
// Returns a copy of the (name, type) pairs of the attributes; load() removes
// the class attribute from this list.
vector<tuple<string, string>> ArffFiles::getAttributes()
{
    vector<tuple<string, string>> copy(attributes);
    return copy;
}
|
||||
// Name of the class attribute selected by load().
string ArffFiles::getClassName()
{
    string name(className);
    return name;
}
|
||||
// Declared type of the class attribute selected by load().
string ArffFiles::getClassType()
{
    string type(classType);
    return type;
}
|
||||
// Mutable reference to the feature matrix, stored as one vector per attribute.
vector<vector<float>>& ArffFiles::getX()
{
    auto& features = X;
    return features;
}
|
||||
// Mutable reference to the integer-encoded class labels.
vector<int>& ArffFiles::getY()
{
    auto& labels = y;
    return labels;
}
|
||||
/**
 * Reads an ARFF file and builds the dataset (X, y) from it.
 *
 * Comment lines (starting with '%'), blank lines and '@' directives other than
 * attribute declarations are skipped. Each "@attribute name type" line is
 * recorded; every remaining line is kept as a data line.
 *
 * @param fileName  path of the file to read
 * @param classLast true if the class is the last attribute, false if it is the first
 * @throws invalid_argument if the file cannot be opened or declares no attributes
 */
void ArffFiles::load(string fileName, bool classLast)
{
    ifstream file(fileName);
    if (!file.is_open()) {
        throw invalid_argument("Unable to open file");
    }
    // Start from a clean slate so a second load() does not append to the first.
    lines.clear();
    attributes.clear();
    string line;
    while (getline(file, line)) {
        // Test for emptiness before looking at the first character.
        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
            continue;
        }
        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
            // Fresh temporaries per line: a short declaration must not inherit
            // the name or type parsed from the previous one.
            string keyword, attribute, type;
            stringstream ss(line);
            ss >> keyword >> attribute >> type;
            attributes.push_back(make_tuple(attribute, type));
            continue;
        }
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    if (attributes.empty()) {
        throw invalid_argument("No attributes found");
    }
    // Pull the class attribute out of the feature list.
    if (classLast) {
        className = get<0>(attributes.back());
        classType = get<1>(attributes.back());
        attributes.pop_back();
    } else {
        className = get<0>(attributes.front());
        classType = get<1>(attributes.front());
        attributes.erase(attributes.begin());
    }
    generateDataset(classLast);
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
// Returns `source` without leading and trailing spaces, tabs, CRs and LFs.
// A string made only of those characters yields an empty string.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    const auto first = source.find_first_not_of(blanks);
    if (first == string::npos) {
        return string();
    }
    const auto last = source.find_last_not_of(blanks);
    return source.substr(first, last - first + 1);
}
|
||||
/**
 * Encodes string labels as consecutive integers in order of first appearance.
 *
 * The first distinct label becomes 0, the next new one 1, and so on; repeated
 * labels reuse the code assigned on their first occurrence.
 *
 * @param labels_t labels to encode
 * @return one integer code per input label, in the same order
 */
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> yy;
    yy.reserve(labels_t.size());
    map<string, int> labelMap;
    for (const string& label : labels_t) {
        // emplace only inserts when the label is new; the map size before the
        // insertion is exactly the next free code. One lookup per label.
        const auto entry = labelMap.emplace(label, static_cast<int>(labelMap.size())).first;
        yy.push_back(entry->second);
    }
    return yy;
}
|
@@ -1,28 +0,0 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
// Loader for ARFF files: keeps the raw data lines, the attribute declarations
// and the numeric dataset (X, y) built from them.
class ArffFiles {
    vector<string> lines;                       // raw data lines
    vector<tuple<string, string>> attributes;   // (name, type) of each feature
    string className, classType;                // name and type of the class attribute
    vector<vector<float>> X;                    // features, one vector per attribute
    vector<int> y;                              // integer-encoded class labels
    // Fills X and y from `lines`.
    void generateDataset(bool classLast);
public:
    ArffFiles();
    // Reads `fileName`; classLast tells whether the class is the last attribute.
    void load(string fileName, bool classLast = true);
    vector<string> getLines();
    unsigned long int getSize();
    string getClassName();
    string getClassType();
    // Strips leading and trailing whitespace.
    string trim(const string& source);
    vector<vector<float>>& getX();
    vector<int>& getY();
    vector<tuple<string, string>> getAttributes();
    // Maps string labels to consecutive integers in order of first appearance.
    vector<int> factorize(const vector<string>& labels_t);
};
|
||||
#endif
|
411
tests/BinDisc_unittest.cpp
Normal file
411
tests/BinDisc_unittest.cpp
Normal file
@@ -0,0 +1,411 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "gtest/gtest.h"
|
||||
#include <ArffFiles.hpp>
|
||||
#include "BinDisc.h"
|
||||
#include "Experiments.hpp"
|
||||
|
||||
namespace mdlp {
|
||||
const float margin = 1e-4;
|
||||
// Picks the dataset directory: "../datasets/" when iris.arff can be opened
// there, otherwise "../../tests/datasets/".
static std::string set_data_path()
{
    const std::string local = "../datasets/";
    std::ifstream probe(local + "iris.arff");
    if (!probe.is_open()) {
        return "../../tests/datasets/";
    }
    return local;
}
|
||||
const std::string data_path = set_data_path();
|
||||
// Fixture that is itself a BinDisc using the UNIFORM strategy (3 bins by default).
class TestBinDisc3U : public BinDisc, public testing::Test {
public:
    TestBinDisc3U(int n_bins = 3)
        : BinDisc(n_bins, strategy_t::UNIFORM)
    {
    }
};
|
||||
// Fixture that is itself a BinDisc using the QUANTILE strategy (3 bins by default).
class TestBinDisc3Q : public BinDisc, public testing::Test {
public:
    TestBinDisc3Q(int n_bins = 3)
        : BinDisc(n_bins, strategy_t::QUANTILE)
    {
    }
};
|
||||
// Fixture that is itself a BinDisc using the UNIFORM strategy (4 bins by default).
class TestBinDisc4U : public BinDisc, public testing::Test {
public:
    TestBinDisc4U(int n_bins = 4)
        : BinDisc(n_bins, strategy_t::UNIFORM)
    {
    }
};
|
||||
// Fixture that is itself a BinDisc using the QUANTILE strategy (4 bins by default).
class TestBinDisc4Q : public BinDisc, public testing::Test {
public:
    TestBinDisc4Q(int n_bins = 4)
        : BinDisc(n_bins, strategy_t::QUANTILE)
    {
    }
};
|
||||
// Values 1..9 with 3 uniform bins; fit is called with an explicit empty label vector.
TEST_F(TestBinDisc3U, Easy3BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    auto no_labels = labels_t();
    fit(samples, no_labels);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 3.66667, 6.33333, 9.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..9 with 3 quantile bins.
TEST_F(TestBinDisc3Q, Easy3BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 3.666667, 6.333333, 9 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges[k], margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..10 with 3 uniform bins.
TEST_F(TestBinDisc3U, X10BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 4.0, 7.0, 10.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..10 with 3 quantile bins.
TEST_F(TestBinDisc3Q, X10BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 4.0, 7.0, 10.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..11 with 3 uniform bins.
TEST_F(TestBinDisc3U, X11BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 4.33333, 7.66667, 11.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..11 with 3 quantile bins.
// The test is named "Quantile" but was attached to the UNIFORM fixture, so it
// only repeated X11BinsUniform; it now runs on the QUANTILE fixture.
// NOTE(review): expectations are kept as they were; the sibling quantile tests
// on evenly spaced data expect the same edges as the uniform ones - confirm on
// the first run.
TEST_F(TestBinDisc3Q, X11BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(X);
    auto cuts = getCutPoints();
    ASSERT_EQ(4, cuts.size());
    EXPECT_NEAR(1, cuts.at(0), margin);
    EXPECT_NEAR(4.33333, cuts.at(1), margin);
    EXPECT_NEAR(7.66667, cuts.at(2), margin);
    EXPECT_NEAR(11.0, cuts.at(3), margin);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected, labels);
}
|
||||
// A constant feature: two edges at the constant value and every sample in bin 0.
TEST_F(TestBinDisc3U, ConstantUniform)
{
    samples_t samples = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 1 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 0, 0, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// A constant feature: two edges at the constant value and every sample in bin 0.
TEST_F(TestBinDisc3Q, ConstantQuantile)
{
    samples_t samples = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 1 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 0, 0, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Fitting an empty sample vector must yield two edges, both 0.
TEST_F(TestBinDisc3U, EmptyUniform)
{
    samples_t samples = {};
    fit(samples);
    const auto edges = getCutPoints();
    ASSERT_EQ(2, edges.size());
    for (size_t k = 0; k < 2; ++k) {
        EXPECT_NEAR(0, edges.at(k), margin);
    }
}
|
||||
// Fitting an empty sample vector must yield two edges, both 0.
TEST_F(TestBinDisc3Q, EmptyQuantile)
{
    samples_t samples = {};
    fit(samples);
    const auto edges = getCutPoints();
    ASSERT_EQ(2, edges.size());
    for (size_t k = 0; k < 2; ++k) {
        EXPECT_NEAR(0, edges.at(k), margin);
    }
}
|
||||
// Constructing a BinDisc with 2 bins must throw std::invalid_argument.
TEST(TestBinDisc3, ExceptionNumberBins)
{
    EXPECT_THROW({ BinDisc too_few_bins(2); }, std::invalid_argument);
}
|
||||
// Only two distinct values (1 and 3), unsorted, with 3 uniform bins.
TEST_F(TestBinDisc3U, EasyRepeated)
{
    samples_t samples = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 1.66667, 2.33333, 3.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
    ASSERT_EQ(3.0, samples[0]); // the input is not modified
}
|
||||
// Only two distinct values (1 and 3), unsorted, with 3 quantile bins:
// three edges are expected instead of four.
TEST_F(TestBinDisc3Q, EasyRepeated)
{
    samples_t samples = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1, 1.66667, 3.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
    ASSERT_EQ(3.0, samples[0]); // the input is not modified
}
|
||||
// Values 1..12 with 4 uniform bins.
TEST_F(TestBinDisc4U, Easy4BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 3.75, 6.5, 9.25, 12.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..12 with 4 quantile bins.
TEST_F(TestBinDisc4Q, Easy4BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 3.75, 6.5, 9.25, 12.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..13 with 4 uniform bins.
TEST_F(TestBinDisc4U, X13BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.0, 7.0, 10.0, 13.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..13 with 4 quantile bins.
TEST_F(TestBinDisc4Q, X13BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.0, 7.0, 10.0, 13.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..14 with 4 uniform bins.
TEST_F(TestBinDisc4U, X14BinsUniform)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.25, 7.5, 10.75, 14.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..14 with 4 quantile bins.
TEST_F(TestBinDisc4Q, X14BinsQuantile)
{
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.25, 7.5, 10.75, 14.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..15 in shuffled order with 4 uniform bins.
TEST_F(TestBinDisc4U, X15BinsUniform)
{
    samples_t samples = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.5, 8, 11.5, 15.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Values 1..15 in shuffled order with 4 quantile bins.
TEST_F(TestBinDisc4Q, X15BinsQuantile)
{
    samples_t samples = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 1.0, 4.5, 8, 11.5, 15.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Sorted data with repeated values and 4 uniform bins.
TEST_F(TestBinDisc4U, RepeatedValuesUniform)
{
    // position:       0    1    2    3    4    5    6    7    8    9
    samples_t samples = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 0.0, 1.0, 2.0, 3.0, 4.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Sorted data with repeated values and 4 quantile bins.
TEST_F(TestBinDisc4Q, RepeatedValuesQuantile)
{
    // position:       0    1    2    3    4    5    6    7    8    9
    samples_t samples = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    fit(samples);
    const auto edges = getCutPoints();
    const std::vector<double> wanted_edges = { 0.0, 1.0, 2.0, 3.0, 4.0 };
    ASSERT_EQ(wanted_edges.size(), edges.size());
    for (size_t k = 0; k < wanted_edges.size(); ++k) {
        EXPECT_NEAR(wanted_edges[k], edges.at(k), margin);
    }
    const labels_t wanted_bins = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(wanted_bins, transform(samples));
}
|
||||
// Runs every experiment described in tests.txt through BinDisc and compares
// the cut points (all experiments) and the discretized data (experiments 1..39)
// with the values stored in the file.
TEST(TestBinDiscGeneric, Fileset)
{
    Experiments exps(data_path + "tests.txt");
    int num = 0;
    while (exps.is_next()) {
        ++num;
        Experiment exp = exps.next();
        BinDisc disc(exp.n_bins_, exp.strategy_[0] == 'Q' ? strategy_t::QUANTILE : strategy_t::UNIFORM);
        // Build the input: either a generated range or the dataset stored in the experiment.
        std::vector<precision_t> test;
        if (exp.type_ == experiment_t::RANGE) {
            for (float i = exp.from_; i < exp.to_; i += exp.step_) {
                test.push_back(i);
            }
        } else {
            test = exp.dataset_;
        }
        // show_vector(test, "Test");
        auto empty = std::vector<int>();
        auto Xt = disc.fit_transform(test, empty);
        auto cuts = disc.getCutPoints();
        EXPECT_EQ(exp.discretized_data_.size(), Xt.size());
        auto flag = false;
        size_t n_errors = 0;
        if (num < 40) {
            //
            // Check the discretization of experiments 1..39 only; after those the same
            // codification cannot be ensured due to precision problems
            //
            for (size_t i = 0; i < exp.discretized_data_.size(); ++i) {
                if (exp.discretized_data_.at(i) == Xt.at(i)) {
                    continue;
                }
                if (!flag) {
                    // Report the context and record a test failure for the first mismatch only.
                    if (exp.type_ == experiment_t::RANGE)
                        std::cout << "+Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl;
                    else {
                        std::cout << "+Exp #: " << num << " strategy: " << exp.strategy_ << " " << " n_bins: " << exp.n_bins_ << " ";
                        show_vector(exp.dataset_, "Dataset");
                    }
                    show_vector(cuts, "Cuts");
                    std::cout << "Error at " << i << " test[i]=" << test.at(i) << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl;
                    flag = true;
                    EXPECT_EQ(exp.discretized_data_.at(i), Xt.at(i));
                }
                n_errors++;
            }
            if (flag) {
                std::cout << "*** Found " << n_errors << " mistakes in this experiment dataset" << std::endl;
            }
        }
        EXPECT_EQ(exp.cutpoints_.size(), cuts.size());
        for (size_t i = 0; i < exp.cutpoints_.size(); ++i) {
            EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin);
        }
    }
    std::cout << "* Number of experiments tested: " << num << std::endl;
}
|
||||
}
|
@@ -1,32 +1,45 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(FImdlp)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
include(FetchContent)
|
||||
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
enable_testing()
|
||||
include_directories(
|
||||
${TORCH_INCLUDE_DIRS}
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${fimdlp_SOURCE_DIR}/tests/lib/Files
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||
add_executable(Metrics_unittest ${fimdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp)
|
||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(FImdlp_unittest FImdlp_unittest.cpp
|
||||
${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(BinDisc_unittest BinDisc_unittest.cpp ${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||
target_link_libraries(BinDisc_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
||||
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(Discretizer_unittest Discretizer_unittest.cpp
|
||||
${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp )
|
||||
target_link_libraries(Discretizer_unittest GTest::gtest_main "${TORCH_LIBRARIES}")
|
||||
target_compile_options(Discretizer_unittest PRIVATE --coverage)
|
||||
target_link_options(Discretizer_unittest PRIVATE --coverage)
|
||||
|
||||
include(GoogleTest)
|
||||
|
||||
gtest_discover_tests(Metrics_unittest)
|
||||
gtest_discover_tests(FImdlp_unittest)
|
||||
|
||||
gtest_discover_tests(BinDisc_unittest)
|
||||
gtest_discover_tests(Discretizer_unittest)
|
274
tests/Discretizer_unittest.cpp
Normal file
274
tests/Discretizer_unittest.cpp
Normal file
@@ -0,0 +1,274 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "gtest/gtest.h"
|
||||
#include "Discretizer.h"
|
||||
#include "BinDisc.h"
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
const float margin = 1e-4;
|
||||
// Returns "../datasets/" when iris.arff can be opened from there and
// "../../tests/datasets/" otherwise.
static std::string set_data_path()
{
    const std::string candidate = "../datasets/";
    std::ifstream iris(candidate + "iris.arff");
    return iris.is_open() ? candidate : std::string("../../tests/datasets/");
}
|
||||
const std::string data_path = set_data_path();
|
||||
const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
||||
// The version reported through the Discretizer interface must be "2.0.1".
TEST(Discretizer, Version)
{
    // Automatic storage instead of new/delete: nothing leaks if version() throws.
    BinDisc binner(4, strategy_t::UNIFORM);
    Discretizer& disc = binner;
    auto version = disc.version();
    std::cout << "Version computed: " << version;
    EXPECT_EQ("2.0.1", version);
}
|
||||
// Discretizes the first iris feature with 4 uniform bins through the
// Discretizer interface and compares with the stored expected labels.
TEST(Discretizer, BinIrisUniform)
{
    ArffFiles file;
    // Automatic storage instead of new/delete: nothing leaks if load() or fit() throws.
    BinDisc binner(4, strategy_t::UNIFORM);
    Discretizer& disc = binner;
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    auto y = labels_t();
    disc.fit(X[0], y);
    auto Xt = disc.transform(X[0]);
    labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
    EXPECT_EQ(expected, Xt);
}
|
||||
// Discretizes the first iris feature with 4 quantile bins through the
// Discretizer interface and compares with iris_quantile.
TEST(Discretizer, BinIrisQuantile)
{
    ArffFiles file;
    // Automatic storage instead of new/delete: nothing leaks if load() or fit() throws.
    BinDisc binner(4, strategy_t::QUANTILE);
    Discretizer& disc = binner;
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    auto y = labels_t();
    disc.fit(X[0], y);
    auto Xt = disc.transform(X[0]);
    EXPECT_EQ(iris_quantile, Xt);
}
|
||||
|
||||
// Same check as BinIrisQuantile, going through the tensor API (fit_t / transform_t).
TEST(Discretizer, BinIrisQuantileTorch)
{
    ArffFiles file;
    // Automatic storage instead of new/delete: nothing leaks if a call below throws.
    BinDisc binner(4, strategy_t::QUANTILE);
    Discretizer& disc = binner;
    file.load(data_path + "iris.arff", true);
    auto X = file.getX();
    auto y = file.getY();
    auto X_torch = torch::tensor(X[0], torch::kFloat32);
    auto yt = torch::tensor(y, torch::kInt32);
    disc.fit_t(X_torch, yt);
    torch::Tensor Xt = disc.transform_t(X_torch);
    EXPECT_EQ(iris_quantile.size(), Xt.size(0));
    // Convert the size once so the loop compares int with int.
    const int n_samples = static_cast<int>(iris_quantile.size());
    for (int i = 0; i < n_samples; ++i) {
        EXPECT_EQ(iris_quantile.at(i), Xt[i].item<int>());
    }
}
|
||||
// Same check as BinIrisQuantileTorch, using the single-call fit_transform_t.
TEST(Discretizer, BinIrisQuantileTorchFit_transform)
{
    ArffFiles file;
    // Automatic storage instead of new/delete: nothing leaks if a call below throws.
    BinDisc binner(4, strategy_t::QUANTILE);
    Discretizer& disc = binner;
    file.load(data_path + "iris.arff", true);
    auto X = file.getX();
    auto y = file.getY();
    auto X_torch = torch::tensor(X[0], torch::kFloat32);
    auto yt = torch::tensor(y, torch::kInt32);
    torch::Tensor Xt = disc.fit_transform_t(X_torch, yt);
    EXPECT_EQ(iris_quantile.size(), Xt.size(0));
    // Convert the size once so the loop compares int with int.
    const int n_samples = static_cast<int>(iris_quantile.size());
    for (int i = 0; i < n_samples; ++i) {
        EXPECT_EQ(iris_quantile.at(i), Xt[i].item<int>());
    }
}
|
||||
|
||||
TEST(Discretizer, FImdlpIris)
{
    // Reference labels for the second iris feature (X[1]) discretized with a
    // default-constructed CPPFImdlp used through the Discretizer interface.
    labels_t expected = {
        5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
        5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
        3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
        3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
        4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
        1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
    };
    ArffFiles file;
    Discretizer* disc = new CPPFImdlp();
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();
    disc->fit(X[1], y);
    auto computed = disc->transform(X[1]);
    delete disc;
    // Fatal assertion: the loop indexes expected with positions taken from
    // computed, so it must not run when the sizes differ.
    ASSERT_EQ(computed.size(), expected.size());
    for (unsigned long i = 0; i < computed.size(); i++) {
        EXPECT_EQ(computed[i], expected[i]);
    }
}
|
||||
}
|
139
tests/Experiments.hpp
Normal file
139
tests/Experiments.hpp
Normal file
@@ -0,0 +1,139 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef EXPERIMENTS_HPP
|
||||
#define EXPERIMENTS_HPP
|
||||
#include<fstream>
#include<iostream>
#include<sstream>
#include<stdexcept>
#include<string>
#include<tuple>
#include<type_traits>
#include<utility>
#include<vector>
#include "typesFImdlp.h"
|
||||
|
||||
// Prints "<title>: v0, v1, ..., vn" to std::cout and ends the line with
// std::endl. Elements are written with operator<< and separated by ", ".
template <typename T>
void show_vector(const std::vector<T>& data, std::string title)
{
    std::cout << title << ": ";
    for (auto it = data.begin(); it != data.end(); ++it) {
        // The separator goes before every element except the first one
        if (it != data.begin()) {
            std::cout << ", ";
        }
        std::cout << *it;
    }
    std::cout << std::endl;
}
|
||||
// Kind of experiment stored in an Experiment:
//   RANGE  - built from from/to/step values
//   VECTOR - built from an explicit dataset vector
enum class experiment_t { RANGE, VECTOR };
|
||||
class Experiment {
|
||||
public:
|
||||
Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||
from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::RANGE }
|
||||
{
|
||||
validate_strategy();
|
||||
|
||||
}
|
||||
Experiment(std::vector<mdlp::precision_t> dataset, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||
n_bins_{ n_bins }, strategy_{ strategy }, dataset_{ dataset }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::VECTOR }
|
||||
{
|
||||
validate_strategy();
|
||||
}
|
||||
void validate_strategy()
|
||||
{
|
||||
if (strategy_ != "Q" && strategy_ != "U") {
|
||||
throw std::invalid_argument("Invalid strategy " + strategy_);
|
||||
}
|
||||
}
|
||||
float from_;
|
||||
float to_;
|
||||
float step_;
|
||||
int n_bins_;
|
||||
std::string strategy_;
|
||||
std::vector<mdlp::precision_t> dataset_;
|
||||
std::vector<int> discretized_data_;
|
||||
std::vector<mdlp::precision_t> cutpoints_;
|
||||
experiment_t type_;
|
||||
};
|
||||
class Experiments {
|
||||
public:
|
||||
Experiments(const std::string filename) : filename{ filename }
|
||||
{
|
||||
test_file.open(filename);
|
||||
if (!test_file.is_open()) {
|
||||
throw std::runtime_error("File " + filename + " not found");
|
||||
}
|
||||
exp_end = false;
|
||||
}
|
||||
~Experiments()
|
||||
{
|
||||
test_file.close();
|
||||
}
|
||||
bool end() const
|
||||
{
|
||||
return exp_end;
|
||||
}
|
||||
bool is_next()
|
||||
{
|
||||
while (std::getline(test_file, line) && line[0] == '#');
|
||||
if (test_file.eof()) {
|
||||
exp_end = true;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
Experiment next()
|
||||
{
|
||||
return parse_experiment(line);
|
||||
}
|
||||
private:
|
||||
std::tuple<float, float, float, int, std::string> parse_header(const std::string& line)
|
||||
{
|
||||
std::istringstream iss(line);
|
||||
std::string from_, to_, step_, n_bins, strategy;
|
||||
iss >> from_ >> to_ >> step_ >> n_bins >> strategy;
|
||||
return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy };
|
||||
}
|
||||
template <typename T>
|
||||
std::vector<T> parse_vector(const std::string& line)
|
||||
{
|
||||
std::istringstream iss(line);
|
||||
std::vector<T> data;
|
||||
std::string d;
|
||||
while (iss >> d) {
|
||||
data.push_back(std::is_same<T, float>::value ? std::stof(d) : std::stoi(d));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
Experiment parse_experiment(std::string& line)
|
||||
{
|
||||
// Read experiment lines
|
||||
std::string experiment, data, cuts, strategy;
|
||||
std::getline(test_file, experiment);
|
||||
std::getline(test_file, data);
|
||||
std::getline(test_file, cuts);
|
||||
// split data into variables
|
||||
float from_, to_, step_;
|
||||
int n_bins;
|
||||
std::vector<mdlp::precision_t> dataset;
|
||||
auto data_discretized = parse_vector<int>(data);
|
||||
auto cutpoints = parse_vector<mdlp::precision_t>(cuts);
|
||||
if (line == "RANGE") {
|
||||
tie(from_, to_, step_, n_bins, strategy) = parse_header(experiment);
|
||||
return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints };
|
||||
}
|
||||
strategy = experiment.substr(0, 1);
|
||||
n_bins = std::stoi(experiment.substr(1, 1));
|
||||
data = experiment.substr(3, experiment.size() - 4);
|
||||
dataset = parse_vector<mdlp::precision_t>(data);
|
||||
return Experiment(dataset, n_bins, strategy, data_discretized, cutpoints);
|
||||
}
|
||||
std::ifstream test_file;
|
||||
std::string filename;
|
||||
std::string line;
|
||||
bool exp_end;
|
||||
};
|
||||
#endif
|
@@ -1,186 +1,366 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
#include "ArffFiles.h"
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "gtest/gtest.h"
|
||||
#include "Metrics.h"
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
|
||||
try { \
|
||||
stmt; \
|
||||
} catch (const etype& ex) { \
|
||||
EXPECT_EQ(whatstring, std::string(ex.what())); \
|
||||
throw; \
|
||||
} \
|
||||
, etype)
|
||||
|
||||
namespace mdlp {
|
||||
class TestFImdlp: public CPPFImdlp, public testing::Test {
|
||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
precision_t precision = 0.000001;
|
||||
TestFImdlp(): CPPFImdlp() {}
|
||||
void SetUp()
|
||||
precision_t precision = 0.000001f;
|
||||
|
||||
TestFImdlp() : CPPFImdlp() {}
|
||||
|
||||
string data_path;
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
X = { 4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
|
||||
6.0f, 5.1f, 5.9f };
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
algorithm = false;
|
||||
fit(X, y);
|
||||
data_path = set_data_path();
|
||||
}
|
||||
void setalgorithm(bool value)
|
||||
|
||||
static string set_data_path()
|
||||
{
|
||||
algorithm = value;
|
||||
string path = "../datasets/";
|
||||
ifstream file(path + "iris.arff");
|
||||
if (file.is_open()) {
|
||||
file.close();
|
||||
return path;
|
||||
}
|
||||
return "../../tests/datasets/";
|
||||
}
|
||||
|
||||
void checkSortedVector()
|
||||
{
|
||||
indices_t testSortedIndices = sortIndices(X, y);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
for (unsigned long i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
}
|
||||
}
|
||||
void checkCutPoints(cutPoints_t& expected)
|
||||
|
||||
void checkCutPoints(cutPoints_t& computed, cutPoints_t& expected) const
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < cutPoints.size(); i++) {
|
||||
EXPECT_NEAR(cutPoints[i], expected[i], precision);
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
cout << "(" << computed[i] << ", " << expected[i] << ") ";
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
template<typename T, typename A>
|
||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
|
||||
bool test_result(const samples_t& X_, size_t cut, float midPoint, size_t limit, const string& title)
|
||||
{
|
||||
EXPECT_EQ(expected.size(), computed.size());
|
||||
ASSERT_EQ(expected.size(), computed.size());
|
||||
for (auto i = 0; i < expected.size(); i++) {
|
||||
EXPECT_NEAR(expected[i], computed[i], precision);
|
||||
pair<precision_t, size_t> result;
|
||||
labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
X = X_;
|
||||
y = y_;
|
||||
indices = sortIndices(X, y);
|
||||
cout << "* " << title << endl;
|
||||
result = valueCutPoint(0, cut, 10);
|
||||
EXPECT_NEAR(result.first, midPoint, precision);
|
||||
EXPECT_EQ(result.second, limit);
|
||||
return true;
|
||||
}
|
||||
|
||||
void test_dataset(CPPFImdlp& test, const string& filename, vector<cutPoints_t>& expected,
|
||||
vector<int>& depths) const
|
||||
{
|
||||
ArffFiles file;
|
||||
file.load(data_path + filename + ".arff", true);
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
test.fit(X[feature], y);
|
||||
EXPECT_EQ(test.get_depth(), depths[feature]);
|
||||
auto computed = test.getCutPoints();
|
||||
cout << "Feature " << feature << ": ";
|
||||
checkCutPoints(computed, expected[feature]);
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
{
|
||||
X = samples_t();
|
||||
y = labels_t();
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
|
||||
{
|
||||
algorithm = 2;
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
{
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2 };
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth)
{
    // fit() must reject a min_length of 2 and a max_depth of 0, each with its
    // own error message.
    X = { 1, 2, 3 };
    y = { 1, 2, 3 };
    CPPFImdlp short_min_length(2, 10, 0);
    CPPFImdlp zero_max_depth(3, 0, 0);
    EXPECT_THROW_WITH_MESSAGE(short_min_length.fit(X, y), invalid_argument, "min_length must be greater than 2");
    EXPECT_THROW_WITH_MESSAGE(zero_max_depth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
}
|
||||
|
||||
TEST_F(TestFImdlp, JoinFit)
{
    // Fit a small dataset with repeated sample values and check the resulting
    // cut points.
    samples_t samples = { 1, 2, 2, 3, 4, 2, 3 };
    labels_t labels = { 0, 0, 1, 2, 3, 4, 5 };
    fit(samples, labels);
    cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 };
    cutPoints_t computed = getCutPoints();
    EXPECT_EQ(computed.size(), expected.size());
    checkCutPoints(computed, expected);
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorMaxCutPoints)
{
    // fit() must reject proposed cut values of -1 and 200 with the same
    // error message.
    X = { 1, 2, 3 };
    y = { 1, 2, 3 };
    CPPFImdlp negative_cuts(2, 10, -1);
    CPPFImdlp too_many_cuts(3, 0, 200);
    EXPECT_THROW_WITH_MESSAGE(negative_cuts.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
    EXPECT_THROW_WITH_MESSAGE(too_many_cuts.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
}
|
||||
|
||||
TEST_F(TestFImdlp, SortIndices)
|
||||
{
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
X = { 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
checkSortedVector();
|
||||
X = { 5.77, 5.88, 5.99 };
|
||||
X = { 5.77f, 5.88f, 5.99f };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 0, 1, 2 };
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.11 };
|
||||
X = { 5.33f, 5.22f, 5.11f };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 2, 1, 0 };
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.33 };
|
||||
X = { 5.33f, 5.22f, 5.33f };
|
||||
y = { 2, 2, 1 };
|
||||
indices = { 1, 2, 0 };
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
|
||||
|
||||
TEST_F(TestFImdlp, TestShortDatasets)
|
||||
{
|
||||
algorithm = 1;
|
||||
vector<precision_t> computed;
|
||||
X = { 1 };
|
||||
y = { 1 };
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 1, 3 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 2, 4 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 3);
|
||||
EXPECT_NEAR(computed[0], 1, precision);
|
||||
EXPECT_NEAR(computed[1], 1.5, precision);
|
||||
EXPECT_NEAR(computed[2], 3, precision);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||
{
|
||||
algorithm = 0;
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
cutPoints_t expected = { 4.7, 5.05, 6.0 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestIris)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 6.25 },
|
||||
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
|
||||
{4.3, 5.45f, 5.75f, 7.9},
|
||||
{2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4},
|
||||
{1, 2.45f, 4.75f, 5.05f, 6.9},
|
||||
{0.1, 0.8f, 1.75f, 2.5}
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 0;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
vector<int> depths = { 3, 5, 4, 3 };
|
||||
auto test = CPPFImdlp();
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
TEST_F(TestFImdlp, TestIrisAlternative)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 5.75 },
|
||||
{ 2.8499999046325684, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75 },
|
||||
{ 0.80000001192092896, 1.75 }
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 1;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
algorithm = 0;
|
||||
expected = { 1.5 };
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
expected = { 0, 1.5, 2 };
|
||||
samples_t X_ = { 0, 1, 2, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
auto computed = getCutPoints();
|
||||
checkCutPoints(computed, expected);
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
||||
|
||||
TEST_F(TestFImdlp, ValueCutPoint)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
expected = { 1.5 };
|
||||
algorithm = true;
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
// Case titles as stated in the doc
|
||||
samples_t X1a{ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X1a, 6, 7.3f / 2, 6, "1a");
|
||||
samples_t X2a = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X2a, 6, 7.1f / 2, 4, "2a");
|
||||
samples_t X2b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X2b, 6, 7.5f / 2, 7, "2b");
|
||||
samples_t X3a = { 3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X3a, 4, 7.1f / 2, 4, "3a");
|
||||
samples_t X3b = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
|
||||
test_result(X3b, 4, 7.1f / 2, 4, "3b");
|
||||
samples_t X4a = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f };
|
||||
test_result(X4a, 4, 6.9f / 2, 2, "4a");
|
||||
samples_t X4b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X4b, 4, 7.5f / 2, 7, "4b");
|
||||
samples_t X4c = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
|
||||
test_result(X4c, 4, 6.9f / 2, 2, "4c");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxDepth)
{
    // Discretizer limited to max_depth = 1 (second constructor argument);
    // every iris feature is expected to report depth 1.
    CPPFImdlp discretizer(3, 1, 0);
    vector<int> depths(4, 1);
    vector<cutPoints_t> expected = {
        {4.3, 5.45f, 7.9},
        {2, 3.35f, 4.4},
        {1, 2.45f, 6.9},
        {0.1, 0.8f, 2.5}
    };
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
TEST_F(TestFImdlp, MinLength)
{
    // Discretizer with min_length = 75 (first constructor argument) and a
    // max_depth of 100.
    CPPFImdlp discretizer(75, 100, 0);
    vector<int> depths = { 3, 2, 2, 2 };
    vector<cutPoints_t> expected = {
        {4.3, 5.45f, 5.75f, 7.9},
        {2, 2.85f, 3.35f, 4.4},
        {1, 2.45f, 4.75f, 6.9},
        {0.1, 0.8f, 1.75f, 2.5}
    };
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
TEST_F(TestFImdlp, MinLengthMaxDepth)
{
    // Discretizer with min_length = 75 and max_depth = 2 at the same time;
    // every iris feature is expected to report depth 2.
    CPPFImdlp discretizer(75, 2, 0);
    vector<int> depths(4, 2);
    vector<cutPoints_t> expected = {
        {4.3, 5.45f, 5.75f, 7.9},
        {2, 2.85f, 3.35f, 4.4},
        {1, 2.45f, 4.75f, 6.9},
        {0.1, 0.8f, 1.75f, 2.5}
    };
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxCutPointsInteger)
{
    // min_length = 75, max_depth = 2 and an integer proposed-cuts value of 1
    // (third constructor argument).
    CPPFImdlp discretizer(75, 2, 1);
    vector<int> depths(4, 2);
    vector<cutPoints_t> expected = {
        {4.3, 5.45f, 7.9},
        {2, 2.85f, 4.4},
        {1, 2.45f, 6.9},
        {0.1, 0.8f, 2.5}
    };
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxCutPointsFloat)
{
    // min_length = 75, max_depth = 2 and a fractional proposed-cuts value of
    // 0.2 (third constructor argument).
    CPPFImdlp discretizer(75, 2, 0.2f);
    vector<int> depths(4, 2);
    vector<cutPoints_t> expected = {
        {4.3, 5.45f, 5.75f, 7.9},
        {2, 2.85f, 3.35f, 4.4},
        {1, 2.45f, 4.75f, 6.9},
        {0.1, 0.8f, 1.75f, 2.5}
    };
    test_dataset(discretizer, "iris", expected, depths);
}
|
||||
|
||||
TEST_F(TestFImdlp, ProposedCuts)
{
    // Each case is {proposed_cuts value, expected compute_max_num_cut_points()}.
    // NOTE(review): results presumably depend on the dataset fitted in SetUp
    // - confirm against compute_max_num_cut_points().
    const vector<pair<float, size_t>> cases = {
        {0.1f, 2},
        {0.5f, 10},
        {0.07f, 1},
        {1.0f, 1},
        {2.0f, 2}
    };
    for (const auto& scenario : cases) {
        proposed_cuts = scenario.first;
        const size_t expected = scenario.second;
        const size_t computed = compute_max_num_cut_points();
        ASSERT_EQ(expected, computed);
    }
}
|
||||
TEST_F(TestFImdlp, TransformTest)
{
    // Reference labels for the second iris feature; both fit() + transform()
    // and fit_transform() must reproduce them.
    labels_t expected = {
        5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
        5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
        3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
        3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
        4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
        1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
    };
    ArffFiles file;
    file.load(data_path + "iris.arff", true);
    // Named so they do not shadow the fixture's own X and y members
    vector<samples_t>& features = file.getX();
    labels_t& labels = file.getY();
    fit(features[1], labels);
    auto computed = transform(features[1]);
    // Fatal assertions: each loop indexes expected with positions taken from
    // the computed vector, so it must not run when the sizes differ.
    ASSERT_EQ(computed.size(), expected.size());
    for (unsigned long i = 0; i < computed.size(); i++) {
        EXPECT_EQ(computed[i], expected[i]);
    }
    auto computed_ft = fit_transform(features[1], labels);
    ASSERT_EQ(computed_ft.size(), expected.size());
    for (unsigned long i = 0; i < computed_ft.size(); i++) {
        EXPECT_EQ(computed_ft[i], expected[i]);
    }
}
|
||||
}
|
||||
|
@@ -1,23 +1,27 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
class TestMetrics: public Metrics, public testing::Test {
|
||||
class TestMetrics : public Metrics, public testing::Test {
|
||||
public:
|
||||
labels_t y;
|
||||
samples_t X;
|
||||
indices_t indices;
|
||||
precision_t precision = 0.000001;
|
||||
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
precision_t precision = 1e-6;
|
||||
|
||||
TestMetrics(): Metrics(y, indices) {}
|
||||
void SetUp()
|
||||
TestMetrics() : Metrics(y_, indices_) {};
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
setData(y, indices);
|
||||
setData(y_, indices_);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestMetrics, NumClasses)
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
@@ -25,19 +29,31 @@ namespace mdlp {
|
||||
EXPECT_EQ(2, computeNumClasses(0, 10));
|
||||
EXPECT_EQ(2, computeNumClasses(8, 10));
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, Entropy)
|
||||
{
|
||||
EXPECT_EQ(1, entropy(0, 10));
|
||||
EXPECT_EQ(0, entropy(0, 5));
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.468996, entropy(0, 10), precision);
|
||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, EntropyDouble)
{
    // Entropy of the growing prefixes [0, 1), [0, 2), ... of y, compared with
    // precomputed reference values.
    y = { 0, 0, 1, 2, 3 };
    samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 };
    auto position = 0;
    while (position < y.size()) {
        ASSERT_NEAR(expected_entropies[position], entropy(0, position + 1), precision);
        ++position;
    }
}
|
||||
|
||||
TEST_F(TestMetrics, InformationGain)
|
||||
{
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
||||
}
|
||||
}
|
||||
|
@@ -1,4 +0,0 @@
|
||||
rm -fr lcoverage/*
|
||||
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
genhtml lcoverage/main_coverage.info --output-directory lcoverage
|
||||
open lcoverage/index.html
|
863
tests/datasets/diabetes.arff
Executable file
863
tests/datasets/diabetes.arff
Executable file
@@ -0,0 +1,863 @@
|
||||
% 1. Title: Pima Indians Diabetes Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Original owners: National Institute of Diabetes and Digestive and
|
||||
% Kidney Diseases
|
||||
% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)
|
||||
% Research Center, RMI Group Leader
|
||||
% Applied Physics Laboratory
|
||||
% The Johns Hopkins University
|
||||
% Johns Hopkins Road
|
||||
% Laurel, MD 20707
|
||||
% (301) 953-6231
|
||||
% (c) Date received: 9 May 1990
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \&
|
||||
% Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast
|
||||
% the onset of diabetes mellitus. In {\it Proceedings of the Symposium
|
||||
% on Computer Applications and Medical Care} (pp. 261--265). IEEE
|
||||
% Computer Society Press.
|
||||
%
|
||||
% The diagnostic, binary-valued variable investigated is whether the
|
||||
% patient shows signs of diabetes according to World Health Organization
|
||||
% criteria (i.e., if the 2 hour post-load plasma glucose was at least
|
||||
% 200 mg/dl at any survey examination or if found during routine medical
|
||||
% care). The population lives near Phoenix, Arizona, USA.
|
||||
%
|
||||
% Results: Their ADAP algorithm makes a real-valued prediction between
|
||||
% 0 and 1. This was transformed into a binary decision using a cutoff of
|
||||
% 0.448. Using 576 training instances, the sensitivity and specificity
|
||||
% of their algorithm was 76% on the remaining 192 instances.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% Several constraints were placed on the selection of these instances from
|
||||
% a larger database. In particular, all patients here are females at
|
||||
% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning
|
||||
% routine that generates and executes digital analogs of perceptron-like
|
||||
% devices. It is a unique algorithm; see the paper for details.
|
||||
%
|
||||
% 5. Number of Instances: 768
|
||||
%
|
||||
% 6. Number of Attributes: 8 plus class
|
||||
%
|
||||
% 7. For Each Attribute: (all numeric-valued)
|
||||
% 1. Number of times pregnant
|
||||
% 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
|
||||
% 3. Diastolic blood pressure (mm Hg)
|
||||
% 4. Triceps skin fold thickness (mm)
|
||||
% 5. 2-Hour serum insulin (mu U/ml)
|
||||
% 6. Body mass index (weight in kg/(height in m)^2)
|
||||
% 7. Diabetes pedigree function
|
||||
% 8. Age (years)
|
||||
% 9. Class variable (0 or 1)
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% 9. Class Distribution: (class value 1 is interpreted as "tested positive for
|
||||
% diabetes")
|
||||
%
|
||||
% Class Value Number of instances
|
||||
% 0 500
|
||||
% 1 268
|
||||
%
|
||||
% 10. Brief statistical analysis:
|
||||
%
|
||||
% Attribute number: Mean: Standard Deviation:
|
||||
% 1. 3.8 3.4
|
||||
% 2. 120.9 32.0
|
||||
% 3. 69.1 19.4
|
||||
% 4. 20.5 16.0
|
||||
% 5. 79.8 115.2
|
||||
% 6. 32.0 7.9
|
||||
% 7. 0.5 0.3
|
||||
% 8. 33.2 11.8
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'class'
|
||||
% From: 0 To: tested_negative
|
||||
% From: 1 To: tested_positive
|
||||
%
|
||||
@relation pima_diabetes
|
||||
@attribute 'preg' real
|
||||
@attribute 'plas' real
|
||||
@attribute 'pres' real
|
||||
@attribute 'skin' real
|
||||
@attribute 'insu' real
|
||||
@attribute 'mass' real
|
||||
@attribute 'pedi' real
|
||||
@attribute 'age' real
|
||||
@attribute 'class' { tested_negative, tested_positive}
|
||||
@data
|
||||
6,148,72,35,0,33.6,0.627,50,tested_positive
|
||||
1,85,66,29,0,26.6,0.351,31,tested_negative
|
||||
8,183,64,0,0,23.3,0.672,32,tested_positive
|
||||
1,89,66,23,94,28.1,0.167,21,tested_negative
|
||||
0,137,40,35,168,43.1,2.288,33,tested_positive
|
||||
5,116,74,0,0,25.6,0.201,30,tested_negative
|
||||
3,78,50,32,88,31,0.248,26,tested_positive
|
||||
10,115,0,0,0,35.3,0.134,29,tested_negative
|
||||
2,197,70,45,543,30.5,0.158,53,tested_positive
|
||||
8,125,96,0,0,0,0.232,54,tested_positive
|
||||
4,110,92,0,0,37.6,0.191,30,tested_negative
|
||||
10,168,74,0,0,38,0.537,34,tested_positive
|
||||
10,139,80,0,0,27.1,1.441,57,tested_negative
|
||||
1,189,60,23,846,30.1,0.398,59,tested_positive
|
||||
5,166,72,19,175,25.8,0.587,51,tested_positive
|
||||
7,100,0,0,0,30,0.484,32,tested_positive
|
||||
0,118,84,47,230,45.8,0.551,31,tested_positive
|
||||
7,107,74,0,0,29.6,0.254,31,tested_positive
|
||||
1,103,30,38,83,43.3,0.183,33,tested_negative
|
||||
1,115,70,30,96,34.6,0.529,32,tested_positive
|
||||
3,126,88,41,235,39.3,0.704,27,tested_negative
|
||||
8,99,84,0,0,35.4,0.388,50,tested_negative
|
||||
7,196,90,0,0,39.8,0.451,41,tested_positive
|
||||
9,119,80,35,0,29,0.263,29,tested_positive
|
||||
11,143,94,33,146,36.6,0.254,51,tested_positive
|
||||
10,125,70,26,115,31.1,0.205,41,tested_positive
|
||||
7,147,76,0,0,39.4,0.257,43,tested_positive
|
||||
1,97,66,15,140,23.2,0.487,22,tested_negative
|
||||
13,145,82,19,110,22.2,0.245,57,tested_negative
|
||||
5,117,92,0,0,34.1,0.337,38,tested_negative
|
||||
5,109,75,26,0,36,0.546,60,tested_negative
|
||||
3,158,76,36,245,31.6,0.851,28,tested_positive
|
||||
3,88,58,11,54,24.8,0.267,22,tested_negative
|
||||
6,92,92,0,0,19.9,0.188,28,tested_negative
|
||||
10,122,78,31,0,27.6,0.512,45,tested_negative
|
||||
4,103,60,33,192,24,0.966,33,tested_negative
|
||||
11,138,76,0,0,33.2,0.42,35,tested_negative
|
||||
9,102,76,37,0,32.9,0.665,46,tested_positive
|
||||
2,90,68,42,0,38.2,0.503,27,tested_positive
|
||||
4,111,72,47,207,37.1,1.39,56,tested_positive
|
||||
3,180,64,25,70,34,0.271,26,tested_negative
|
||||
7,133,84,0,0,40.2,0.696,37,tested_negative
|
||||
7,106,92,18,0,22.7,0.235,48,tested_negative
|
||||
9,171,110,24,240,45.4,0.721,54,tested_positive
|
||||
7,159,64,0,0,27.4,0.294,40,tested_negative
|
||||
0,180,66,39,0,42,1.893,25,tested_positive
|
||||
1,146,56,0,0,29.7,0.564,29,tested_negative
|
||||
2,71,70,27,0,28,0.586,22,tested_negative
|
||||
7,103,66,32,0,39.1,0.344,31,tested_positive
|
||||
7,105,0,0,0,0,0.305,24,tested_negative
|
||||
1,103,80,11,82,19.4,0.491,22,tested_negative
|
||||
1,101,50,15,36,24.2,0.526,26,tested_negative
|
||||
5,88,66,21,23,24.4,0.342,30,tested_negative
|
||||
8,176,90,34,300,33.7,0.467,58,tested_positive
|
||||
7,150,66,42,342,34.7,0.718,42,tested_negative
|
||||
1,73,50,10,0,23,0.248,21,tested_negative
|
||||
7,187,68,39,304,37.7,0.254,41,tested_positive
|
||||
0,100,88,60,110,46.8,0.962,31,tested_negative
|
||||
0,146,82,0,0,40.5,1.781,44,tested_negative
|
||||
0,105,64,41,142,41.5,0.173,22,tested_negative
|
||||
2,84,0,0,0,0,0.304,21,tested_negative
|
||||
8,133,72,0,0,32.9,0.27,39,tested_positive
|
||||
5,44,62,0,0,25,0.587,36,tested_negative
|
||||
2,141,58,34,128,25.4,0.699,24,tested_negative
|
||||
7,114,66,0,0,32.8,0.258,42,tested_positive
|
||||
5,99,74,27,0,29,0.203,32,tested_negative
|
||||
0,109,88,30,0,32.5,0.855,38,tested_positive
|
||||
2,109,92,0,0,42.7,0.845,54,tested_negative
|
||||
1,95,66,13,38,19.6,0.334,25,tested_negative
|
||||
4,146,85,27,100,28.9,0.189,27,tested_negative
|
||||
2,100,66,20,90,32.9,0.867,28,tested_positive
|
||||
5,139,64,35,140,28.6,0.411,26,tested_negative
|
||||
13,126,90,0,0,43.4,0.583,42,tested_positive
|
||||
4,129,86,20,270,35.1,0.231,23,tested_negative
|
||||
1,79,75,30,0,32,0.396,22,tested_negative
|
||||
1,0,48,20,0,24.7,0.14,22,tested_negative
|
||||
7,62,78,0,0,32.6,0.391,41,tested_negative
|
||||
5,95,72,33,0,37.7,0.37,27,tested_negative
|
||||
0,131,0,0,0,43.2,0.27,26,tested_positive
|
||||
2,112,66,22,0,25,0.307,24,tested_negative
|
||||
3,113,44,13,0,22.4,0.14,22,tested_negative
|
||||
2,74,0,0,0,0,0.102,22,tested_negative
|
||||
7,83,78,26,71,29.3,0.767,36,tested_negative
|
||||
0,101,65,28,0,24.6,0.237,22,tested_negative
|
||||
5,137,108,0,0,48.8,0.227,37,tested_positive
|
||||
2,110,74,29,125,32.4,0.698,27,tested_negative
|
||||
13,106,72,54,0,36.6,0.178,45,tested_negative
|
||||
2,100,68,25,71,38.5,0.324,26,tested_negative
|
||||
15,136,70,32,110,37.1,0.153,43,tested_positive
|
||||
1,107,68,19,0,26.5,0.165,24,tested_negative
|
||||
1,80,55,0,0,19.1,0.258,21,tested_negative
|
||||
4,123,80,15,176,32,0.443,34,tested_negative
|
||||
7,81,78,40,48,46.7,0.261,42,tested_negative
|
||||
4,134,72,0,0,23.8,0.277,60,tested_positive
|
||||
2,142,82,18,64,24.7,0.761,21,tested_negative
|
||||
6,144,72,27,228,33.9,0.255,40,tested_negative
|
||||
2,92,62,28,0,31.6,0.13,24,tested_negative
|
||||
1,71,48,18,76,20.4,0.323,22,tested_negative
|
||||
6,93,50,30,64,28.7,0.356,23,tested_negative
|
||||
1,122,90,51,220,49.7,0.325,31,tested_positive
|
||||
1,163,72,0,0,39,1.222,33,tested_positive
|
||||
1,151,60,0,0,26.1,0.179,22,tested_negative
|
||||
0,125,96,0,0,22.5,0.262,21,tested_negative
|
||||
1,81,72,18,40,26.6,0.283,24,tested_negative
|
||||
2,85,65,0,0,39.6,0.93,27,tested_negative
|
||||
1,126,56,29,152,28.7,0.801,21,tested_negative
|
||||
1,96,122,0,0,22.4,0.207,27,tested_negative
|
||||
4,144,58,28,140,29.5,0.287,37,tested_negative
|
||||
3,83,58,31,18,34.3,0.336,25,tested_negative
|
||||
0,95,85,25,36,37.4,0.247,24,tested_positive
|
||||
3,171,72,33,135,33.3,0.199,24,tested_positive
|
||||
8,155,62,26,495,34,0.543,46,tested_positive
|
||||
1,89,76,34,37,31.2,0.192,23,tested_negative
|
||||
4,76,62,0,0,34,0.391,25,tested_negative
|
||||
7,160,54,32,175,30.5,0.588,39,tested_positive
|
||||
4,146,92,0,0,31.2,0.539,61,tested_positive
|
||||
5,124,74,0,0,34,0.22,38,tested_positive
|
||||
5,78,48,0,0,33.7,0.654,25,tested_negative
|
||||
4,97,60,23,0,28.2,0.443,22,tested_negative
|
||||
4,99,76,15,51,23.2,0.223,21,tested_negative
|
||||
0,162,76,56,100,53.2,0.759,25,tested_positive
|
||||
6,111,64,39,0,34.2,0.26,24,tested_negative
|
||||
2,107,74,30,100,33.6,0.404,23,tested_negative
|
||||
5,132,80,0,0,26.8,0.186,69,tested_negative
|
||||
0,113,76,0,0,33.3,0.278,23,tested_positive
|
||||
1,88,30,42,99,55,0.496,26,tested_positive
|
||||
3,120,70,30,135,42.9,0.452,30,tested_negative
|
||||
1,118,58,36,94,33.3,0.261,23,tested_negative
|
||||
1,117,88,24,145,34.5,0.403,40,tested_positive
|
||||
0,105,84,0,0,27.9,0.741,62,tested_positive
|
||||
4,173,70,14,168,29.7,0.361,33,tested_positive
|
||||
9,122,56,0,0,33.3,1.114,33,tested_positive
|
||||
3,170,64,37,225,34.5,0.356,30,tested_positive
|
||||
8,84,74,31,0,38.3,0.457,39,tested_negative
|
||||
2,96,68,13,49,21.1,0.647,26,tested_negative
|
||||
2,125,60,20,140,33.8,0.088,31,tested_negative
|
||||
0,100,70,26,50,30.8,0.597,21,tested_negative
|
||||
0,93,60,25,92,28.7,0.532,22,tested_negative
|
||||
0,129,80,0,0,31.2,0.703,29,tested_negative
|
||||
5,105,72,29,325,36.9,0.159,28,tested_negative
|
||||
3,128,78,0,0,21.1,0.268,55,tested_negative
|
||||
5,106,82,30,0,39.5,0.286,38,tested_negative
|
||||
2,108,52,26,63,32.5,0.318,22,tested_negative
|
||||
10,108,66,0,0,32.4,0.272,42,tested_positive
|
||||
4,154,62,31,284,32.8,0.237,23,tested_negative
|
||||
0,102,75,23,0,0,0.572,21,tested_negative
|
||||
9,57,80,37,0,32.8,0.096,41,tested_negative
|
||||
2,106,64,35,119,30.5,1.4,34,tested_negative
|
||||
5,147,78,0,0,33.7,0.218,65,tested_negative
|
||||
2,90,70,17,0,27.3,0.085,22,tested_negative
|
||||
1,136,74,50,204,37.4,0.399,24,tested_negative
|
||||
4,114,65,0,0,21.9,0.432,37,tested_negative
|
||||
9,156,86,28,155,34.3,1.189,42,tested_positive
|
||||
1,153,82,42,485,40.6,0.687,23,tested_negative
|
||||
8,188,78,0,0,47.9,0.137,43,tested_positive
|
||||
7,152,88,44,0,50,0.337,36,tested_positive
|
||||
2,99,52,15,94,24.6,0.637,21,tested_negative
|
||||
1,109,56,21,135,25.2,0.833,23,tested_negative
|
||||
2,88,74,19,53,29,0.229,22,tested_negative
|
||||
17,163,72,41,114,40.9,0.817,47,tested_positive
|
||||
4,151,90,38,0,29.7,0.294,36,tested_negative
|
||||
7,102,74,40,105,37.2,0.204,45,tested_negative
|
||||
0,114,80,34,285,44.2,0.167,27,tested_negative
|
||||
2,100,64,23,0,29.7,0.368,21,tested_negative
|
||||
0,131,88,0,0,31.6,0.743,32,tested_positive
|
||||
6,104,74,18,156,29.9,0.722,41,tested_positive
|
||||
3,148,66,25,0,32.5,0.256,22,tested_negative
|
||||
4,120,68,0,0,29.6,0.709,34,tested_negative
|
||||
4,110,66,0,0,31.9,0.471,29,tested_negative
|
||||
3,111,90,12,78,28.4,0.495,29,tested_negative
|
||||
6,102,82,0,0,30.8,0.18,36,tested_positive
|
||||
6,134,70,23,130,35.4,0.542,29,tested_positive
|
||||
2,87,0,23,0,28.9,0.773,25,tested_negative
|
||||
1,79,60,42,48,43.5,0.678,23,tested_negative
|
||||
2,75,64,24,55,29.7,0.37,33,tested_negative
|
||||
8,179,72,42,130,32.7,0.719,36,tested_positive
|
||||
6,85,78,0,0,31.2,0.382,42,tested_negative
|
||||
0,129,110,46,130,67.1,0.319,26,tested_positive
|
||||
5,143,78,0,0,45,0.19,47,tested_negative
|
||||
5,130,82,0,0,39.1,0.956,37,tested_positive
|
||||
6,87,80,0,0,23.2,0.084,32,tested_negative
|
||||
0,119,64,18,92,34.9,0.725,23,tested_negative
|
||||
1,0,74,20,23,27.7,0.299,21,tested_negative
|
||||
5,73,60,0,0,26.8,0.268,27,tested_negative
|
||||
4,141,74,0,0,27.6,0.244,40,tested_negative
|
||||
7,194,68,28,0,35.9,0.745,41,tested_positive
|
||||
8,181,68,36,495,30.1,0.615,60,tested_positive
|
||||
1,128,98,41,58,32,1.321,33,tested_positive
|
||||
8,109,76,39,114,27.9,0.64,31,tested_positive
|
||||
5,139,80,35,160,31.6,0.361,25,tested_positive
|
||||
3,111,62,0,0,22.6,0.142,21,tested_negative
|
||||
9,123,70,44,94,33.1,0.374,40,tested_negative
|
||||
7,159,66,0,0,30.4,0.383,36,tested_positive
|
||||
11,135,0,0,0,52.3,0.578,40,tested_positive
|
||||
8,85,55,20,0,24.4,0.136,42,tested_negative
|
||||
5,158,84,41,210,39.4,0.395,29,tested_positive
|
||||
1,105,58,0,0,24.3,0.187,21,tested_negative
|
||||
3,107,62,13,48,22.9,0.678,23,tested_positive
|
||||
4,109,64,44,99,34.8,0.905,26,tested_positive
|
||||
4,148,60,27,318,30.9,0.15,29,tested_positive
|
||||
0,113,80,16,0,31,0.874,21,tested_negative
|
||||
1,138,82,0,0,40.1,0.236,28,tested_negative
|
||||
0,108,68,20,0,27.3,0.787,32,tested_negative
|
||||
2,99,70,16,44,20.4,0.235,27,tested_negative
|
||||
6,103,72,32,190,37.7,0.324,55,tested_negative
|
||||
5,111,72,28,0,23.9,0.407,27,tested_negative
|
||||
8,196,76,29,280,37.5,0.605,57,tested_positive
|
||||
5,162,104,0,0,37.7,0.151,52,tested_positive
|
||||
1,96,64,27,87,33.2,0.289,21,tested_negative
|
||||
7,184,84,33,0,35.5,0.355,41,tested_positive
|
||||
2,81,60,22,0,27.7,0.29,25,tested_negative
|
||||
0,147,85,54,0,42.8,0.375,24,tested_negative
|
||||
7,179,95,31,0,34.2,0.164,60,tested_negative
|
||||
0,140,65,26,130,42.6,0.431,24,tested_positive
|
||||
9,112,82,32,175,34.2,0.26,36,tested_positive
|
||||
12,151,70,40,271,41.8,0.742,38,tested_positive
|
||||
5,109,62,41,129,35.8,0.514,25,tested_positive
|
||||
6,125,68,30,120,30,0.464,32,tested_negative
|
||||
5,85,74,22,0,29,1.224,32,tested_positive
|
||||
5,112,66,0,0,37.8,0.261,41,tested_positive
|
||||
0,177,60,29,478,34.6,1.072,21,tested_positive
|
||||
2,158,90,0,0,31.6,0.805,66,tested_positive
|
||||
7,119,0,0,0,25.2,0.209,37,tested_negative
|
||||
7,142,60,33,190,28.8,0.687,61,tested_negative
|
||||
1,100,66,15,56,23.6,0.666,26,tested_negative
|
||||
1,87,78,27,32,34.6,0.101,22,tested_negative
|
||||
0,101,76,0,0,35.7,0.198,26,tested_negative
|
||||
3,162,52,38,0,37.2,0.652,24,tested_positive
|
||||
4,197,70,39,744,36.7,2.329,31,tested_negative
|
||||
0,117,80,31,53,45.2,0.089,24,tested_negative
|
||||
4,142,86,0,0,44,0.645,22,tested_positive
|
||||
6,134,80,37,370,46.2,0.238,46,tested_positive
|
||||
1,79,80,25,37,25.4,0.583,22,tested_negative
|
||||
4,122,68,0,0,35,0.394,29,tested_negative
|
||||
3,74,68,28,45,29.7,0.293,23,tested_negative
|
||||
4,171,72,0,0,43.6,0.479,26,tested_positive
|
||||
7,181,84,21,192,35.9,0.586,51,tested_positive
|
||||
0,179,90,27,0,44.1,0.686,23,tested_positive
|
||||
9,164,84,21,0,30.8,0.831,32,tested_positive
|
||||
0,104,76,0,0,18.4,0.582,27,tested_negative
|
||||
1,91,64,24,0,29.2,0.192,21,tested_negative
|
||||
4,91,70,32,88,33.1,0.446,22,tested_negative
|
||||
3,139,54,0,0,25.6,0.402,22,tested_positive
|
||||
6,119,50,22,176,27.1,1.318,33,tested_positive
|
||||
2,146,76,35,194,38.2,0.329,29,tested_negative
|
||||
9,184,85,15,0,30,1.213,49,tested_positive
|
||||
10,122,68,0,0,31.2,0.258,41,tested_negative
|
||||
0,165,90,33,680,52.3,0.427,23,tested_negative
|
||||
9,124,70,33,402,35.4,0.282,34,tested_negative
|
||||
1,111,86,19,0,30.1,0.143,23,tested_negative
|
||||
9,106,52,0,0,31.2,0.38,42,tested_negative
|
||||
2,129,84,0,0,28,0.284,27,tested_negative
|
||||
2,90,80,14,55,24.4,0.249,24,tested_negative
|
||||
0,86,68,32,0,35.8,0.238,25,tested_negative
|
||||
12,92,62,7,258,27.6,0.926,44,tested_positive
|
||||
1,113,64,35,0,33.6,0.543,21,tested_positive
|
||||
3,111,56,39,0,30.1,0.557,30,tested_negative
|
||||
2,114,68,22,0,28.7,0.092,25,tested_negative
|
||||
1,193,50,16,375,25.9,0.655,24,tested_negative
|
||||
11,155,76,28,150,33.3,1.353,51,tested_positive
|
||||
3,191,68,15,130,30.9,0.299,34,tested_negative
|
||||
3,141,0,0,0,30,0.761,27,tested_positive
|
||||
4,95,70,32,0,32.1,0.612,24,tested_negative
|
||||
3,142,80,15,0,32.4,0.2,63,tested_negative
|
||||
4,123,62,0,0,32,0.226,35,tested_positive
|
||||
5,96,74,18,67,33.6,0.997,43,tested_negative
|
||||
0,138,0,0,0,36.3,0.933,25,tested_positive
|
||||
2,128,64,42,0,40,1.101,24,tested_negative
|
||||
0,102,52,0,0,25.1,0.078,21,tested_negative
|
||||
2,146,0,0,0,27.5,0.24,28,tested_positive
|
||||
10,101,86,37,0,45.6,1.136,38,tested_positive
|
||||
2,108,62,32,56,25.2,0.128,21,tested_negative
|
||||
3,122,78,0,0,23,0.254,40,tested_negative
|
||||
1,71,78,50,45,33.2,0.422,21,tested_negative
|
||||
13,106,70,0,0,34.2,0.251,52,tested_negative
|
||||
2,100,70,52,57,40.5,0.677,25,tested_negative
|
||||
7,106,60,24,0,26.5,0.296,29,tested_positive
|
||||
0,104,64,23,116,27.8,0.454,23,tested_negative
|
||||
5,114,74,0,0,24.9,0.744,57,tested_negative
|
||||
2,108,62,10,278,25.3,0.881,22,tested_negative
|
||||
0,146,70,0,0,37.9,0.334,28,tested_positive
|
||||
10,129,76,28,122,35.9,0.28,39,tested_negative
|
||||
7,133,88,15,155,32.4,0.262,37,tested_negative
|
||||
7,161,86,0,0,30.4,0.165,47,tested_positive
|
||||
2,108,80,0,0,27,0.259,52,tested_positive
|
||||
7,136,74,26,135,26,0.647,51,tested_negative
|
||||
5,155,84,44,545,38.7,0.619,34,tested_negative
|
||||
1,119,86,39,220,45.6,0.808,29,tested_positive
|
||||
4,96,56,17,49,20.8,0.34,26,tested_negative
|
||||
5,108,72,43,75,36.1,0.263,33,tested_negative
|
||||
0,78,88,29,40,36.9,0.434,21,tested_negative
|
||||
0,107,62,30,74,36.6,0.757,25,tested_positive
|
||||
2,128,78,37,182,43.3,1.224,31,tested_positive
|
||||
1,128,48,45,194,40.5,0.613,24,tested_positive
|
||||
0,161,50,0,0,21.9,0.254,65,tested_negative
|
||||
6,151,62,31,120,35.5,0.692,28,tested_negative
|
||||
2,146,70,38,360,28,0.337,29,tested_positive
|
||||
0,126,84,29,215,30.7,0.52,24,tested_negative
|
||||
14,100,78,25,184,36.6,0.412,46,tested_positive
|
||||
8,112,72,0,0,23.6,0.84,58,tested_negative
|
||||
0,167,0,0,0,32.3,0.839,30,tested_positive
|
||||
2,144,58,33,135,31.6,0.422,25,tested_positive
|
||||
5,77,82,41,42,35.8,0.156,35,tested_negative
|
||||
5,115,98,0,0,52.9,0.209,28,tested_positive
|
||||
3,150,76,0,0,21,0.207,37,tested_negative
|
||||
2,120,76,37,105,39.7,0.215,29,tested_negative
|
||||
10,161,68,23,132,25.5,0.326,47,tested_positive
|
||||
0,137,68,14,148,24.8,0.143,21,tested_negative
|
||||
0,128,68,19,180,30.5,1.391,25,tested_positive
|
||||
2,124,68,28,205,32.9,0.875,30,tested_positive
|
||||
6,80,66,30,0,26.2,0.313,41,tested_negative
|
||||
0,106,70,37,148,39.4,0.605,22,tested_negative
|
||||
2,155,74,17,96,26.6,0.433,27,tested_positive
|
||||
3,113,50,10,85,29.5,0.626,25,tested_negative
|
||||
7,109,80,31,0,35.9,1.127,43,tested_positive
|
||||
2,112,68,22,94,34.1,0.315,26,tested_negative
|
||||
3,99,80,11,64,19.3,0.284,30,tested_negative
|
||||
3,182,74,0,0,30.5,0.345,29,tested_positive
|
||||
3,115,66,39,140,38.1,0.15,28,tested_negative
|
||||
6,194,78,0,0,23.5,0.129,59,tested_positive
|
||||
4,129,60,12,231,27.5,0.527,31,tested_negative
|
||||
3,112,74,30,0,31.6,0.197,25,tested_positive
|
||||
0,124,70,20,0,27.4,0.254,36,tested_positive
|
||||
13,152,90,33,29,26.8,0.731,43,tested_positive
|
||||
2,112,75,32,0,35.7,0.148,21,tested_negative
|
||||
1,157,72,21,168,25.6,0.123,24,tested_negative
|
||||
1,122,64,32,156,35.1,0.692,30,tested_positive
|
||||
10,179,70,0,0,35.1,0.2,37,tested_negative
|
||||
2,102,86,36,120,45.5,0.127,23,tested_positive
|
||||
6,105,70,32,68,30.8,0.122,37,tested_negative
|
||||
8,118,72,19,0,23.1,1.476,46,tested_negative
|
||||
2,87,58,16,52,32.7,0.166,25,tested_negative
|
||||
1,180,0,0,0,43.3,0.282,41,tested_positive
|
||||
12,106,80,0,0,23.6,0.137,44,tested_negative
|
||||
1,95,60,18,58,23.9,0.26,22,tested_negative
|
||||
0,165,76,43,255,47.9,0.259,26,tested_negative
|
||||
0,117,0,0,0,33.8,0.932,44,tested_negative
|
||||
5,115,76,0,0,31.2,0.343,44,tested_positive
|
||||
9,152,78,34,171,34.2,0.893,33,tested_positive
|
||||
7,178,84,0,0,39.9,0.331,41,tested_positive
|
||||
1,130,70,13,105,25.9,0.472,22,tested_negative
|
||||
1,95,74,21,73,25.9,0.673,36,tested_negative
|
||||
1,0,68,35,0,32,0.389,22,tested_negative
|
||||
5,122,86,0,0,34.7,0.29,33,tested_negative
|
||||
8,95,72,0,0,36.8,0.485,57,tested_negative
|
||||
8,126,88,36,108,38.5,0.349,49,tested_negative
|
||||
1,139,46,19,83,28.7,0.654,22,tested_negative
|
||||
3,116,0,0,0,23.5,0.187,23,tested_negative
|
||||
3,99,62,19,74,21.8,0.279,26,tested_negative
|
||||
5,0,80,32,0,41,0.346,37,tested_positive
|
||||
4,92,80,0,0,42.2,0.237,29,tested_negative
|
||||
4,137,84,0,0,31.2,0.252,30,tested_negative
|
||||
3,61,82,28,0,34.4,0.243,46,tested_negative
|
||||
1,90,62,12,43,27.2,0.58,24,tested_negative
|
||||
3,90,78,0,0,42.7,0.559,21,tested_negative
|
||||
9,165,88,0,0,30.4,0.302,49,tested_positive
|
||||
1,125,50,40,167,33.3,0.962,28,tested_positive
|
||||
13,129,0,30,0,39.9,0.569,44,tested_positive
|
||||
12,88,74,40,54,35.3,0.378,48,tested_negative
|
||||
1,196,76,36,249,36.5,0.875,29,tested_positive
|
||||
5,189,64,33,325,31.2,0.583,29,tested_positive
|
||||
5,158,70,0,0,29.8,0.207,63,tested_negative
|
||||
5,103,108,37,0,39.2,0.305,65,tested_negative
|
||||
4,146,78,0,0,38.5,0.52,67,tested_positive
|
||||
4,147,74,25,293,34.9,0.385,30,tested_negative
|
||||
5,99,54,28,83,34,0.499,30,tested_negative
|
||||
6,124,72,0,0,27.6,0.368,29,tested_positive
|
||||
0,101,64,17,0,21,0.252,21,tested_negative
|
||||
3,81,86,16,66,27.5,0.306,22,tested_negative
|
||||
1,133,102,28,140,32.8,0.234,45,tested_positive
|
||||
3,173,82,48,465,38.4,2.137,25,tested_positive
|
||||
0,118,64,23,89,0,1.731,21,tested_negative
|
||||
0,84,64,22,66,35.8,0.545,21,tested_negative
|
||||
2,105,58,40,94,34.9,0.225,25,tested_negative
|
||||
2,122,52,43,158,36.2,0.816,28,tested_negative
|
||||
12,140,82,43,325,39.2,0.528,58,tested_positive
|
||||
0,98,82,15,84,25.2,0.299,22,tested_negative
|
||||
1,87,60,37,75,37.2,0.509,22,tested_negative
|
||||
4,156,75,0,0,48.3,0.238,32,tested_positive
|
||||
0,93,100,39,72,43.4,1.021,35,tested_negative
|
||||
1,107,72,30,82,30.8,0.821,24,tested_negative
|
||||
0,105,68,22,0,20,0.236,22,tested_negative
|
||||
1,109,60,8,182,25.4,0.947,21,tested_negative
|
||||
1,90,62,18,59,25.1,1.268,25,tested_negative
|
||||
1,125,70,24,110,24.3,0.221,25,tested_negative
|
||||
1,119,54,13,50,22.3,0.205,24,tested_negative
|
||||
5,116,74,29,0,32.3,0.66,35,tested_positive
|
||||
8,105,100,36,0,43.3,0.239,45,tested_positive
|
||||
5,144,82,26,285,32,0.452,58,tested_positive
|
||||
3,100,68,23,81,31.6,0.949,28,tested_negative
|
||||
1,100,66,29,196,32,0.444,42,tested_negative
|
||||
5,166,76,0,0,45.7,0.34,27,tested_positive
|
||||
1,131,64,14,415,23.7,0.389,21,tested_negative
|
||||
4,116,72,12,87,22.1,0.463,37,tested_negative
|
||||
4,158,78,0,0,32.9,0.803,31,tested_positive
|
||||
2,127,58,24,275,27.7,1.6,25,tested_negative
|
||||
3,96,56,34,115,24.7,0.944,39,tested_negative
|
||||
0,131,66,40,0,34.3,0.196,22,tested_positive
|
||||
3,82,70,0,0,21.1,0.389,25,tested_negative
|
||||
3,193,70,31,0,34.9,0.241,25,tested_positive
|
||||
4,95,64,0,0,32,0.161,31,tested_positive
|
||||
6,137,61,0,0,24.2,0.151,55,tested_negative
|
||||
5,136,84,41,88,35,0.286,35,tested_positive
|
||||
9,72,78,25,0,31.6,0.28,38,tested_negative
|
||||
5,168,64,0,0,32.9,0.135,41,tested_positive
|
||||
2,123,48,32,165,42.1,0.52,26,tested_negative
|
||||
4,115,72,0,0,28.9,0.376,46,tested_positive
|
||||
0,101,62,0,0,21.9,0.336,25,tested_negative
|
||||
8,197,74,0,0,25.9,1.191,39,tested_positive
|
||||
1,172,68,49,579,42.4,0.702,28,tested_positive
|
||||
6,102,90,39,0,35.7,0.674,28,tested_negative
|
||||
1,112,72,30,176,34.4,0.528,25,tested_negative
|
||||
1,143,84,23,310,42.4,1.076,22,tested_negative
|
||||
1,143,74,22,61,26.2,0.256,21,tested_negative
|
||||
0,138,60,35,167,34.6,0.534,21,tested_positive
|
||||
3,173,84,33,474,35.7,0.258,22,tested_positive
|
||||
1,97,68,21,0,27.2,1.095,22,tested_negative
|
||||
4,144,82,32,0,38.5,0.554,37,tested_positive
|
||||
1,83,68,0,0,18.2,0.624,27,tested_negative
|
||||
3,129,64,29,115,26.4,0.219,28,tested_positive
|
||||
1,119,88,41,170,45.3,0.507,26,tested_negative
|
||||
2,94,68,18,76,26,0.561,21,tested_negative
|
||||
0,102,64,46,78,40.6,0.496,21,tested_negative
|
||||
2,115,64,22,0,30.8,0.421,21,tested_negative
|
||||
8,151,78,32,210,42.9,0.516,36,tested_positive
|
||||
4,184,78,39,277,37,0.264,31,tested_positive
|
||||
0,94,0,0,0,0,0.256,25,tested_negative
|
||||
1,181,64,30,180,34.1,0.328,38,tested_positive
|
||||
0,135,94,46,145,40.6,0.284,26,tested_negative
|
||||
1,95,82,25,180,35,0.233,43,tested_positive
|
||||
2,99,0,0,0,22.2,0.108,23,tested_negative
|
||||
3,89,74,16,85,30.4,0.551,38,tested_negative
|
||||
1,80,74,11,60,30,0.527,22,tested_negative
|
||||
2,139,75,0,0,25.6,0.167,29,tested_negative
|
||||
1,90,68,8,0,24.5,1.138,36,tested_negative
|
||||
0,141,0,0,0,42.4,0.205,29,tested_positive
|
||||
12,140,85,33,0,37.4,0.244,41,tested_negative
|
||||
5,147,75,0,0,29.9,0.434,28,tested_negative
|
||||
1,97,70,15,0,18.2,0.147,21,tested_negative
|
||||
6,107,88,0,0,36.8,0.727,31,tested_negative
|
||||
0,189,104,25,0,34.3,0.435,41,tested_positive
|
||||
2,83,66,23,50,32.2,0.497,22,tested_negative
|
||||
4,117,64,27,120,33.2,0.23,24,tested_negative
|
||||
8,108,70,0,0,30.5,0.955,33,tested_positive
|
||||
4,117,62,12,0,29.7,0.38,30,tested_positive
|
||||
0,180,78,63,14,59.4,2.42,25,tested_positive
|
||||
1,100,72,12,70,25.3,0.658,28,tested_negative
|
||||
0,95,80,45,92,36.5,0.33,26,tested_negative
|
||||
0,104,64,37,64,33.6,0.51,22,tested_positive
|
||||
0,120,74,18,63,30.5,0.285,26,tested_negative
|
||||
1,82,64,13,95,21.2,0.415,23,tested_negative
|
||||
2,134,70,0,0,28.9,0.542,23,tested_positive
|
||||
0,91,68,32,210,39.9,0.381,25,tested_negative
|
||||
2,119,0,0,0,19.6,0.832,72,tested_negative
|
||||
2,100,54,28,105,37.8,0.498,24,tested_negative
|
||||
14,175,62,30,0,33.6,0.212,38,tested_positive
|
||||
1,135,54,0,0,26.7,0.687,62,tested_negative
|
||||
5,86,68,28,71,30.2,0.364,24,tested_negative
|
||||
10,148,84,48,237,37.6,1.001,51,tested_positive
|
||||
9,134,74,33,60,25.9,0.46,81,tested_negative
|
||||
9,120,72,22,56,20.8,0.733,48,tested_negative
|
||||
1,71,62,0,0,21.8,0.416,26,tested_negative
|
||||
8,74,70,40,49,35.3,0.705,39,tested_negative
|
||||
5,88,78,30,0,27.6,0.258,37,tested_negative
|
||||
10,115,98,0,0,24,1.022,34,tested_negative
|
||||
0,124,56,13,105,21.8,0.452,21,tested_negative
|
||||
0,74,52,10,36,27.8,0.269,22,tested_negative
|
||||
0,97,64,36,100,36.8,0.6,25,tested_negative
|
||||
8,120,0,0,0,30,0.183,38,tested_positive
|
||||
6,154,78,41,140,46.1,0.571,27,tested_negative
|
||||
1,144,82,40,0,41.3,0.607,28,tested_negative
|
||||
0,137,70,38,0,33.2,0.17,22,tested_negative
|
||||
0,119,66,27,0,38.8,0.259,22,tested_negative
|
||||
7,136,90,0,0,29.9,0.21,50,tested_negative
|
||||
4,114,64,0,0,28.9,0.126,24,tested_negative
|
||||
0,137,84,27,0,27.3,0.231,59,tested_negative
|
||||
2,105,80,45,191,33.7,0.711,29,tested_positive
|
||||
7,114,76,17,110,23.8,0.466,31,tested_negative
|
||||
8,126,74,38,75,25.9,0.162,39,tested_negative
|
||||
4,132,86,31,0,28,0.419,63,tested_negative
|
||||
3,158,70,30,328,35.5,0.344,35,tested_positive
|
||||
0,123,88,37,0,35.2,0.197,29,tested_negative
|
||||
4,85,58,22,49,27.8,0.306,28,tested_negative
|
||||
0,84,82,31,125,38.2,0.233,23,tested_negative
|
||||
0,145,0,0,0,44.2,0.63,31,tested_positive
|
||||
0,135,68,42,250,42.3,0.365,24,tested_positive
|
||||
1,139,62,41,480,40.7,0.536,21,tested_negative
|
||||
0,173,78,32,265,46.5,1.159,58,tested_negative
|
||||
4,99,72,17,0,25.6,0.294,28,tested_negative
|
||||
8,194,80,0,0,26.1,0.551,67,tested_negative
|
||||
2,83,65,28,66,36.8,0.629,24,tested_negative
|
||||
2,89,90,30,0,33.5,0.292,42,tested_negative
|
||||
4,99,68,38,0,32.8,0.145,33,tested_negative
|
||||
4,125,70,18,122,28.9,1.144,45,tested_positive
|
||||
3,80,0,0,0,0,0.174,22,tested_negative
|
||||
6,166,74,0,0,26.6,0.304,66,tested_negative
|
||||
5,110,68,0,0,26,0.292,30,tested_negative
|
||||
2,81,72,15,76,30.1,0.547,25,tested_negative
|
||||
7,195,70,33,145,25.1,0.163,55,tested_positive
|
||||
6,154,74,32,193,29.3,0.839,39,tested_negative
|
||||
2,117,90,19,71,25.2,0.313,21,tested_negative
|
||||
3,84,72,32,0,37.2,0.267,28,tested_negative
|
||||
6,0,68,41,0,39,0.727,41,tested_positive
|
||||
7,94,64,25,79,33.3,0.738,41,tested_negative
|
||||
3,96,78,39,0,37.3,0.238,40,tested_negative
|
||||
10,75,82,0,0,33.3,0.263,38,tested_negative
|
||||
0,180,90,26,90,36.5,0.314,35,tested_positive
|
||||
1,130,60,23,170,28.6,0.692,21,tested_negative
|
||||
2,84,50,23,76,30.4,0.968,21,tested_negative
|
||||
8,120,78,0,0,25,0.409,64,tested_negative
|
||||
12,84,72,31,0,29.7,0.297,46,tested_positive
|
||||
0,139,62,17,210,22.1,0.207,21,tested_negative
|
||||
9,91,68,0,0,24.2,0.2,58,tested_negative
|
||||
2,91,62,0,0,27.3,0.525,22,tested_negative
|
||||
3,99,54,19,86,25.6,0.154,24,tested_negative
|
||||
3,163,70,18,105,31.6,0.268,28,tested_positive
|
||||
9,145,88,34,165,30.3,0.771,53,tested_positive
|
||||
7,125,86,0,0,37.6,0.304,51,tested_negative
|
||||
13,76,60,0,0,32.8,0.18,41,tested_negative
|
||||
6,129,90,7,326,19.6,0.582,60,tested_negative
|
||||
2,68,70,32,66,25,0.187,25,tested_negative
|
||||
3,124,80,33,130,33.2,0.305,26,tested_negative
|
||||
6,114,0,0,0,0,0.189,26,tested_negative
|
||||
9,130,70,0,0,34.2,0.652,45,tested_positive
|
||||
3,125,58,0,0,31.6,0.151,24,tested_negative
|
||||
3,87,60,18,0,21.8,0.444,21,tested_negative
|
||||
1,97,64,19,82,18.2,0.299,21,tested_negative
|
||||
3,116,74,15,105,26.3,0.107,24,tested_negative
|
||||
0,117,66,31,188,30.8,0.493,22,tested_negative
|
||||
0,111,65,0,0,24.6,0.66,31,tested_negative
|
||||
2,122,60,18,106,29.8,0.717,22,tested_negative
|
||||
0,107,76,0,0,45.3,0.686,24,tested_negative
|
||||
1,86,66,52,65,41.3,0.917,29,tested_negative
|
||||
6,91,0,0,0,29.8,0.501,31,tested_negative
|
||||
1,77,56,30,56,33.3,1.251,24,tested_negative
|
||||
4,132,0,0,0,32.9,0.302,23,tested_positive
|
||||
0,105,90,0,0,29.6,0.197,46,tested_negative
|
||||
0,57,60,0,0,21.7,0.735,67,tested_negative
|
||||
0,127,80,37,210,36.3,0.804,23,tested_negative
|
||||
3,129,92,49,155,36.4,0.968,32,tested_positive
|
||||
8,100,74,40,215,39.4,0.661,43,tested_positive
|
||||
3,128,72,25,190,32.4,0.549,27,tested_positive
|
||||
10,90,85,32,0,34.9,0.825,56,tested_positive
|
||||
4,84,90,23,56,39.5,0.159,25,tested_negative
|
||||
1,88,78,29,76,32,0.365,29,tested_negative
|
||||
8,186,90,35,225,34.5,0.423,37,tested_positive
|
||||
5,187,76,27,207,43.6,1.034,53,tested_positive
|
||||
4,131,68,21,166,33.1,0.16,28,tested_negative
|
||||
1,164,82,43,67,32.8,0.341,50,tested_negative
|
||||
4,189,110,31,0,28.5,0.68,37,tested_negative
|
||||
1,116,70,28,0,27.4,0.204,21,tested_negative
|
||||
3,84,68,30,106,31.9,0.591,25,tested_negative
|
||||
6,114,88,0,0,27.8,0.247,66,tested_negative
|
||||
1,88,62,24,44,29.9,0.422,23,tested_negative
|
||||
1,84,64,23,115,36.9,0.471,28,tested_negative
|
||||
7,124,70,33,215,25.5,0.161,37,tested_negative
|
||||
1,97,70,40,0,38.1,0.218,30,tested_negative
|
||||
8,110,76,0,0,27.8,0.237,58,tested_negative
|
||||
11,103,68,40,0,46.2,0.126,42,tested_negative
|
||||
11,85,74,0,0,30.1,0.3,35,tested_negative
|
||||
6,125,76,0,0,33.8,0.121,54,tested_positive
|
||||
0,198,66,32,274,41.3,0.502,28,tested_positive
|
||||
1,87,68,34,77,37.6,0.401,24,tested_negative
|
||||
6,99,60,19,54,26.9,0.497,32,tested_negative
|
||||
0,91,80,0,0,32.4,0.601,27,tested_negative
|
||||
2,95,54,14,88,26.1,0.748,22,tested_negative
|
||||
1,99,72,30,18,38.6,0.412,21,tested_negative
|
||||
6,92,62,32,126,32,0.085,46,tested_negative
|
||||
4,154,72,29,126,31.3,0.338,37,tested_negative
|
||||
0,121,66,30,165,34.3,0.203,33,tested_positive
|
||||
3,78,70,0,0,32.5,0.27,39,tested_negative
|
||||
2,130,96,0,0,22.6,0.268,21,tested_negative
|
||||
3,111,58,31,44,29.5,0.43,22,tested_negative
|
||||
2,98,60,17,120,34.7,0.198,22,tested_negative
|
||||
1,143,86,30,330,30.1,0.892,23,tested_negative
|
||||
1,119,44,47,63,35.5,0.28,25,tested_negative
|
||||
6,108,44,20,130,24,0.813,35,tested_negative
|
||||
2,118,80,0,0,42.9,0.693,21,tested_positive
|
||||
10,133,68,0,0,27,0.245,36,tested_negative
|
||||
2,197,70,99,0,34.7,0.575,62,tested_positive
|
||||
0,151,90,46,0,42.1,0.371,21,tested_positive
|
||||
6,109,60,27,0,25,0.206,27,tested_negative
|
||||
12,121,78,17,0,26.5,0.259,62,tested_negative
|
||||
8,100,76,0,0,38.7,0.19,42,tested_negative
|
||||
8,124,76,24,600,28.7,0.687,52,tested_positive
|
||||
1,93,56,11,0,22.5,0.417,22,tested_negative
|
||||
8,143,66,0,0,34.9,0.129,41,tested_positive
|
||||
6,103,66,0,0,24.3,0.249,29,tested_negative
|
||||
3,176,86,27,156,33.3,1.154,52,tested_positive
|
||||
0,73,0,0,0,21.1,0.342,25,tested_negative
|
||||
11,111,84,40,0,46.8,0.925,45,tested_positive
|
||||
2,112,78,50,140,39.4,0.175,24,tested_negative
|
||||
3,132,80,0,0,34.4,0.402,44,tested_positive
|
||||
2,82,52,22,115,28.5,1.699,25,tested_negative
|
||||
6,123,72,45,230,33.6,0.733,34,tested_negative
|
||||
0,188,82,14,185,32,0.682,22,tested_positive
|
||||
0,67,76,0,0,45.3,0.194,46,tested_negative
|
||||
1,89,24,19,25,27.8,0.559,21,tested_negative
|
||||
1,173,74,0,0,36.8,0.088,38,tested_positive
|
||||
1,109,38,18,120,23.1,0.407,26,tested_negative
|
||||
1,108,88,19,0,27.1,0.4,24,tested_negative
|
||||
6,96,0,0,0,23.7,0.19,28,tested_negative
|
||||
1,124,74,36,0,27.8,0.1,30,tested_negative
|
||||
7,150,78,29,126,35.2,0.692,54,tested_positive
|
||||
4,183,0,0,0,28.4,0.212,36,tested_positive
|
||||
1,124,60,32,0,35.8,0.514,21,tested_negative
|
||||
1,181,78,42,293,40,1.258,22,tested_positive
|
||||
1,92,62,25,41,19.5,0.482,25,tested_negative
|
||||
0,152,82,39,272,41.5,0.27,27,tested_negative
|
||||
1,111,62,13,182,24,0.138,23,tested_negative
|
||||
3,106,54,21,158,30.9,0.292,24,tested_negative
|
||||
3,174,58,22,194,32.9,0.593,36,tested_positive
|
||||
7,168,88,42,321,38.2,0.787,40,tested_positive
|
||||
6,105,80,28,0,32.5,0.878,26,tested_negative
|
||||
11,138,74,26,144,36.1,0.557,50,tested_positive
|
||||
3,106,72,0,0,25.8,0.207,27,tested_negative
|
||||
6,117,96,0,0,28.7,0.157,30,tested_negative
|
||||
2,68,62,13,15,20.1,0.257,23,tested_negative
|
||||
9,112,82,24,0,28.2,1.282,50,tested_positive
|
||||
0,119,0,0,0,32.4,0.141,24,tested_positive
|
||||
2,112,86,42,160,38.4,0.246,28,tested_negative
|
||||
2,92,76,20,0,24.2,1.698,28,tested_negative
|
||||
6,183,94,0,0,40.8,1.461,45,tested_negative
|
||||
0,94,70,27,115,43.5,0.347,21,tested_negative
|
||||
2,108,64,0,0,30.8,0.158,21,tested_negative
|
||||
4,90,88,47,54,37.7,0.362,29,tested_negative
|
||||
0,125,68,0,0,24.7,0.206,21,tested_negative
|
||||
0,132,78,0,0,32.4,0.393,21,tested_negative
|
||||
5,128,80,0,0,34.6,0.144,45,tested_negative
|
||||
4,94,65,22,0,24.7,0.148,21,tested_negative
|
||||
7,114,64,0,0,27.4,0.732,34,tested_positive
|
||||
0,102,78,40,90,34.5,0.238,24,tested_negative
|
||||
2,111,60,0,0,26.2,0.343,23,tested_negative
|
||||
1,128,82,17,183,27.5,0.115,22,tested_negative
|
||||
10,92,62,0,0,25.9,0.167,31,tested_negative
|
||||
13,104,72,0,0,31.2,0.465,38,tested_positive
|
||||
5,104,74,0,0,28.8,0.153,48,tested_negative
|
||||
2,94,76,18,66,31.6,0.649,23,tested_negative
|
||||
7,97,76,32,91,40.9,0.871,32,tested_positive
|
||||
1,100,74,12,46,19.5,0.149,28,tested_negative
|
||||
0,102,86,17,105,29.3,0.695,27,tested_negative
|
||||
4,128,70,0,0,34.3,0.303,24,tested_negative
|
||||
6,147,80,0,0,29.5,0.178,50,tested_positive
|
||||
4,90,0,0,0,28,0.61,31,tested_negative
|
||||
3,103,72,30,152,27.6,0.73,27,tested_negative
|
||||
2,157,74,35,440,39.4,0.134,30,tested_negative
|
||||
1,167,74,17,144,23.4,0.447,33,tested_positive
|
||||
0,179,50,36,159,37.8,0.455,22,tested_positive
|
||||
11,136,84,35,130,28.3,0.26,42,tested_positive
|
||||
0,107,60,25,0,26.4,0.133,23,tested_negative
|
||||
1,91,54,25,100,25.2,0.234,23,tested_negative
|
||||
1,117,60,23,106,33.8,0.466,27,tested_negative
|
||||
5,123,74,40,77,34.1,0.269,28,tested_negative
|
||||
2,120,54,0,0,26.8,0.455,27,tested_negative
|
||||
1,106,70,28,135,34.2,0.142,22,tested_negative
|
||||
2,155,52,27,540,38.7,0.24,25,tested_positive
|
||||
2,101,58,35,90,21.8,0.155,22,tested_negative
|
||||
1,120,80,48,200,38.9,1.162,41,tested_negative
|
||||
11,127,106,0,0,39,0.19,51,tested_negative
|
||||
3,80,82,31,70,34.2,1.292,27,tested_positive
|
||||
10,162,84,0,0,27.7,0.182,54,tested_negative
|
||||
1,199,76,43,0,42.9,1.394,22,tested_positive
|
||||
8,167,106,46,231,37.6,0.165,43,tested_positive
|
||||
9,145,80,46,130,37.9,0.637,40,tested_positive
|
||||
6,115,60,39,0,33.7,0.245,40,tested_positive
|
||||
1,112,80,45,132,34.8,0.217,24,tested_negative
|
||||
4,145,82,18,0,32.5,0.235,70,tested_positive
|
||||
10,111,70,27,0,27.5,0.141,40,tested_positive
|
||||
6,98,58,33,190,34,0.43,43,tested_negative
|
||||
9,154,78,30,100,30.9,0.164,45,tested_negative
|
||||
6,165,68,26,168,33.6,0.631,49,tested_negative
|
||||
1,99,58,10,0,25.4,0.551,21,tested_negative
|
||||
10,68,106,23,49,35.5,0.285,47,tested_negative
|
||||
3,123,100,35,240,57.3,0.88,22,tested_negative
|
||||
8,91,82,0,0,35.6,0.587,68,tested_negative
|
||||
6,195,70,0,0,30.9,0.328,31,tested_positive
|
||||
9,156,86,0,0,24.8,0.23,53,tested_positive
|
||||
0,93,60,0,0,35.3,0.263,25,tested_negative
|
||||
3,121,52,0,0,36,0.127,25,tested_positive
|
||||
2,101,58,17,265,24.2,0.614,23,tested_negative
|
||||
2,56,56,28,45,24.2,0.332,22,tested_negative
|
||||
0,162,76,36,0,49.6,0.364,26,tested_positive
|
||||
0,95,64,39,105,44.6,0.366,22,tested_negative
|
||||
4,125,80,0,0,32.3,0.536,27,tested_positive
|
||||
5,136,82,0,0,0,0.64,69,tested_negative
|
||||
2,129,74,26,205,33.2,0.591,25,tested_negative
|
||||
3,130,64,0,0,23.1,0.314,22,tested_negative
|
||||
1,107,50,19,0,28.3,0.181,29,tested_negative
|
||||
1,140,74,26,180,24.1,0.828,23,tested_negative
|
||||
1,144,82,46,180,46.1,0.335,46,tested_positive
|
||||
8,107,80,0,0,24.6,0.856,34,tested_negative
|
||||
13,158,114,0,0,42.3,0.257,44,tested_positive
|
||||
2,121,70,32,95,39.1,0.886,23,tested_negative
|
||||
7,129,68,49,125,38.5,0.439,43,tested_positive
|
||||
2,90,60,0,0,23.5,0.191,25,tested_negative
|
||||
7,142,90,24,480,30.4,0.128,43,tested_positive
|
||||
3,169,74,19,125,29.9,0.268,31,tested_positive
|
||||
0,99,0,0,0,25,0.253,22,tested_negative
|
||||
4,127,88,11,155,34.5,0.598,28,tested_negative
|
||||
4,118,70,0,0,44.5,0.904,26,tested_negative
|
||||
2,122,76,27,200,35.9,0.483,26,tested_negative
|
||||
6,125,78,31,0,27.6,0.565,49,tested_positive
|
||||
1,168,88,29,0,35,0.905,52,tested_positive
|
||||
2,129,0,0,0,38.5,0.304,41,tested_negative
|
||||
4,110,76,20,100,28.4,0.118,27,tested_negative
|
||||
6,80,80,36,0,39.8,0.177,28,tested_negative
|
||||
10,115,0,0,0,0,0.261,30,tested_positive
|
||||
2,127,46,21,335,34.4,0.176,22,tested_negative
|
||||
9,164,78,0,0,32.8,0.148,45,tested_positive
|
||||
2,93,64,32,160,38,0.674,23,tested_positive
|
||||
3,158,64,13,387,31.2,0.295,24,tested_negative
|
||||
5,126,78,27,22,29.6,0.439,40,tested_negative
|
||||
10,129,62,36,0,41.2,0.441,38,tested_positive
|
||||
0,134,58,20,291,26.4,0.352,21,tested_negative
|
||||
3,102,74,0,0,29.5,0.121,32,tested_negative
|
||||
7,187,50,33,392,33.9,0.826,34,tested_positive
|
||||
3,173,78,39,185,33.8,0.97,31,tested_positive
|
||||
10,94,72,18,0,23.1,0.595,56,tested_negative
|
||||
1,108,60,46,178,35.5,0.415,24,tested_negative
|
||||
5,97,76,27,0,35.6,0.378,52,tested_positive
|
||||
4,83,86,19,0,29.3,0.317,34,tested_negative
|
||||
1,114,66,36,200,38.1,0.289,21,tested_negative
|
||||
1,149,68,29,127,29.3,0.349,42,tested_positive
|
||||
5,117,86,30,105,39.1,0.251,42,tested_negative
|
||||
1,111,94,0,0,32.8,0.265,45,tested_negative
|
||||
4,112,78,40,0,39.4,0.236,38,tested_negative
|
||||
1,116,78,29,180,36.1,0.496,25,tested_negative
|
||||
0,141,84,26,0,32.4,0.433,22,tested_negative
|
||||
2,175,88,0,0,22.9,0.326,22,tested_negative
|
||||
2,92,52,0,0,30.1,0.141,22,tested_negative
|
||||
3,130,78,23,79,28.4,0.323,34,tested_positive
|
||||
8,120,86,0,0,28.4,0.259,22,tested_positive
|
||||
2,174,88,37,120,44.5,0.646,24,tested_positive
|
||||
2,106,56,27,165,29,0.426,22,tested_negative
|
||||
2,105,75,0,0,23.3,0.56,53,tested_negative
|
||||
4,95,60,32,0,35.4,0.284,28,tested_negative
|
||||
0,126,86,27,120,27.4,0.515,21,tested_negative
|
||||
8,65,72,23,0,32,0.6,42,tested_negative
|
||||
2,99,60,17,160,36.6,0.453,21,tested_negative
|
||||
1,102,74,0,0,39.5,0.293,42,tested_positive
|
||||
11,120,80,37,150,42.3,0.785,48,tested_positive
|
||||
3,102,44,20,94,30.8,0.4,26,tested_negative
|
||||
1,109,58,18,116,28.5,0.219,22,tested_negative
|
||||
9,140,94,0,0,32.7,0.734,45,tested_positive
|
||||
13,153,88,37,140,40.6,1.174,39,tested_negative
|
||||
12,100,84,33,105,30,0.488,46,tested_negative
|
||||
1,147,94,41,0,49.3,0.358,27,tested_positive
|
||||
1,81,74,41,57,46.3,1.096,32,tested_negative
|
||||
3,187,70,22,200,36.4,0.408,36,tested_positive
|
||||
6,162,62,0,0,24.3,0.178,50,tested_positive
|
||||
4,136,70,0,0,31.2,1.182,22,tested_positive
|
||||
1,121,78,39,74,39,0.261,28,tested_negative
|
||||
3,108,62,24,0,26,0.223,25,tested_negative
|
||||
0,181,88,44,510,43.3,0.222,26,tested_positive
|
||||
8,154,78,32,0,32.4,0.443,45,tested_positive
|
||||
1,128,88,39,110,36.5,1.057,37,tested_positive
|
||||
7,137,90,41,0,32,0.391,39,tested_negative
|
||||
0,123,72,0,0,36.3,0.258,52,tested_positive
|
||||
1,106,76,0,0,37.5,0.197,26,tested_negative
|
||||
6,190,92,0,0,35.5,0.278,66,tested_positive
|
||||
2,88,58,26,16,28.4,0.766,22,tested_negative
|
||||
9,170,74,31,0,44,0.403,43,tested_positive
|
||||
9,89,62,0,0,22.5,0.142,33,tested_negative
|
||||
10,101,76,48,180,32.9,0.171,63,tested_negative
|
||||
2,122,70,27,0,36.8,0.34,27,tested_negative
|
||||
5,121,72,23,112,26.2,0.245,30,tested_negative
|
||||
1,126,60,0,0,30.1,0.349,47,tested_positive
|
||||
1,93,70,31,0,30.4,0.315,23,tested_negative
|
332
tests/datasets/glass.arff
Executable file
332
tests/datasets/glass.arff
Executable file
@@ -0,0 +1,332 @@
|
||||
% 1. Title: Glass Identification Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: B. German
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% (b) Donor: Vina Spiehler, Ph.D., DABFT
|
||||
% Diagnostic Products Corporation
|
||||
% (213) 776-0180 (ext 3014)
|
||||
% (c) Date: September, 1987
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% -- Rule Induction in Forensic Science
|
||||
% -- Ian W. Evett and Ernest J. Spiehler
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% -- Unknown technical note number (sorry, not listed here)
|
||||
% -- General Results: nearest neighbor held its own with respect to the
|
||||
% rule-based system
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
|
||||
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
|
||||
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
|
||||
% Suite 206; Marina del Rey, CA 90292 (213) 827-7890 and FAX: -3189.
|
||||
% In determining whether the glass was a type of "float" glass or not,
|
||||
% the following results were obtained (# incorrect answers):
|
||||
%
|
||||
% Type of Sample Beagle NN DA
|
||||
% Windows that were float processed (87) 10 12 21
|
||||
% Windows that were not: (76) 19 16 22
|
||||
%
|
||||
% The study of classification of types of glass was motivated by
|
||||
% criminological investigation. At the scene of the crime, the glass left
|
||||
% can be used as evidence...if it is correctly identified!
|
||||
%
|
||||
% 5. Number of Instances: 214
|
||||
%
|
||||
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
|
||||
% -- all attributes are continuously valued
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. Id number: 1 to 214
|
||||
% 2. RI: refractive index
|
||||
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
|
||||
% are attributes 4-10)
|
||||
% 4. Mg: Magnesium
|
||||
% 5. Al: Aluminum
|
||||
% 6. Si: Silicon
|
||||
% 7. K: Potassium
|
||||
% 8. Ca: Calcium
|
||||
% 9. Ba: Barium
|
||||
% 10. Fe: Iron
|
||||
% 11. Type of glass: (class attribute)
|
||||
% -- 1 building_windows_float_processed
|
||||
% -- 2 building_windows_non_float_processed
|
||||
% -- 3 vehicle_windows_float_processed
|
||||
% -- 4 vehicle_windows_non_float_processed (none in this database)
|
||||
% -- 5 containers
|
||||
% -- 6 tableware
|
||||
% -- 7 headlamps
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Attribute: Min Max Mean SD Correlation with class
|
||||
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
|
||||
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
|
||||
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
|
||||
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
|
||||
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
|
||||
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
|
||||
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
|
||||
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
|
||||
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
|
||||
%
|
||||
% 9. Class Distribution: (out of 214 total instances)
|
||||
% -- 163 Window glass (building windows and vehicle windows)
|
||||
% -- 87 float processed
|
||||
% -- 70 building windows
|
||||
% -- 17 vehicle windows
|
||||
% -- 76 non-float processed
|
||||
% -- 76 building windows
|
||||
% -- 0 vehicle windows
|
||||
% -- 51 Non-window glass
|
||||
% -- 13 containers
|
||||
% -- 9 tableware
|
||||
% -- 29 headlamps
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'Type'
|
||||
% From: '1' To: 'build wind float'
|
||||
% From: '2' To: 'build wind non-float'
|
||||
% From: '3' To: 'vehic wind float'
|
||||
% From: '4' To: 'vehic wind non-float'
|
||||
% From: '5' To: containers
|
||||
% From: '6' To: tableware
|
||||
% From: '7' To: headlamps
|
||||
%
|
||||
@relation Glass
|
||||
@attribute 'RI' real
|
||||
@attribute 'Na' real
|
||||
@attribute 'Mg' real
|
||||
@attribute 'Al' real
|
||||
@attribute 'Si' real
|
||||
@attribute 'K' real
|
||||
@attribute 'Ca' real
|
||||
@attribute 'Ba' real
|
||||
@attribute 'Fe' real
|
||||
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
|
||||
@data
|
||||
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
|
||||
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
|
||||
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
|
||||
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
|
||||
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
|
||||
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
|
||||
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
|
||||
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
|
||||
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
|
||||
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
|
||||
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
|
||||
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
|
||||
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
|
||||
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
|
||||
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
|
||||
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
|
||||
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
|
||||
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
|
||||
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
|
||||
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
|
||||
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
|
||||
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
|
||||
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
|
||||
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
|
||||
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
|
||||
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
|
||||
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
|
||||
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
|
||||
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
|
||||
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
|
||||
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
|
||||
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
|
||||
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
|
||||
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
|
||||
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
|
||||
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
|
||||
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
|
||||
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
|
||||
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
|
||||
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
|
||||
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
|
||||
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
|
||||
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
|
||||
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
|
||||
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
|
||||
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
|
||||
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
|
||||
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
|
||||
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
|
||||
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
|
||||
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
|
||||
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
|
||||
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
|
||||
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
|
||||
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
|
||||
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
|
||||
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
|
||||
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
|
||||
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
|
||||
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
|
||||
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
|
||||
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
|
||||
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
|
||||
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
|
||||
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
|
||||
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
|
||||
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
|
||||
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
|
||||
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
|
||||
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
|
||||
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
|
||||
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
|
||||
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
|
||||
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
|
||||
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
|
||||
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
|
||||
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
|
||||
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
|
||||
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
|
||||
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
|
||||
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
|
||||
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
|
||||
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
|
||||
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
|
||||
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
|
||||
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
|
||||
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
|
||||
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
|
||||
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
|
||||
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
|
||||
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
|
||||
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
|
||||
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
|
||||
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
|
||||
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
|
||||
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
|
||||
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
|
||||
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
|
||||
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
|
||||
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
|
||||
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
|
||||
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
|
||||
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
|
||||
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
|
||||
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
|
||||
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
|
||||
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
|
||||
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
|
||||
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
|
||||
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
|
||||
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
|
||||
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
|
||||
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
|
||||
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
|
||||
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
|
||||
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
|
||||
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
|
||||
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
|
||||
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
|
||||
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
|
||||
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
|
||||
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
|
||||
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
|
||||
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
|
||||
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
|
||||
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
|
||||
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
|
||||
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
|
||||
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
|
||||
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
|
||||
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
|
||||
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
|
||||
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
|
||||
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
|
||||
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
|
||||
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
|
||||
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
|
||||
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
|
||||
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
|
||||
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
|
||||
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
|
||||
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
|
||||
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
|
||||
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
|
||||
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
|
||||
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
|
||||
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
|
||||
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
|
||||
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
|
||||
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
|
||||
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
|
||||
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
|
||||
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
|
||||
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
|
||||
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
|
||||
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
|
||||
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
|
||||
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
|
||||
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
|
||||
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
|
||||
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
|
||||
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
|
||||
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
|
||||
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
|
||||
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
|
||||
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
|
||||
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
|
||||
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
|
||||
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
|
||||
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
|
||||
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
|
||||
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
|
||||
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
|
||||
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
|
||||
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
|
||||
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
|
||||
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
|
||||
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
|
||||
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
|
||||
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
|
||||
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
|
||||
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
|
||||
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
|
||||
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
|
||||
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
|
||||
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
|
||||
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
|
||||
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
|
||||
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
|
||||
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
|
||||
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
|
||||
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
|
||||
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
|
||||
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
|
||||
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
|
||||
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
|
||||
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
|
||||
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
|
||||
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
|
||||
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
|
||||
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
|
||||
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
|
||||
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
|
||||
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
|
||||
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
|
||||
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware
|
399
tests/datasets/liver-disorders.arff
Executable file
399
tests/datasets/liver-disorders.arff
Executable file
@@ -0,0 +1,399 @@
|
||||
% 1. Title: BUPA liver disorders
|
||||
%
|
||||
% 2. Source information:
|
||||
% -- Creators: BUPA Medical Research Ltd.
|
||||
% -- Donor: Richard S. Forsyth
|
||||
% 8 Grosvenor Avenue
|
||||
% Mapperley Park
|
||||
% Nottingham NG3 5DX
|
||||
% 0602-621676
|
||||
% -- Date: 5/15/1990
|
||||
%
|
||||
% 3. Past usage:
|
||||
% -- None known other than what is shown in the PC/BEAGLE User's Guide
|
||||
% (written by Richard S. Forsyth).
|
||||
%
|
||||
% 4. Relevant information:
|
||||
% -- The first 5 variables are all blood tests which are thought
|
||||
% to be sensitive to liver disorders that might arise from
|
||||
% excessive alcohol consumption. Each line in the bupa.data file
|
||||
% constitutes the record of a single male individual.
|
||||
% -- It appears that drinks>5 is some sort of a selector on this database.
|
||||
% See the PC/BEAGLE User's Guide for more information.
|
||||
%
|
||||
% 5. Number of instances: 345
|
||||
%
|
||||
% 6. Number of attributes: 7 overall
|
||||
%
|
||||
% 7. Attribute information:
|
||||
% 1. mcv mean corpuscular volume
|
||||
% 2. alkphos alkaline phosphatase
|
||||
% 3. sgpt alanine aminotransferase
|
||||
% 4. sgot aspartate aminotransferase
|
||||
% 5. gammagt gamma-glutamyl transpeptidase
|
||||
% 6. drinks number of half-pint equivalents of alcoholic beverages
|
||||
% drunk per day
|
||||
% 7. selector field used to split data into two sets
|
||||
%
|
||||
% 8. Missing values: none
|
||||
% Information about the dataset
|
||||
% CLASSTYPE: nominal
|
||||
% CLASSINDEX: last
|
||||
%
|
||||
|
||||
@relation liver-disorders
|
||||
|
||||
@attribute mcv INTEGER
|
||||
@attribute alkphos INTEGER
|
||||
@attribute sgpt INTEGER
|
||||
@attribute sgot INTEGER
|
||||
@attribute gammagt INTEGER
|
||||
@attribute drinks REAL
|
||||
@attribute selector {1,2}
|
||||
|
||||
@data
|
||||
85,92,45,27,31,0.0,1
|
||||
85,64,59,32,23,0.0,2
|
||||
86,54,33,16,54,0.0,2
|
||||
91,78,34,24,36,0.0,2
|
||||
87,70,12,28,10,0.0,2
|
||||
98,55,13,17,17,0.0,2
|
||||
88,62,20,17,9,0.5,1
|
||||
88,67,21,11,11,0.5,1
|
||||
92,54,22,20,7,0.5,1
|
||||
90,60,25,19,5,0.5,1
|
||||
89,52,13,24,15,0.5,1
|
||||
82,62,17,17,15,0.5,1
|
||||
90,64,61,32,13,0.5,1
|
||||
86,77,25,19,18,0.5,1
|
||||
96,67,29,20,11,0.5,1
|
||||
91,78,20,31,18,0.5,1
|
||||
89,67,23,16,10,0.5,1
|
||||
89,79,17,17,16,0.5,1
|
||||
91,107,20,20,56,0.5,1
|
||||
94,116,11,33,11,0.5,1
|
||||
92,59,35,13,19,0.5,1
|
||||
93,23,35,20,20,0.5,1
|
||||
90,60,23,27,5,0.5,1
|
||||
96,68,18,19,19,0.5,1
|
||||
84,80,47,33,97,0.5,1
|
||||
92,70,24,13,26,0.5,1
|
||||
90,47,28,15,18,0.5,1
|
||||
88,66,20,21,10,0.5,1
|
||||
91,102,17,13,19,0.5,1
|
||||
87,41,31,19,16,0.5,1
|
||||
86,79,28,16,17,0.5,1
|
||||
91,57,31,23,42,0.5,1
|
||||
93,77,32,18,29,0.5,1
|
||||
88,96,28,21,40,0.5,1
|
||||
94,65,22,18,11,0.5,1
|
||||
91,72,155,68,82,0.5,2
|
||||
85,54,47,33,22,0.5,2
|
||||
79,39,14,19,9,0.5,2
|
||||
85,85,25,26,30,0.5,2
|
||||
89,63,24,20,38,0.5,2
|
||||
84,92,68,37,44,0.5,2
|
||||
89,68,26,39,42,0.5,2
|
||||
89,101,18,25,13,0.5,2
|
||||
86,84,18,14,16,0.5,2
|
||||
85,65,25,14,18,0.5,2
|
||||
88,61,19,21,13,0.5,2
|
||||
92,56,14,16,10,0.5,2
|
||||
95,50,29,25,50,0.5,2
|
||||
91,75,24,22,11,0.5,2
|
||||
83,40,29,25,38,0.5,2
|
||||
89,74,19,23,16,0.5,2
|
||||
85,64,24,22,11,0.5,2
|
||||
92,57,64,36,90,0.5,2
|
||||
94,48,11,23,43,0.5,2
|
||||
87,52,21,19,30,0.5,2
|
||||
85,65,23,29,15,0.5,2
|
||||
84,82,21,21,19,0.5,2
|
||||
88,49,20,22,19,0.5,2
|
||||
96,67,26,26,36,0.5,2
|
||||
90,63,24,24,24,0.5,2
|
||||
90,45,33,34,27,0.5,2
|
||||
90,72,14,15,18,0.5,2
|
||||
91,55,4,8,13,0.5,2
|
||||
91,52,15,22,11,0.5,2
|
||||
87,71,32,19,27,1.0,1
|
||||
89,77,26,20,19,1.0,1
|
||||
89,67,5,17,14,1.0,2
|
||||
85,51,26,24,23,1.0,2
|
||||
103,75,19,30,13,1.0,2
|
||||
90,63,16,21,14,1.0,2
|
||||
90,63,29,23,57,2.0,1
|
||||
90,67,35,19,35,2.0,1
|
||||
87,66,27,22,9,2.0,1
|
||||
90,73,34,21,22,2.0,1
|
||||
86,54,20,21,16,2.0,1
|
||||
90,80,19,14,42,2.0,1
|
||||
87,90,43,28,156,2.0,2
|
||||
96,72,28,19,30,2.0,2
|
||||
91,55,9,25,16,2.0,2
|
||||
95,78,27,25,30,2.0,2
|
||||
92,101,34,30,64,2.0,2
|
||||
89,51,41,22,48,2.0,2
|
||||
91,99,42,33,16,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
92,60,30,27,297,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
88,47,33,26,29,2.0,2
|
||||
92,65,17,25,9,2.0,2
|
||||
92,79,22,20,11,3.0,1
|
||||
84,83,20,25,7,3.0,1
|
||||
88,68,27,21,26,3.0,1
|
||||
86,48,20,20,6,3.0,1
|
||||
99,69,45,32,30,3.0,1
|
||||
88,66,23,12,15,3.0,1
|
||||
89,62,42,30,20,3.0,1
|
||||
90,51,23,17,27,3.0,1
|
||||
81,61,32,37,53,3.0,2
|
||||
89,89,23,18,104,3.0,2
|
||||
89,65,26,18,36,3.0,2
|
||||
92,75,26,26,24,3.0,2
|
||||
85,59,25,20,25,3.0,2
|
||||
92,61,18,13,81,3.0,2
|
||||
89,63,22,27,10,4.0,1
|
||||
90,84,18,23,13,4.0,1
|
||||
88,95,25,19,14,4.0,1
|
||||
89,35,27,29,17,4.0,1
|
||||
91,80,37,23,27,4.0,1
|
||||
91,109,33,15,18,4.0,1
|
||||
91,65,17,5,7,4.0,1
|
||||
88,107,29,20,50,4.0,2
|
||||
87,76,22,55,9,4.0,2
|
||||
87,86,28,23,21,4.0,2
|
||||
87,42,26,23,17,4.0,2
|
||||
88,80,24,25,17,4.0,2
|
||||
90,96,34,49,169,4.0,2
|
||||
86,67,11,15,8,4.0,2
|
||||
92,40,19,20,21,4.0,2
|
||||
85,60,17,21,14,4.0,2
|
||||
89,90,15,17,25,4.0,2
|
||||
91,57,15,16,16,4.0,2
|
||||
96,55,48,39,42,4.0,2
|
||||
79,101,17,27,23,4.0,2
|
||||
90,134,14,20,14,4.0,2
|
||||
89,76,14,21,24,4.0,2
|
||||
88,93,29,27,31,4.0,2
|
||||
90,67,10,16,16,4.0,2
|
||||
92,73,24,21,48,4.0,2
|
||||
91,55,28,28,82,4.0,2
|
||||
83,45,19,21,13,4.0,2
|
||||
90,74,19,14,22,4.0,2
|
||||
92,66,21,16,33,5.0,1
|
||||
93,63,26,18,18,5.0,1
|
||||
86,78,47,39,107,5.0,2
|
||||
97,44,113,45,150,5.0,2
|
||||
87,59,15,19,12,5.0,2
|
||||
86,44,21,11,15,5.0,2
|
||||
87,64,16,20,24,5.0,2
|
||||
92,57,21,23,22,5.0,2
|
||||
90,70,25,23,112,5.0,2
|
||||
99,59,17,19,11,5.0,2
|
||||
92,80,10,26,20,6.0,1
|
||||
95,60,26,22,28,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
92,62,37,21,36,6.0,1
|
||||
95,50,13,14,15,6.0,1
|
||||
90,76,37,19,50,6.0,1
|
||||
96,70,70,26,36,6.0,1
|
||||
95,62,64,42,76,6.0,1
|
||||
92,62,20,23,20,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
82,56,67,38,92,6.0,2
|
||||
92,82,27,24,37,6.0,2
|
||||
90,63,12,26,21,6.0,2
|
||||
88,37,9,15,16,6.0,2
|
||||
100,60,29,23,76,6.0,2
|
||||
98,43,35,23,69,6.0,2
|
||||
91,74,87,50,67,6.0,2
|
||||
92,87,57,25,44,6.0,2
|
||||
93,99,36,34,48,6.0,2
|
||||
90,72,17,19,19,6.0,2
|
||||
97,93,21,20,68,6.0,2
|
||||
93,50,18,25,17,6.0,2
|
||||
90,57,20,26,33,6.0,2
|
||||
92,76,31,28,41,6.0,2
|
||||
88,55,19,17,14,6.0,2
|
||||
89,63,24,29,29,6.0,2
|
||||
92,79,70,32,84,7.0,1
|
||||
92,93,58,35,120,7.0,1
|
||||
93,84,58,47,62,7.0,2
|
||||
97,71,29,22,52,8.0,1
|
||||
84,99,33,19,26,8.0,1
|
||||
96,44,42,23,73,8.0,1
|
||||
90,62,22,21,21,8.0,1
|
||||
92,94,18,17,6,8.0,1
|
||||
90,67,77,39,114,8.0,1
|
||||
97,71,29,22,52,8.0,1
|
||||
91,69,25,25,66,8.0,2
|
||||
93,59,17,20,14,8.0,2
|
||||
92,95,85,48,200,8.0,2
|
||||
90,50,26,22,53,8.0,2
|
||||
91,62,59,47,60,8.0,2
|
||||
92,93,22,28,123,9.0,1
|
||||
92,77,86,41,31,10.0,1
|
||||
86,66,22,24,26,10.0,2
|
||||
98,57,31,34,73,10.0,2
|
||||
95,80,50,64,55,10.0,2
|
||||
92,108,53,33,94,12.0,2
|
||||
97,92,22,28,49,12.0,2
|
||||
93,77,39,37,108,16.0,1
|
||||
94,83,81,34,201,20.0,1
|
||||
87,75,25,21,14,0.0,1
|
||||
88,56,23,18,12,0.0,1
|
||||
84,97,41,20,32,0.0,2
|
||||
94,91,27,20,15,0.5,1
|
||||
97,62,17,13,5,0.5,1
|
||||
92,85,25,20,12,0.5,1
|
||||
82,48,27,15,12,0.5,1
|
||||
88,74,31,25,15,0.5,1
|
||||
95,77,30,14,21,0.5,1
|
||||
88,94,26,18,8,0.5,1
|
||||
91,70,19,19,22,0.5,1
|
||||
83,54,27,15,12,0.5,1
|
||||
91,105,40,26,56,0.5,1
|
||||
86,79,37,28,14,0.5,1
|
||||
91,96,35,22,135,0.5,1
|
||||
89,82,23,14,35,0.5,1
|
||||
90,73,24,23,11,0.5,1
|
||||
90,87,19,25,19,0.5,1
|
||||
89,82,33,32,18,0.5,1
|
||||
85,79,17,8,9,0.5,1
|
||||
85,119,30,26,17,0.5,1
|
||||
78,69,24,18,31,0.5,1
|
||||
88,107,34,21,27,0.5,1
|
||||
89,115,17,27,7,0.5,1
|
||||
92,67,23,15,12,0.5,1
|
||||
89,101,27,34,14,0.5,1
|
||||
91,84,11,12,10,0.5,1
|
||||
94,101,41,20,53,0.5,2
|
||||
88,46,29,22,18,0.5,2
|
||||
88,122,35,29,42,0.5,2
|
||||
84,88,28,25,35,0.5,2
|
||||
90,79,18,15,24,0.5,2
|
||||
87,69,22,26,11,0.5,2
|
||||
65,63,19,20,14,0.5,2
|
||||
90,64,12,17,14,0.5,2
|
||||
85,58,18,24,16,0.5,2
|
||||
88,81,41,27,36,0.5,2
|
||||
86,78,52,29,62,0.5,2
|
||||
82,74,38,28,48,0.5,2
|
||||
86,58,36,27,59,0.5,2
|
||||
94,56,30,18,27,0.5,2
|
||||
87,57,30,30,22,0.5,2
|
||||
98,74,148,75,159,0.5,2
|
||||
94,75,20,25,38,0.5,2
|
||||
83,68,17,20,71,0.5,2
|
||||
93,56,25,21,33,0.5,2
|
||||
101,65,18,21,22,0.5,2
|
||||
92,65,25,20,31,0.5,2
|
||||
92,58,14,16,13,0.5,2
|
||||
86,58,16,23,23,0.5,2
|
||||
85,62,15,13,22,0.5,2
|
||||
86,57,13,20,13,0.5,2
|
||||
86,54,26,30,13,0.5,2
|
||||
81,41,33,27,34,1.0,1
|
||||
91,67,32,26,13,1.0,1
|
||||
91,80,21,19,14,1.0,1
|
||||
92,60,23,15,19,1.0,1
|
||||
91,60,32,14,8,1.0,1
|
||||
93,65,28,22,10,1.0,1
|
||||
90,63,45,24,85,1.0,2
|
||||
87,92,21,22,37,1.0,2
|
||||
83,78,31,19,115,1.0,2
|
||||
95,62,24,23,14,1.0,2
|
||||
93,59,41,30,48,1.0,2
|
||||
84,82,43,32,38,2.0,1
|
||||
87,71,33,20,22,2.0,1
|
||||
86,44,24,15,18,2.0,1
|
||||
86,66,28,24,21,2.0,1
|
||||
88,58,31,17,17,2.0,1
|
||||
90,61,28,29,31,2.0,1
|
||||
88,69,70,24,64,2.0,1
|
||||
93,87,18,17,26,2.0,1
|
||||
98,58,33,21,28,2.0,1
|
||||
91,44,18,18,23,2.0,2
|
||||
87,75,37,19,70,2.0,2
|
||||
94,91,30,26,25,2.0,2
|
||||
88,85,14,15,10,2.0,2
|
||||
89,109,26,25,27,2.0,2
|
||||
87,59,37,27,34,2.0,2
|
||||
93,58,20,23,18,2.0,2
|
||||
88,57,9,15,16,2.0,2
|
||||
94,65,38,27,17,3.0,1
|
||||
91,71,12,22,11,3.0,1
|
||||
90,55,20,20,16,3.0,1
|
||||
91,64,21,17,26,3.0,2
|
||||
88,47,35,26,33,3.0,2
|
||||
82,72,31,20,84,3.0,2
|
||||
85,58,83,49,51,3.0,2
|
||||
91,54,25,22,35,4.0,1
|
||||
98,50,27,25,53,4.0,2
|
||||
86,62,29,21,26,4.0,2
|
||||
89,48,32,22,14,4.0,2
|
||||
82,68,20,22,9,4.0,2
|
||||
83,70,17,19,23,4.0,2
|
||||
96,70,21,26,21,4.0,2
|
||||
94,117,77,56,52,4.0,2
|
||||
93,45,11,14,21,4.0,2
|
||||
93,49,27,21,29,4.0,2
|
||||
84,73,46,32,39,4.0,2
|
||||
91,63,17,17,46,4.0,2
|
||||
90,57,31,18,37,4.0,2
|
||||
87,45,19,13,16,4.0,2
|
||||
91,68,14,20,19,4.0,2
|
||||
86,55,29,35,108,4.0,2
|
||||
91,86,52,47,52,4.0,2
|
||||
88,46,15,33,55,4.0,2
|
||||
85,52,22,23,34,4.0,2
|
||||
89,72,33,27,55,4.0,2
|
||||
95,59,23,18,19,4.0,2
|
||||
94,43,154,82,121,4.0,2
|
||||
96,56,38,26,23,5.0,2
|
||||
90,52,10,17,12,5.0,2
|
||||
94,45,20,16,12,5.0,2
|
||||
99,42,14,21,49,5.0,2
|
||||
93,102,47,23,37,5.0,2
|
||||
94,71,25,26,31,5.0,2
|
||||
92,73,33,34,115,5.0,2
|
||||
87,54,41,29,23,6.0,1
|
||||
92,67,15,14,14,6.0,1
|
||||
98,101,31,26,32,6.0,1
|
||||
92,53,51,33,92,6.0,1
|
||||
97,94,43,43,82,6.0,1
|
||||
93,43,11,16,54,6.0,1
|
||||
93,68,24,18,19,6.0,1
|
||||
95,36,38,19,15,6.0,1
|
||||
99,86,58,42,203,6.0,1
|
||||
98,66,103,57,114,6.0,1
|
||||
92,80,10,26,20,6.0,1
|
||||
96,74,27,25,43,6.0,2
|
||||
95,93,21,27,47,6.0,2
|
||||
86,109,16,22,28,6.0,2
|
||||
91,46,30,24,39,7.0,2
|
||||
102,82,34,78,203,7.0,2
|
||||
85,50,12,18,14,7.0,2
|
||||
91,57,33,23,12,8.0,1
|
||||
91,52,76,32,24,8.0,1
|
||||
93,70,46,30,33,8.0,1
|
||||
87,55,36,19,25,8.0,1
|
||||
98,123,28,24,31,8.0,1
|
||||
82,55,18,23,44,8.0,2
|
||||
95,73,20,25,225,8.0,2
|
||||
97,80,17,20,53,8.0,2
|
||||
100,83,25,24,28,8.0,2
|
||||
88,91,56,35,126,9.0,2
|
||||
91,138,45,21,48,10.0,1
|
||||
92,41,37,22,37,10.0,1
|
||||
86,123,20,25,23,10.0,2
|
||||
91,93,35,34,37,10.0,2
|
||||
87,87,15,23,11,10.0,2
|
||||
87,56,52,43,55,10.0,2
|
||||
99,75,26,24,41,12.0,1
|
||||
96,69,53,43,203,12.0,2
|
||||
98,77,55,35,89,15.0,1
|
||||
91,68,27,26,14,16.0,1
|
||||
98,99,57,45,65,20.0,1
|
180
tests/datasets/test.arff
Executable file
180
tests/datasets/test.arff
Executable file
@@ -0,0 +1,180 @@
|
||||
% 1. Title: Test Feature extracted from Glass
|
||||
%
|
||||
|
||||
@RELATION test
|
||||
|
||||
@ATTRIBUTE Mg REAL
|
||||
@ATTRIBUTE Type {0,1,2,3,4,5,6}
|
||||
|
||||
@DATA
|
||||
3.5,0
|
||||
3.52,1
|
||||
1.74,2
|
||||
0.0,3
|
||||
2.85,3
|
||||
3.65,1
|
||||
2.84,0
|
||||
0.0,4
|
||||
3.9,3
|
||||
3.58,3
|
||||
3.25,3
|
||||
3.76,1
|
||||
3.45,1
|
||||
3.48,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
2.96,3
|
||||
3.65,0
|
||||
0.0,3
|
||||
3.74,3
|
||||
3.66,0
|
||||
1.61,5
|
||||
3.49,3
|
||||
3.52,3
|
||||
3.54,3
|
||||
3.53,1
|
||||
3.56,3
|
||||
3.34,1
|
||||
0.0,2
|
||||
3.61,3
|
||||
3.6,0
|
||||
3.46,0
|
||||
2.72,3
|
||||
3.51,3
|
||||
3.09,3
|
||||
3.48,0
|
||||
3.8,3
|
||||
3.58,3
|
||||
3.54,1
|
||||
3.42,0
|
||||
2.68,5
|
||||
3.49,0
|
||||
3.68,3
|
||||
3.6,0
|
||||
3.59,3
|
||||
0.0,4
|
||||
3.54,0
|
||||
2.2,4
|
||||
3.59,0
|
||||
3.66,1
|
||||
3.87,3
|
||||
3.45,0
|
||||
3.82,0
|
||||
3.72,0
|
||||
3.33,0
|
||||
3.78,1
|
||||
2.24,2
|
||||
3.83,3
|
||||
0.78,2
|
||||
3.9,3
|
||||
3.58,0
|
||||
3.57,3
|
||||
3.52,0
|
||||
3.47,0
|
||||
3.48,0
|
||||
3.66,0
|
||||
3.62,0
|
||||
3.39,0
|
||||
0.0,3
|
||||
3.55,0
|
||||
1.01,3
|
||||
0.0,4
|
||||
3.43,0
|
||||
3.58,0
|
||||
0.0,4
|
||||
3.74,0
|
||||
0.0,4
|
||||
3.44,3
|
||||
3.97,3
|
||||
3.6,0
|
||||
3.64,3
|
||||
1.83,4
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
3.73,0
|
||||
3.58,1
|
||||
3.34,4
|
||||
2.09,3
|
||||
2.71,0
|
||||
3.18,3
|
||||
3.43,0
|
||||
3.15,3
|
||||
3.56,0
|
||||
0.0,4
|
||||
0.0,4
|
||||
4.49,0
|
||||
3.59,0
|
||||
3.56,3
|
||||
3.52,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
3.61,0
|
||||
3.74,0
|
||||
3.62,3
|
||||
3.84,0
|
||||
3.67,0
|
||||
3.58,0
|
||||
0.0,5
|
||||
3.66,3
|
||||
3.68,3
|
||||
2.28,3
|
||||
3.67,3
|
||||
3.2,4
|
||||
3.81,0
|
||||
0.0,3
|
||||
3.39,1
|
||||
3.57,0
|
||||
1.85,5
|
||||
3.75,0
|
||||
3.76,3
|
||||
0.0,3
|
||||
3.86,0
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,3
|
||||
3.5,0
|
||||
3.67,3
|
||||
3.55,3
|
||||
0.0,4
|
||||
3.61,3
|
||||
3.41,3
|
||||
3.7,0
|
||||
0.0,4
|
||||
3.58,0
|
||||
1.78,4
|
||||
3.85,0
|
||||
3.48,0
|
||||
1.71,5
|
||||
0.0,2
|
||||
3.5,3
|
||||
3.49,3
|
||||
3.36,1
|
||||
0.0,3
|
||||
3.54,0
|
||||
0.0,4
|
||||
2.87,0
|
||||
3.57,1
|
||||
3.48,0
|
||||
2.81,0
|
||||
0.0,4
|
||||
0.0,2
|
||||
3.62,3
|
||||
3.58,3
|
||||
3.45,3
|
||||
3.48,3
|
||||
3.54,3
|
||||
3.54,3
|
||||
3.37,0
|
||||
3.41,1
|
||||
3.48,0
|
||||
3.43,3
|
||||
3.5,0
|
||||
0.0,4
|
||||
3.54,0
|
||||
3.52,3
|
||||
3.52,3
|
||||
2.88,3
|
||||
2.19,2
|
222
tests/datasets/tests.txt
Normal file
222
tests/datasets/tests.txt
Normal file
@@ -0,0 +1,222 @@
|
||||
#
|
||||
# from, to, step, #bins, Q/U
|
||||
# discretized data
|
||||
# cut points
|
||||
#
|
||||
#
|
||||
# Range experiments
|
||||
#
|
||||
RANGE
|
||||
0, 100, 1, 4, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 24.75, 49.5, 74.25, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 4, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 12.25, 24.5, 36.75, 49.0
|
||||
RANGE
|
||||
0, 100, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 33.0, 66.0, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 16.33333, 32.66667, 49.0
|
||||
RANGE
|
||||
0, 10, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 3.0, 6.0, 9.0
|
||||
RANGE
|
||||
0, 100, 1, 4, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 24.75, 49.5, 74.25, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 4, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 12.25, 24.5, 36.75, 49.0
|
||||
RANGE
|
||||
0, 100, 1, 3, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 33.0, 66.0, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 3, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 16.33333, 32.66667, 49.0
|
||||
RANGE
|
||||
0, 10, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 3.0, 6.0, 9.0
|
||||
RANGE
|
||||
1, 10, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||
1.0, 3.66667, 6.33333, 9.0
|
||||
RANGE
|
||||
1, 10, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||
1.0, 3.66667, 6.33333, 9.0
|
||||
RANGE
|
||||
1, 11, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.0, 7.0, 10.0
|
||||
RANGE
|
||||
1, 11, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.0, 7.0, 10.0
|
||||
RANGE
|
||||
1, 12, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.33333, 7.66667, 11.0
|
||||
RANGE
|
||||
1, 12, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.33333, 7.66667, 11.0
|
||||
RANGE
|
||||
1, 13, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
RANGE
|
||||
1, 13, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
RANGE
|
||||
1, 14, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
RANGE
|
||||
1, 14, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
RANGE
|
||||
1, 15, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
RANGE
|
||||
1, 15, 1, 3, U
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
#
|
||||
# Vector experiments
|
||||
#
|
||||
VECTOR
|
||||
Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||
1, 0, 0, 1, 0, 0, 1, 0, 0
|
||||
1.0, 1.66667, 3.0
|
||||
VECTOR
|
||||
U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||
2, 0, 0, 2, 0, 0, 2, 0, 0
|
||||
1.0, 1.66667, 2.33333, 3.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||
0, 1, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 1.0, 3.0, 4.0
|
||||
VECTOR
|
||||
U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||
0, 0, 0, 0, 1, 1, 2, 2, 2, 2
|
||||
0.0, 1.33333, 2.66667, 4.0
|
||||
#
|
||||
# Vector experiments with iris
|
||||
#
|
||||
VECTOR
|
||||
Q3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1
|
||||
4.3, 5.4, 6.3, 7.9
|
||||
VECTOR
|
||||
U3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 2, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1
|
||||
4.3, 5.5, 6.7, 7.9
|
||||
VECTOR
|
||||
Q4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2
|
||||
4.3, 5.1, 5.8, 6.4, 7.9
|
||||
VECTOR
|
||||
U4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
|
||||
4.3, 5.2, 6.1, 7.0, 7.9
|
||||
VECTOR
|
||||
Q3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1
|
||||
2.0, 2.9, 3.2, 4.4
|
||||
VECTOR
|
||||
U3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
|
||||
2.0, 2.8, 3.6, 4.4
|
||||
VECTOR
|
||||
Q4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
3, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 0, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 0, 1, 1, 3, 0, 1, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 0, 1, 3, 0, 2, 1, 2, 2, 0, 1, 0, 3, 2, 0, 2, 0, 1, 2, 2, 3, 0, 0, 2, 1, 1, 0, 3, 2, 1, 2, 1, 2, 1, 3, 1, 1, 0, 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, 2, 0, 2, 3, 2
|
||||
2.0, 2.8, 3.0, 3.3, 4.4
|
||||
VECTOR
|
||||
U4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
2, 1, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 3, 1, 3, 2, 2, 2, 2, 2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 3, 1, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1
|
||||
2.0, 2.6, 3.2, 3.8, 4.4
|
||||
VECTOR
|
||||
Q3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
1.0, 2.63333, 4.9, 6.9
|
||||
VECTOR
|
||||
U3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
1.0, 2.96667, 4.93333, 6.9
|
||||
VECTOR
|
||||
Q4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3
|
||||
1.0, 1.6, 4.35, 5.1, 6.9
|
||||
VECTOR
|
||||
U4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2
|
||||
1.0, 2.475, 3.95, 5.425, 6.9
|
||||
VECTOR
|
||||
Q3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.1, 0.86667, 1.6, 2.5
|
||||
VECTOR
|
||||
U3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.1, 0.9, 1.7, 2.5
|
||||
VECTOR
|
||||
Q4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.1, 0.3, 1.3, 1.8, 2.5
|
||||
VECTOR
|
||||
U4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2
|
||||
0.1, 0.7, 1.3, 1.9, 2.5
|
1
tests/lib/Files
Submodule
1
tests/lib/Files
Submodule
Submodule tests/lib/Files added at a5316928d4
12
tests/test
12
tests/test
@@ -1,12 +0,0 @@
|
||||
# Configure, build and run the test suite, stopping at the first step that
# fails. Diagnostics go to stderr so they are not mixed with tool output.

# Step 1: generate the build system in ./build (developer warnings silenced).
if ! cmake -S . -B build -Wno-dev; then
    echo "Error in creating build commands." >&2
    exit 1
fi

# Step 2: compile everything.
if ! cmake --build build; then
    echo "Error in build command." >&2
    exit 1
fi

# Step 3: run the tests from inside the build tree; abort if the directory is
# missing so ctest never runs from the wrong place.
cd build || exit 1
ctest --output-on-failure
|
71
tests/tests_do.py
Normal file
71
tests/tests_do.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import json
|
||||
from sklearn.preprocessing import KBinsDiscretizer
|
||||
|
||||
# Every experiment in the data file takes this many non-comment lines:
# experiment type, specification, expected discretized data, expected cuts.
LINES_PER_EXPERIMENT = 4


def clean_lines(raw_lines):
    """Return the stripped, meaningful lines of the experiments file.

    Comment lines (starting with ``#``) and blank lines are dropped so that a
    stray empty line cannot shift the fixed four-line stride of the records.
    """
    stripped = (line.strip() for line in raw_lines)
    return [line for line in stripped if line and not line.startswith("#")]


def parse_experiment(experiment_type, spec):
    """Turn one experiment specification into KBinsDiscretizer inputs.

    Args:
        experiment_type: ``"RANGE"`` for a ``from, to, step, bins, Q/U`` line;
            anything else is treated as a vector line such as
            ``Q3[1.0, 2.0, 3.0]`` (strategy letter, bin count, JSON list).
        spec: the specification line itself.

    Returns:
        A tuple ``(X, n_bins, strategy)`` where ``X`` is a list of one-element
        rows, ``n_bins`` is an int and ``strategy`` is ``"quantile"`` or
        ``"uniform"``.

    Raises:
        ValueError: if the line does not have the expected layout.
    """
    if experiment_type == "RANGE":
        from_, to_, step_, n_bins_, strategy_ = spec.split(",")
        X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
    else:
        # The bin count is everything between the strategy letter and the
        # opening bracket, so counts with more than one digit are supported.
        bracket = spec.index("[")
        strategy_ = spec[0]
        n_bins_ = spec[1:bracket]
        X = [[float(x)] for x in json.loads(spec[bracket:])]
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    return X, int(n_bins_), strategy


def report_mismatches(expected, result):
    """Print every position where the two sequences differ.

    A length mismatch is reported as a single error instead of relying on
    ``assert``, which disappears when Python runs with ``-O``.

    Returns:
        True if any difference was found, False otherwise.
    """
    if len(expected) != len(result):
        print(
            "* Error: length mismatch. Expected=",
            len(expected),
            "Result=",
            len(result),
        )
        return True
    found = False
    for j, (wanted, got) in enumerate(zip(expected, result)):
        if wanted != got:
            print("* Error at", j, "Expected=", wanted, "Result=", got)
            found = True
    return found


def main(path="datasets/tests.txt"):
    """Check every experiment stored in *path* against KBinsDiscretizer.

    Raises:
        ValueError: if the file does not hold whole four-line experiments.
        Exception: if any discretized value or cut point differs.
    """
    with open(path) as f:
        data = clean_lines(f)
    if len(data) % LINES_PER_EXPERIMENT:
        raise ValueError(
            f"{path}: expected groups of {LINES_PER_EXPERIMENT} lines, "
            f"found {len(data)} lines"
        )

    errors = False
    for i in range(0, len(data), LINES_PER_EXPERIMENT):
        experiment_type, spec, expected_data, cuts_data = data[
            i : i + LINES_PER_EXPERIMENT
        ]
        print("Experiment:", spec)
        X, n_bins, strategy = parse_experiment(experiment_type, spec)
        disc = KBinsDiscretizer(
            n_bins=n_bins,
            encode="ordinal",
            strategy=strategy,
        )
        disc.fit(X)
        #
        # Normalize the cutpoints to remove numerical errors such as
        # 33.0000000001 instead of 33. This is done in place and before
        # transform() so the discretized data uses the rounded edges.
        #
        edges = disc.bin_edges_[0]
        for j, edge in enumerate(edges):
            edges[j] = round(edge, 5)
        result = [int(x) for x in disc.transform(X).flatten()]
        expected = [int(x) for x in expected_data.split(",")]
        #
        # Check the Results
        #
        if report_mismatches(expected, result):
            errors = True
        # For the cut points the values computed here by scikit-learn are
        # shown as "Expected" and the ones stored in the file as "Result".
        file_cuts = [float(x) for x in cuts_data.split(",")]
        if report_mismatches([round(edge, 5) for edge in edges], file_cuts):
            errors = True
    if errors:
        raise Exception("There were errors!")
    print("*** All tests run succesfully! ***")


if __name__ == "__main__":
    main()
|
209
tests/tests_generate.ipynb
Normal file
209
tests/tests_generate.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||
"from sklearn.datasets import load_iris"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiments_range = [\n",
|
||||
" [0, 100, 1, 4, \"Q\"],\n",
|
||||
" [0, 50, 1, 4, \"Q\"],\n",
|
||||
" [0, 100, 1, 3, \"Q\"],\n",
|
||||
" [0, 50, 1, 3, \"Q\"],\n",
|
||||
" [0, 10, 1, 3, \"Q\"],\n",
|
||||
" [0, 100, 1, 4, \"U\"],\n",
|
||||
" [0, 50, 1, 4, \"U\"],\n",
|
||||
" [0, 100, 1, 3, \"U\"],\n",
|
||||
" [0, 50, 1, 3, \"U\"],\n",
|
||||
"# \n",
|
||||
" [0, 10, 1, 3, \"U\"],\n",
|
||||
" [1, 10, 1, 3, \"Q\"],\n",
|
||||
" [1, 10, 1, 3, \"U\"],\n",
|
||||
" [1, 11, 1, 3, \"Q\"],\n",
|
||||
" [1, 11, 1, 3, \"U\"],\n",
|
||||
" [1, 12, 1, 3, \"Q\"],\n",
|
||||
" [1, 12, 1, 3, \"U\"],\n",
|
||||
" [1, 13, 1, 3, \"Q\"],\n",
|
||||
" [1, 13, 1, 3, \"U\"],\n",
|
||||
" [1, 14, 1, 3, \"Q\"],\n",
|
||||
" [1, 14, 1, 3, \"U\"],\n",
|
||||
" [1, 15, 1, 3, \"Q\"],\n",
|
||||
" [1, 15, 1, 3, \"U\"]\n",
|
||||
"]\n",
|
||||
"experiments_vectors = [\n",
|
||||
" (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n",
|
||||
" (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n",
|
||||
" (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def write_lists(file, data, cuts):\n",
|
||||
" sep = \"\"\n",
|
||||
" for res in data:\n",
|
||||
" file.write(f\"{sep}{int(res):d}\")\n",
|
||||
" sep= \", \"\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
" sep = \"\"\n",
|
||||
" for res in cuts:\n",
|
||||
" file.write(sep + str(round(res,5)))\n",
|
||||
" sep = \", \"\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"def normalize_cuts(cuts):\n",
|
||||
" #\n",
|
||||
" # Normalize the cutpoints to remove numerical errors such as 33.0000000001\n",
|
||||
" # instead of 33\n",
|
||||
" #\n",
|
||||
" for k in range(cuts.shape[0]):\n",
|
||||
" for i in range(len(cuts[k])):\n",
|
||||
" cuts[k][i] = round(cuts[k][i], 5)\n",
|
||||
"\n",
|
||||
"with open(\"datasets/tests.txt\", \"w\") as file:\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# from, to, step, #bins, Q/U\\n\")\n",
|
||||
" file.write(\"# discretized data\\n\")\n",
|
||||
" file.write(\"# cut points\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" #\n",
|
||||
" # Range experiments\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# Range experiments\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" for experiment in experiments_range:\n",
|
||||
" file.write(\"RANGE\\n\")\n",
|
||||
" (from_, to_, step_, bins_, strategy) = experiment\n",
|
||||
" disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n",
|
||||
" data = [[x] for x in range(from_, to_, step_)]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||
" #\n",
|
||||
" # Vector experiments\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# Vector experiments\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" for n_bins, experiment in experiments_vectors:\n",
|
||||
" for strategy in [\"Q\", \"U\"]:\n",
|
||||
" file.write(\"VECTOR\\n\")\n",
|
||||
" file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n",
|
||||
" disc = KBinsDiscretizer(\n",
|
||||
" n_bins=n_bins,\n",
|
||||
" encode=\"ordinal\",\n",
|
||||
" \n",
|
||||
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n",
|
||||
" )\n",
|
||||
" data = [[x] for x in experiment]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||
" #\n",
|
||||
" # Vector experiments iris\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\");\n",
|
||||
" file.write(\"# Vector experiments with iris\\n\");\n",
|
||||
" file.write(\"#\\n\");\n",
|
||||
" X, y = load_iris(return_X_y=True)\n",
|
||||
" for i in range(X.shape[1]):\n",
|
||||
" for n_bins in [3, 4]:\n",
|
||||
" for strategy in [\"Q\", \"U\"]:\n",
|
||||
" file.write(\"VECTOR\\n\")\n",
|
||||
" experiment = X[:, i]\n",
|
||||
" file.write(f\"{strategy}{n_bins}{experiment.tolist()}\\n\")\n",
|
||||
" disc = KBinsDiscretizer(\n",
|
||||
" n_bins=n_bins,\n",
|
||||
" encode=\"ordinal\",\n",
|
||||
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\")\n",
|
||||
" data = [[x] for x in experiment]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cut points: [array([ 0., 33., 66., 99.])]\n",
|
||||
"Mistaken transformed data disc.transform([[33]]) = [[0.]]\n",
|
||||
"Reason of the mistake the cutpoint has decimals (double): 33.00000000000001\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Proving the mistakes due to floating point precision\n",
|
||||
"#\n",
|
||||
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||
"\n",
|
||||
"data = [[x] for x in range(100)]\n",
|
||||
"disc = KBinsDiscretizer(n_bins=3, encode=\"ordinal\", strategy=\"quantile\")\n",
|
||||
"disc.fit(data)\n",
|
||||
"print(\"Cut points: \", disc.bin_edges_)\n",
|
||||
"print(\"Mistaken transformed data disc.transform([[33]]) =\", disc.transform([[33]]))\n",
|
||||
"print(\"Reason of the mistake the cutpoint has decimals (double): \", disc.bin_edges_[0][1])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.1.undefined"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -1,16 +0,0 @@
|
||||
#ifndef TYPES_H
#define TYPES_H
// Type aliases shared by the mdlp sources.
// <tuple> and <cstddef> are included explicitly because the aliases below use
// std::tuple and std::size_t; previously they were only available if another
// header happened to pull them in.
#include <cstddef>
#include <map>
#include <tuple>
#include <vector>

// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept because existing includers may rely on unqualified
// std names; consider removing it once they are qualified.
using namespace std;
namespace mdlp {
    // Floating-point type used for samples, cut points and cached values.
    typedef float precision_t;
    typedef std::vector<precision_t> samples_t;
    typedef std::vector<int> labels_t;
    typedef std::vector<std::size_t> indices_t;
    typedef std::vector<precision_t> cutPoints_t;
    // Caches keyed by integer tuples; presumably index ranges used by the
    // entropy and information-gain computations -- confirm against users.
    typedef std::map<std::tuple<int, int>, precision_t> cacheEnt_t;
    typedef std::map<std::tuple<int, int, int>, precision_t> cacheIg_t;
}
#endif
|
38
update_coverage.py
Normal file
38
update_coverage.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
readme_file = "README.md"
# Text that identifies the README line holding the coverage badge.
BADGE_MARKER = "img.shields.io/badge/Coverage"
# The badge is only refreshed when line coverage reaches this percentage.
MINIMUM_COVERAGE = 90


def parse_line_coverage(summary: str) -> float:
    """Extract the line-coverage percentage from ``lcov --summary`` output.

    Only a row that starts with ``lines`` is considered, so other output that
    merely contains that word (for example a tracefile path) is ignored.

    Returns:
        The percentage as a float, or 0 when no such row is present.
    """
    for line in summary.splitlines():
        if line.strip().startswith("lines"):
            return float(line.split(":")[1].split("%")[0])
    return 0


def build_badge_line(percentage: float) -> str:
    """Return the README markdown line for the coverage badge.

    The decimal point is written as a comma in the badge text and the percent
    sign is URL-encoded as ``%25``. The URL contains ``BADGE_MARKER`` so the
    line is found again the next time the script runs.
    """
    percentage_label = str(percentage).replace(".", ",")
    return (
        "[![Coverage Badge](https://img.shields.io/badge/"
        f"Coverage-{percentage_label}%25-green)](html/index.html)"
    )


def replace_badge_line(lines, badge_line: str) -> list:
    """Return *lines* with every badge line replaced by *badge_line*.

    Lines that do not contain ``BADGE_MARKER`` are passed through unchanged.
    """
    return [
        badge_line + "\n" if BADGE_MARKER in line else line for line in lines
    ]


def main(argv=None):
    """Read the lcov summary and refresh the coverage badge in the README.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first one is the directory that holds
            ``coverage.info``.

    Exits with status 2 when the directory argument is missing and with
    status 1 when coverage is below ``MINIMUM_COVERAGE``.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("Usage: update_coverage.py <build_dir>", file=sys.stderr)
        sys.exit(2)
    print("Updating coverage...")
    # Generate badge line
    # An argument list (no shell) keeps a path with spaces or shell
    # metacharacters from being interpreted by a shell.
    output = subprocess.check_output(
        ["lcov", "--summary", args[0] + "/coverage.info"],
    )
    percentage = parse_line_coverage(output.decode("utf-8").strip())
    print(f"Coverage: {percentage}%")
    if percentage < MINIMUM_COVERAGE:
        print("⛔Coverage is less than 90%. I won't update the badge.")
        sys.exit(1)
    coverage_line = build_badge_line(percentage)
    # Update README.md
    with open(readme_file, "r", encoding="utf-8") as f:
        lines = f.readlines()
    with open(readme_file, "w", encoding="utf-8") as f:
        f.writelines(replace_badge_line(lines, coverage_line))
    print(f"✅Coverage updated with value: {percentage}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user