mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 23:45:57 +00:00
Compare commits
128 Commits
v1.0.0.0
...
8a1b68376d
Author | SHA1 | Date | |
---|---|---|---|
8a1b68376d
|
|||
|
563a84659f | ||
1b9d924ebe
|
|||
08d8910b34
|
|||
|
6d8b55a808 | ||
c1759ba1ce
|
|||
f1dae498ac
|
|||
4418ea8a6f
|
|||
159e24b5cb
|
|||
77e28e728e
|
|||
18db982dec
|
|||
99b751a4d4
|
|||
059fd33b4e
|
|||
e068bf0a54
|
|||
|
cfb993f5ec | ||
7d62d6af4a
|
|||
ea70535984
|
|||
2d8b949abd
|
|||
ab12622009
|
|||
248a511972
|
|||
d9bd0126f9
|
|||
210af46a88
|
|||
2db60e007d
|
|||
1cf245fa49
|
|||
|
e36d9af8f9 | ||
7b0673fd4b
|
|||
a1346e1943
|
|||
b3fc598c29
|
|||
cc1efa0b4e
|
|||
90965877eb
|
|||
c4e6c041fe
|
|||
7938df7f0f
|
|||
7ee9896734
|
|||
8f7f605670
|
|||
2f55b27691
|
|||
378fbd51ef
|
|||
402d0da878
|
|||
f34bcc2ed7
|
|||
c9ba35fb58
|
|||
e205668906
|
|||
633aa52849
|
|||
61de687476
|
|||
7ff88c8e4b
|
|||
|
638bb2a59e | ||
|
f258fc220f | ||
0beeda320d
|
|||
6b68a41c42
|
|||
236d1b2f8b
|
|||
52ee93178f
|
|||
eeda4347e9
|
|||
5708dc3de9
|
|||
fbffc3a9c4
|
|||
ab3786e2a2
|
|||
be1917d05b
|
|||
5679d607e5
|
|||
e8559faf1f
|
|||
b21e85f5e8
|
|||
|
db76afc4e2 | ||
a1f26a257c
|
|||
22997f5d69
|
|||
ef16488ffa
|
|||
449bf3a67e
|
|||
e689d1f69c
|
|||
d77d27459b
|
|||
49c08bfe12
|
|||
62e9276fbf
|
|||
c52c7d0828
|
|||
|
0b35a15d62 | ||
c662a96da8
|
|||
0ead15be7c
|
|||
da41a9317d
|
|||
42e83b3d26
|
|||
77135739cf
|
|||
27ea3bf338
|
|||
12222f7903
|
|||
cfade7a556
|
|||
f0845c5bd1
|
|||
1f4abade2c
|
|||
770502c8e5
|
|||
ed7433672d
|
|||
14860ea0b9
|
|||
d9a6f528f6
|
|||
7551b0d669
|
|||
ffb8df4d1c
|
|||
ed784736ca
|
|||
49e9dd3e12
|
|||
083a56b311
|
|||
4492252729
|
|||
c00b7a613c
|
|||
200015000c
|
|||
ce9ddb3be3
|
|||
90428218c2
|
|||
0b63d9ace0
|
|||
6875127394
|
|||
747f610ce9
|
|||
a7d13f602d
|
|||
552b03afc9
|
|||
4a9664c4aa
|
|||
964555de20
|
|||
d6cece1006
|
|||
|
e25ca378f0 | ||
71c1dc2928
|
|||
ebea31afd1
|
|||
89d675eb1f
|
|||
e8fcc20a32
|
|||
848ee7ba24
|
|||
|
32a6fd9ba0 | ||
cd04f97fd0
|
|||
458a313aee
|
|||
e97aea2a4d
|
|||
4707bc0b7f
|
|||
8c868981e8
|
|||
e812e91540
|
|||
dddeea4024
|
|||
5b7d66d922
|
|||
|
5d5eb98afc | ||
a44f01460a
|
|||
de25ba78bd
|
|||
79c029832a
|
|||
5bb0e1e6ca
|
|||
dec1295933
|
|||
04c1772019
|
|||
e37702dcb0
|
|||
1c7492d3b6
|
|||
2ab828b400
|
|||
|
1b89f5927c
|
||
7b20bde428
|
|||
13ebb43bf3
|
11
.conan/profiles/default
Normal file
11
.conan/profiles/default
Normal file
@@ -0,0 +1,11 @@
|
||||
[settings]
|
||||
os=Linux
|
||||
arch=x86_64
|
||||
compiler=gcc
|
||||
compiler.version=11
|
||||
compiler.libcxx=libstdc++11
|
||||
build_type=Release
|
||||
|
||||
[conf]
|
||||
tools.system.package_manager:mode=install
|
||||
tools.system.package_manager:sudo=True
|
16
.devcontainer/Dockerfile
Normal file
16
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
FROM mcr.microsoft.com/devcontainers/cpp:0-ubuntu-22.04
|
||||
|
||||
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||
&& apt-get -y install --no-install-recommends \
|
||||
python3 \
|
||||
python3-pip \
|
||||
lcov \
|
||||
cmake \
|
||||
&& apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip3 install --no-cache-dir \
|
||||
cpplint \
|
||||
cmake-format\
|
||||
gcovr
|
||||
# [Optional] Uncomment this section to install additional vcpkg ports.
|
||||
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
|
32
.devcontainer/devcontainer.json
Normal file
32
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,32 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
|
||||
{
|
||||
"name": "C++",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
// "postCreateCommand": "gcc -v",
|
||||
// Configure tool-specific properties.
|
||||
"customizations": {
|
||||
// Configure properties specific to VS Code.
|
||||
"vscode": {
|
||||
"settings": {},
|
||||
"extensions": [
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.cpptools-extension-pack",
|
||||
"ms-vscode.cpptools-themes",
|
||||
"jbenden.c-cpp-flylint",
|
||||
"matepek.vscode-catch2-test-adapter",
|
||||
"ms-vscode.cmake-tools",
|
||||
"GitHub.copilot"
|
||||
]
|
||||
}
|
||||
}
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
59
.devcontainer/reinstall-cmake.sh
Normal file
59
.devcontainer/reinstall-cmake.sh
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
set -e
|
||||
|
||||
CMAKE_VERSION=${1:-"none"}
|
||||
|
||||
if [ "${CMAKE_VERSION}" = "none" ]; then
|
||||
echo "No CMake version specified, skipping CMake reinstallation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Cleanup temporary directory and associated files when exiting the script.
|
||||
# EXIT trap handler: removes the temporary download directory (if one was
# created) and re-exits with the status the script was terminating with.
cleanup() {
|
||||
# Capture the exit status first; any later command would overwrite $?.
EXIT_CODE=$?
|
||||
# Disable errexit so a failing cleanup command cannot abort the handler.
set +e
|
||||
# TMP_DIR is only assigned later in the script; skip removal if it is unset/empty.
if [[ -n "${TMP_DIR}" ]]; then
|
||||
echo "Executing cleanup of tmp files"
|
||||
rm -Rf "${TMP_DIR}"
|
||||
fi
|
||||
# Propagate the original status instead of the status of the cleanup commands.
exit $EXIT_CODE
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
|
||||
echo "Installing CMake..."
|
||||
apt-get -y purge --auto-remove cmake
|
||||
mkdir -p /opt/cmake
|
||||
|
||||
architecture=$(dpkg --print-architecture)
|
||||
case "${architecture}" in
|
||||
arm64)
|
||||
ARCH=aarch64 ;;
|
||||
amd64)
|
||||
ARCH=x86_64 ;;
|
||||
*)
|
||||
echo "Unsupported architecture ${architecture}."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
|
||||
CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
|
||||
TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
|
||||
|
||||
echo "${TMP_DIR}"
|
||||
cd "${TMP_DIR}"
|
||||
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
|
||||
|
||||
sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
|
||||
sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
|
||||
|
||||
ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
|
||||
ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest
|
44
.github/workflows/build.yml
vendored
Normal file
44
.github/workflows/build.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
name: Build
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- "*"
|
||||
pull_request:
|
||||
types: [ opened, synchronize, reopened ]
|
||||
jobs:
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
|
||||
submodules: recursive
|
||||
- name: Install sonar-scanner and build-wrapper
|
||||
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
||||
- name: Install lcov & gcovr
|
||||
run: |
|
||||
sudo apt-get -y install lcov
|
||||
sudo apt-get -y install gcovr
|
||||
- name: Install Libtorch
|
||||
run: |
|
||||
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
|
||||
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
||||
- name: Tests & build-wrapper
|
||||
run: |
|
||||
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON
|
||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Debug
|
||||
cmake --build build -j 4
|
||||
cd build
|
||||
ctest -C Debug --output-on-failure -j 4
|
||||
gcovr -f ../src/CPPFImdlp.cpp -f ../src/Metrics.cpp -f ../src/BinDisc.cpp -f ../src/Discretizer.cpp --txt --sonarqube=coverage.xml
|
||||
- name: Run sonar-scanner
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
run: |
|
||||
sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
||||
--define sonar.coverageReportPaths=build/coverage.xml
|
6
.gitignore
vendored
6
.gitignore
vendored
@@ -31,7 +31,13 @@
|
||||
*.out
|
||||
*.app
|
||||
**/build
|
||||
build_Debug
|
||||
build_Release
|
||||
build_debug
|
||||
build_release
|
||||
**/lcoverage
|
||||
.idea
|
||||
cmake-*
|
||||
**/CMakeFiles
|
||||
**/gcovr-report
|
||||
CMakeUserPresets.json
|
||||
|
42
.vscode/launch.json
vendored
Normal file
42
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "C++ Launch config",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/tests/build/BinDisc_unittest",
|
||||
"cwd": "${workspaceFolder}/tests/build",
|
||||
"args": [],
|
||||
"launchCompleteCommand": "exec-run",
|
||||
"stopAtEntry": false,
|
||||
"linux": {
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "/usr/bin/gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Enable pretty-printing for gdb",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
},
|
||||
{
|
||||
"description": "Auto load symbols when loading an .so file",
|
||||
"text": "set auto-solib-add",
|
||||
"ignoreFailures": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"osx": {
|
||||
"type": "lldb",
|
||||
"MIMode": "lldb"
|
||||
},
|
||||
"windows": {
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "C:\\MinGw\\bin\\gdb.exe"
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
113
.vscode/settings.json
vendored
Normal file
113
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
{
|
||||
"sonarlint.connectedMode.project": {
|
||||
"connectionId": "rmontanana",
|
||||
"projectKey": "rmontanana_mdlp"
|
||||
},
|
||||
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
|
||||
"cmake.configureOnOpen": true,
|
||||
"sonarlint.pathToCompileCommands": "${workspaceFolder}/build/compile_commands.json",
|
||||
"files.associations": {
|
||||
"*.rmd": "markdown",
|
||||
"*.py": "python",
|
||||
"vector": "cpp",
|
||||
"__bit_reference": "cpp",
|
||||
"__bits": "cpp",
|
||||
"__config": "cpp",
|
||||
"__debug": "cpp",
|
||||
"__errc": "cpp",
|
||||
"__hash_table": "cpp",
|
||||
"__locale": "cpp",
|
||||
"__mutex_base": "cpp",
|
||||
"__node_handle": "cpp",
|
||||
"__nullptr": "cpp",
|
||||
"__split_buffer": "cpp",
|
||||
"__string": "cpp",
|
||||
"__threading_support": "cpp",
|
||||
"__tuple": "cpp",
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"bitset": "cpp",
|
||||
"cctype": "cpp",
|
||||
"chrono": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"compare": "cpp",
|
||||
"complex": "cpp",
|
||||
"concepts": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"ctime": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"exception": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"ios": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"locale": "cpp",
|
||||
"memory": "cpp",
|
||||
"mutex": "cpp",
|
||||
"new": "cpp",
|
||||
"optional": "cpp",
|
||||
"ostream": "cpp",
|
||||
"ratio": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"string": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"variant": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"iostream": "cpp",
|
||||
"iomanip": "cpp",
|
||||
"numeric": "cpp",
|
||||
"set": "cpp",
|
||||
"__tree": "cpp",
|
||||
"deque": "cpp",
|
||||
"list": "cpp",
|
||||
"map": "cpp",
|
||||
"unordered_set": "cpp",
|
||||
"any": "cpp",
|
||||
"condition_variable": "cpp",
|
||||
"forward_list": "cpp",
|
||||
"fstream": "cpp",
|
||||
"stack": "cpp",
|
||||
"thread": "cpp",
|
||||
"__memory": "cpp",
|
||||
"filesystem": "cpp",
|
||||
"*.toml": "toml",
|
||||
"utility": "cpp",
|
||||
"span": "cpp",
|
||||
"*.tcc": "cpp",
|
||||
"bit": "cpp",
|
||||
"charconv": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"codecvt": "cpp",
|
||||
"functional": "cpp",
|
||||
"iterator": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"random": "cpp",
|
||||
"source_location": "cpp",
|
||||
"format": "cpp",
|
||||
"numbers": "cpp",
|
||||
"semaphore": "cpp",
|
||||
"stop_token": "cpp",
|
||||
"text_encoding": "cpp",
|
||||
"typeindex": "cpp",
|
||||
"valarray": "cpp",
|
||||
"csignal": "cpp",
|
||||
"regex": "cpp",
|
||||
"future": "cpp",
|
||||
"shared_mutex": "cpp"
|
||||
}
|
||||
}
|
26
.vscode/tasks.json
vendored
Normal file
26
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "cmake",
|
||||
"label": "CMake: build",
|
||||
"command": "build",
|
||||
"targets": [
|
||||
"all"
|
||||
],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"problemMatcher": [],
|
||||
"detail": "CMake template build task"
|
||||
},
|
||||
{
|
||||
"type": "cmake",
|
||||
"label": "CMake: configure",
|
||||
"command": "configure",
|
||||
"problemMatcher": [],
|
||||
"detail": "CMake template configure task"
|
||||
}
|
||||
]
|
||||
}
|
222
CHANGELOG.md
Normal file
222
CHANGELOG.md
Normal file
@@ -0,0 +1,222 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [2.1.1] - 2025-07-17
|
||||
|
||||
### Internal Changes
|
||||
|
||||
- Updated Libtorch to version 2.7.1
|
||||
- Updated ArffFiles library to version 1.2.1
|
||||
- Enhanced CMake configuration for better compatibility
|
||||
|
||||
## [2.1.0] - 2025-06-28
|
||||
|
||||
### Added
|
||||
|
||||
- Conan dependency manager support
|
||||
- Technical analysis report
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated README.md
|
||||
- Refactored library version and installation system
|
||||
- Updated config variable names
|
||||
|
||||
### Fixed
|
||||
|
||||
- Removed unneeded semicolon
|
||||
|
||||
## [2.0.1] - 2024-07-22
|
||||
|
||||
### Added
|
||||
|
||||
- CMake install target and make install command
|
||||
- Flag to control sample building in Makefile
|
||||
|
||||
### Changed
|
||||
|
||||
- Library name changed to `fimdlp`
|
||||
- Updated version numbers across test files
|
||||
|
||||
### Fixed
|
||||
|
||||
- Version number consistency in tests
|
||||
|
||||
## [2.0.0] - 2024-07-04
|
||||
|
||||
### Added
|
||||
|
||||
- Makefile with build & test actions for easier development
|
||||
- PyTorch (libtorch) integration for tensor operations
|
||||
|
||||
### Changed
|
||||
|
||||
- Major refactoring of build system
|
||||
- Updated build workflows and CI configuration
|
||||
|
||||
### Fixed
|
||||
|
||||
- BinDisc quantile calculation errors (#9)
|
||||
- Error in percentile method calculation
|
||||
- Integer type issues in calculations
|
||||
- Multiple GitHub Actions configuration fixes
|
||||
|
||||
## [1.2.1] - 2024-06-08
|
||||
|
||||
### Added
|
||||
|
||||
- PyTorch tensor methods for discretization
|
||||
- Improved library build system
|
||||
|
||||
### Changed
|
||||
|
||||
- Refactored sample build process
|
||||
|
||||
### Fixed
|
||||
|
||||
- Library creation and linking issues
|
||||
- Multiple GitHub Actions workflow fixes
|
||||
|
||||
## [1.2.0] - 2024-06-05
|
||||
|
||||
### Added
|
||||
|
||||
- **Discretizer** - Abstract base class for all discretization algorithms (#8)
|
||||
- **BinDisc** - K-bins discretization with quantile and uniform strategies (#7)
|
||||
- Transform method to discretize values using existing cut points
|
||||
- Support for multiple datasets in sample program
|
||||
- Docker development container configuration
|
||||
|
||||
### Changed
|
||||
|
||||
- Refactored system types throughout the library
|
||||
- Improved sample program with better dataset handling
|
||||
- Enhanced build system with debug options
|
||||
|
||||
### Fixed
|
||||
|
||||
- Transform method initialization issues
|
||||
- ARFF file attribute name extraction
|
||||
- Sample program library binary separation
|
||||
|
||||
## [1.1.3] - 2024-06-05
|
||||
|
||||
### Added
|
||||
|
||||
- `max_cutpoints` hyperparameter for controlling algorithm complexity
|
||||
- `max_depth` and `min_length` as configurable hyperparameters
|
||||
- Enhanced sample program with hyperparameter support
|
||||
- Additional datasets for testing
|
||||
|
||||
### Changed
|
||||
|
||||
- Improved constructor design and parameter handling
|
||||
- Enhanced test coverage and reporting
|
||||
- Refactored build system configuration
|
||||
|
||||
### Fixed
|
||||
|
||||
- Depth initialization in fit method
|
||||
- Code quality improvements and smell fixes
|
||||
- Exception handling in value cut point calculations
|
||||
|
||||
## [1.1.2] - 2023-04-01
|
||||
|
||||
### Added
|
||||
|
||||
- Comprehensive test suite with GitHub Actions CI
|
||||
- SonarCloud integration for code quality analysis
|
||||
- Enhanced build system with automated testing
|
||||
|
||||
### Changed
|
||||
|
||||
- Improved GitHub Actions workflow configuration
|
||||
- Updated project structure for better maintainability
|
||||
|
||||
### Fixed
|
||||
|
||||
- Build system configuration issues
|
||||
- Test execution and coverage reporting
|
||||
|
||||
## [1.1.1] - 2023-02-22
|
||||
|
||||
### Added
|
||||
|
||||
- Limits header for proper compilation
|
||||
- Enhanced build system support
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated version numbering system
|
||||
- Improved SonarCloud configuration
|
||||
|
||||
### Fixed
|
||||
|
||||
- ValueCutPoint exception handling (removed unnecessary exception)
|
||||
- Build system compatibility issues
|
||||
- GitHub Actions token configuration
|
||||
|
||||
## [1.1.0] - 2023-02-21
|
||||
|
||||
### Added
|
||||
|
||||
- Classic algorithm implementation for performance comparison
|
||||
- Enhanced ValueCutPoint logic with same_values detection
|
||||
- Glass dataset support in sample program
|
||||
- Debug configuration for development
|
||||
|
||||
### Changed
|
||||
|
||||
- Refactored ValueCutPoint algorithm for better accuracy
|
||||
- Improved candidate selection logic
|
||||
- Enhanced sample program with multiple datasets
|
||||
|
||||
### Fixed
|
||||
|
||||
- Sign error in valueCutPoint calculation
|
||||
- Final cut value computation
|
||||
- Duplicate dataset handling in sample
|
||||
|
||||
## [1.0.0.0] - 2022-12-21
|
||||
|
||||
### Added
|
||||
|
||||
- Initial release of MDLP (Minimum Description Length Principle) discretization library
|
||||
- Core CPPFImdlp algorithm implementation based on Fayyad & Irani's paper
|
||||
- Entropy and information gain calculation methods
|
||||
- Sample program demonstrating library usage
|
||||
- CMake build system
|
||||
- Basic test suite
|
||||
- ARFF file format support for datasets
|
||||
|
||||
### Features
|
||||
|
||||
- Recursive discretization using entropy-based criteria
|
||||
- Stable sorting with tie-breaking for identical values
|
||||
- Configurable algorithm parameters
|
||||
- Cross-platform C++ implementation
|
||||
|
||||
---
|
||||
|
||||
## Release Notes
|
||||
|
||||
### Version 2.x
|
||||
|
||||
- **Breaking Changes**: Library renamed to `fimdlp`
|
||||
- **Major Enhancement**: PyTorch integration for improved performance
|
||||
- **New Features**: Comprehensive discretization framework with multiple algorithms
|
||||
|
||||
### Version 1.x
|
||||
|
||||
- **Core Algorithm**: MDLP discretization implementation
|
||||
- **Extensibility**: Hyperparameter support and algorithm variants
|
||||
- **Quality**: Comprehensive testing and CI/CD pipeline
|
||||
|
||||
### Version 1.0.x
|
||||
|
||||
- **Foundation**: Initial stable implementation
|
||||
- **Algorithm**: Core MDLP discretization functionality
|
77
CLAUDE.md
Normal file
77
CLAUDE.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
This is a C++ implementation of the MDLP (Minimum Description Length Principle) discretization algorithm based on Fayyad & Irani's paper. The library provides discretization methods for continuous-valued attributes in classification learning.
|
||||
|
||||
## Build System
|
||||
|
||||
The project uses CMake with a Makefile wrapper for common tasks:
|
||||
|
||||
### Common Commands
|
||||
- `make build` - Build release version with sample program
|
||||
- `make test` - Run full test suite with coverage report
|
||||
- `make install` - Install the library
|
||||
|
||||
### Build Configurations
|
||||
- **Release**: Built in `build_release/` directory
|
||||
- **Debug**: Built in `build_debug/` directory (for testing)
|
||||
|
||||
### Dependencies
|
||||
- PyTorch (libtorch) - Required dependency
|
||||
- GoogleTest - Fetched automatically for testing
|
||||
- Coverage tools: lcov, genhtml
|
||||
|
||||
## Code Architecture
|
||||
|
||||
### Core Components
|
||||
|
||||
1. **Discretizer** (`src/Discretizer.h/cpp`) - Abstract base class for all discretizers
|
||||
2. **CPPFImdlp** (`src/CPPFImdlp.h/cpp`) - Main MDLP algorithm implementation
|
||||
3. **BinDisc** (`src/BinDisc.h/cpp`) - K-bins discretization (quantile/uniform strategies)
|
||||
4. **Metrics** (`src/Metrics.h/cpp`) - Entropy and information gain calculations
|
||||
|
||||
### Key Data Types
|
||||
- `samples_t` - Input data samples
|
||||
- `labels_t` - Classification labels
|
||||
- `indices_t` - Index arrays for sorting/processing
|
||||
- `precision_t` - Floating-point precision type
|
||||
|
||||
### Algorithm Flow
|
||||
1. Data is sorted using labels as tie-breakers for identical values
|
||||
2. MDLP recursively finds optimal cut points using entropy-based criteria
|
||||
3. Cut points are validated to ensure meaningful splits
|
||||
4. Transform method maps continuous values to discrete bins
|
||||
|
||||
## Testing
|
||||
|
||||
Tests are built with GoogleTest and include:
|
||||
- `Metrics_unittest` - Entropy/information gain tests
|
||||
- `FImdlp_unittest` - Core MDLP algorithm tests
|
||||
- `BinDisc_unittest` - K-bins discretization tests
|
||||
- `Discretizer_unittest` - Base class functionality tests
|
||||
|
||||
### Running Tests
|
||||
```bash
|
||||
make test # Runs all tests and generates coverage report
|
||||
cd build_debug/tests && ctest # Run tests directly
|
||||
```
|
||||
|
||||
Coverage reports are generated at `build_debug/tests/coverage/index.html`.
|
||||
|
||||
## Sample Usage
|
||||
|
||||
The sample program demonstrates basic usage:
|
||||
```bash
|
||||
build_release/sample/sample -f iris -m 2
|
||||
```
|
||||
|
||||
## Development Notes
|
||||
|
||||
- The library uses PyTorch tensors for efficient numerical operations
|
||||
- Code follows C++17 standards
|
||||
- Coverage is maintained at 100%
|
||||
- The implementation handles edge cases like duplicate values and small intervals
|
||||
- Conan package manager support is available via `conanfile.py`
|
@@ -1,7 +1,81 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
project(fimdlp
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/mdlp"
|
||||
VERSION 2.1.1
|
||||
)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
# Find dependencies
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
|
||||
# Options
|
||||
# -------
|
||||
# option() expects (<variable> "<help_text>" [value]). Without a help string the
# literal OFF is consumed as the description instead of the default value.
option(ENABLE_TESTING "Build the unit tests" OFF)
|
||||
option(COVERAGE "Enable code coverage" OFF)
|
||||
|
||||
add_subdirectory(config)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-elide-constructors")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
message(STATUS "Debug mode")
|
||||
else()
|
||||
message(STATUS "Release mode")
|
||||
endif()
|
||||
|
||||
if (ENABLE_TESTING)
|
||||
message(STATUS "Testing is enabled")
|
||||
enable_testing()
|
||||
set(CODE_COVERAGE ON)
|
||||
set(GCC_COVERAGE_LINK_FLAGS "${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
add_subdirectory(tests)
|
||||
else()
|
||||
message(STATUS "Testing is disabled")
|
||||
endif()
|
||||
|
||||
message(STATUS "Building sample")
|
||||
add_subdirectory(sample)
|
||||
|
||||
include_directories(
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
add_library(fimdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp)
|
||||
target_link_libraries(fimdlp PRIVATE torch::torch)
|
||||
|
||||
# Installation
|
||||
# ------------
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
install(TARGETS fimdlp
|
||||
EXPORT fimdlpTargets
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
|
||||
install(DIRECTORY src/ DESTINATION include/fimdlp FILES_MATCHING PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/config.h DESTINATION include/fimdlp)
|
||||
|
||||
install(EXPORT fimdlpTargets
|
||||
FILE fimdlpTargets.cmake
|
||||
NAMESPACE fimdlp::
|
||||
DESTINATION lib/cmake/fimdlp)
|
||||
|
||||
configure_file(fimdlpConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfig.cmake" @ONLY)
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/fimdlpConfigVersion.cmake"
|
||||
DESTINATION lib/cmake/fimdlp)
|
||||
|
||||
|
153
CONAN_README.md
Normal file
153
CONAN_README.md
Normal file
@@ -0,0 +1,153 @@
|
||||
# Conan Package for fimdlp
|
||||
|
||||
This directory contains the Conan package configuration for the fimdlp library.
|
||||
|
||||
## Dependencies
|
||||
|
||||
The package manages the following dependencies:
|
||||
|
||||
### Build Requirements
|
||||
- **libtorch/2.4.1** - PyTorch C++ library for tensor operations
|
||||
|
||||
### Test Requirements (when testing enabled)
|
||||
- **catch2/3.8.1** - Modern C++ testing framework
|
||||
- **arff-files** - ARFF file format support (included locally in tests/lib/Files/)
|
||||
|
||||
## Building with Conan
|
||||
|
||||
### 1. Install Dependencies and Build
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
conan install . --output-folder=build --build=missing
|
||||
|
||||
# Build the project
|
||||
cd build
|
||||
cmake .. -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build .
|
||||
```
|
||||
|
||||
### 2. Using the Build Script
|
||||
|
||||
```bash
|
||||
# Build release version
|
||||
./scripts/build_conan.sh
|
||||
|
||||
# Build with tests
|
||||
./scripts/build_conan.sh --test
|
||||
```
|
||||
|
||||
## Creating a Package
|
||||
|
||||
### 1. Create Package Locally
|
||||
|
||||
```bash
|
||||
conan create . --profile:build=default --profile:host=default
|
||||
```
|
||||
|
||||
### 2. Create Package with Options
|
||||
|
||||
```bash
|
||||
# Create with testing enabled
|
||||
conan create . -o enable_testing=True --profile:build=default --profile:host=default
|
||||
|
||||
# Create shared library version
|
||||
conan create . -o shared=True --profile:build=default --profile:host=default
|
||||
```
|
||||
|
||||
### 3. Using the Package Creation Script
|
||||
|
||||
```bash
|
||||
./scripts/create_package.sh
|
||||
```
|
||||
|
||||
## Uploading to Cimmeria
|
||||
|
||||
### 1. Configure Remote
|
||||
|
||||
```bash
|
||||
# Add Cimmeria remote
|
||||
conan remote add cimmeria <cimmeria-server-url>
|
||||
|
||||
# Login to Cimmeria
|
||||
conan remote login cimmeria <username>
|
||||
```
|
||||
|
||||
### 2. Upload Package
|
||||
|
||||
```bash
|
||||
# Upload the package
|
||||
conan upload fimdlp/2.1.0 --remote=cimmeria
|
||||
|
||||
# Or use the script (will configure remote instructions if not set up)
|
||||
./scripts/create_package.sh
|
||||
```
|
||||
|
||||
## Using the Package
|
||||
|
||||
### In conanfile.txt
|
||||
|
||||
```ini
|
||||
[requires]
|
||||
fimdlp/2.1.0
|
||||
|
||||
[generators]
|
||||
CMakeDeps
|
||||
CMakeToolchain
|
||||
```
|
||||
|
||||
### In conanfile.py
|
||||
|
||||
```python
|
||||
def requirements(self):
|
||||
self.requires("fimdlp/2.1.0")
|
||||
```
|
||||
|
||||
### In CMakeLists.txt
|
||||
|
||||
```cmake
|
||||
find_package(fimdlp REQUIRED)
|
||||
target_link_libraries(your_target fimdlp::fimdlp)
|
||||
```
|
||||
|
||||
## Package Options
|
||||
|
||||
| Option | Values | Default | Description |
|
||||
|--------|--------|---------|-------------|
|
||||
| shared | True/False | False | Build shared library |
|
||||
| fPIC | True/False | True | Position independent code |
|
||||
| enable_testing | True/False | False | Enable test suite |
|
||||
| enable_sample | True/False | False | Build sample program |
|
||||
|
||||
## Example Usage
|
||||
|
||||
```cpp
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <fimdlp/Metrics.h>
|
||||
|
||||
int main() {
|
||||
// Create MDLP discretizer
|
||||
mdlp::CPPFImdlp discretizer;
|
||||
|
||||
// Calculate entropy
|
||||
Metrics metrics;
|
||||
std::vector<int> labels = {0, 1, 0, 1, 1};
|
||||
double entropy = metrics.entropy(labels);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The package includes comprehensive tests that can be enabled with:
|
||||
|
||||
```bash
|
||||
conan create . -o enable_testing=True
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- C++17 compatible compiler
|
||||
- CMake 3.20 or later
|
||||
- Conan 2.0 or later
|
163
CPPFImdlp.cpp
163
CPPFImdlp.cpp
@@ -1,163 +0,0 @@
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
namespace mdlp {
|
||||
// Build an unfitted discretizer. `algorithm` is stored as given; fit() is
// the place where it is checked and dispatched on.
CPPFImdlp::CPPFImdlp(int algorithm)
    : algorithm(algorithm),
    indices(),
    X(),
    y(),
    // Metrics is handed this object's own y and indices members.
    metrics(y, indices)
{
}

// Nothing to release by hand: the compiler-generated destructor is used.
CPPFImdlp::~CPPFImdlp() = default;
|
||||
// Fit the discretizer on samples X_ with labels y_ and store the resulting
// cut points (retrieve them with getCutPoints()).
//
// Throws invalid_argument when X_ and y_ differ in size, when they are empty,
// or when the configured algorithm is neither 0 nor 1.
// Returns *this so that calls can be chained.
CPPFImdlp& CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{
    // Validate everything up front so that a rejected call leaves the state
    // of a previous successful fit (X, y, cutPoints) untouched and no sorting
    // work is done for a request that is going to fail anyway.
    if (X_.size() != y_.size()) {
        throw invalid_argument("X and y must have the same size");
    }
    if (X_.size() == 0 || y_.size() == 0) {
        throw invalid_argument("X and y must have at least one element");
    }
    if (algorithm != 0 && algorithm != 1) {
        throw invalid_argument("algorithm must be 0 or 1");
    }
    X = X_;
    y = y_;
    cutPoints.clear();
    indices = sortIndices(X_, y_);
    metrics.setData(y, indices);
    if (algorithm == 0) {
        computeCutPoints(0, X.size());
    } else {
        computeCutPointsAlternative(0, X.size());
    }
    return *this;
}
|
||||
// Return the midpoint between the value at sorted position idx and the
// nearest different value found walking backwards, never reading below
// position start.
precision_t CPPFImdlp::halfWayValueCutPoint(size_t start, size_t idx)
{
    size_t pos = idx - 1;
    const precision_t current = X[indices[idx]];
    precision_t before = X[indices[pos]];
    // definition 2 of the paper => X[t-1] < X[t]
    while (pos > start && current == before) {
        --pos;
        before = X[indices[pos]];
    }
    return (before + current) / 2;
}
|
||||
// Refine a candidate cut inside the sorted interval [start, end).
// Returns a tuple holding (a) the cut value, computed as the midpoint of
// `previous` and `actual`, and (b) the adjusted cut index.
// Both loops rely on the exact order of the post-increment/post-decrement and
// the short-circuit evaluation; do not reorder the operands.
tuple<precision_t, size_t> CPPFImdlp::completeValueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t idxPrev = cut - 1;
|
||||
// NOTE(review): `next` is assigned below but never read in this function.
precision_t previous, next, actual;
|
||||
previous = X[indices[idxPrev]];
|
||||
next = actual = X[indices[cut]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
// Walk backwards while the previous value equals `actual`; idxPrev is
// decremented on every test, but X is only read while the old idxPrev > start.
while (idxPrev-- > start && actual == previous) {
|
||||
previous = X[indices[idxPrev]];
|
||||
}
|
||||
// get the last equal value of X in the interval
|
||||
// Empty-bodied loop: cut is incremented on every test, including the one
// that ends the loop.
while (actual == X[indices[cut++]] && cut < end);
|
||||
// If no different value was found going backwards, take the value at the
// current cut position instead (only while it is still inside the interval).
if (previous == actual && cut < end)
|
||||
actual = X[indices[cut]];
|
||||
// Undo the extra increment performed by the loop test above.
cut--;
|
||||
return make_tuple((previous + actual) / 2, cut);
|
||||
}
|
||||
// Recursively split the sorted interval [start, end): pick the best
// candidate, keep it only if the MDLP criterion accepts it, store the refined
// cut value and recurse on both sides of the refined cut index.
void CPPFImdlp::computeCutPoints(size_t start, size_t end)
{
    if (end - start < 2) {
        return;
    }
    const size_t candidate = getCandidate(start, end);
    // getCandidate reports "no candidate" with the maximum size_t value
    if (candidate == numeric_limits<size_t>::max()) {
        return;
    }
    if (!mdlp(start, candidate, end)) {
        return;
    }
    const tuple<precision_t, size_t> refined = completeValueCutPoint(start, candidate, end);
    const size_t boundary = get<1>(refined);
    cutPoints.push_back(get<0>(refined));
    computeCutPoints(start, boundary);
    computeCutPoints(boundary, end);
}
|
||||
// Variant of computeCutPoints that keeps the candidate index as the split
// position and stores the value returned by halfWayValueCutPoint.
void CPPFImdlp::computeCutPointsAlternative(size_t start, size_t end)
{
    if (end - start < 2) {
        return;
    }
    const size_t candidate = getCandidate(start, end);
    // getCandidate reports "no candidate" with the maximum size_t value
    if (candidate == numeric_limits<size_t>::max()) {
        return;
    }
    if (!mdlp(start, candidate, end)) {
        return;
    }
    cutPoints.push_back(halfWayValueCutPoint(start, candidate));
    computeCutPointsAlternative(start, candidate);
    computeCutPointsAlternative(candidate, end);
}
|
||||
// Return the position in (start, end) whose split gives the lowest weighted
// entropy, or numeric_limits<size_t>::max() when no position improves on the
// entropy of the whole interval.
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
{
    /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
    E(A, TA; S) is minimal amongst all the candidate cut points. */
    const size_t elements = end - start;
    size_t best = numeric_limits<size_t>::max();
    precision_t lowest = metrics.entropy(start, end);
    for (size_t pos = start + 1; pos < end; ++pos) {
        // Cutpoints are always on boundaries (definition 2)
        if (y[indices[pos]] != y[indices[pos - 1]]) {
            const precision_t left = precision_t(pos - start) / elements * metrics.entropy(start, pos);
            const precision_t right = precision_t(end - pos) / elements * metrics.entropy(pos, end);
            if (left + right < lowest) {
                lowest = left + right;
                best = pos;
            }
        }
    }
    return best;
}
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k, k1, k2;
|
||||
precision_t ig, delta;
|
||||
precision_t ent, ent1, ent2;
|
||||
auto N = precision_t(end - start);
|
||||
if (N < 2) {
|
||||
return false;
|
||||
}
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2);
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
// Return the positions 0..X_.size()-1 ordered by ascending X_ value, using
// the label in y_ as tie-breaker for equal values.
//
// The sort is performed exactly once: sorting an already sorted range again
// cannot change it, so repeating the stable_sort for every element only
// multiplied the running time by the number of samples.
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
{
    indices_t idx(X_.size());
    iota(idx.begin(), idx.end(), 0);
    stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2)
        {
            if (X_[i1] == X_[i2])
                return y_[i1] < y_[i2];
            return X_[i1] < X_[i2];
        });
    return idx;
}
|
||||
// Return the cut points found by fit(), without duplicates and in ascending
// order. The stored cutPoints member is left unchanged.
cutPoints_t CPPFImdlp::getCutPoints()
{
    // A set both removes duplicates and iterates in ascending order, so no
    // additional sort is needed afterwards.
    set<precision_t> distinct(cutPoints.begin(), cutPoints.end());
    cutPoints_t output;
    output.assign(distinct.begin(), distinct.end());
    return output;
}
|
||||
}
|
33
CPPFImdlp.h
33
CPPFImdlp.h
@@ -1,33 +0,0 @@
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <utility>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
namespace mdlp {
|
||||
// Discretizer that computes cut points for one continuous variable from
// samples and their class labels (see fit() and getCutPoints()).
class CPPFImdlp {
|
||||
protected:
|
||||
// Selects the cut point strategy; set from the constructor argument.
int algorithm;
|
||||
// Result of sortIndices() for the data passed to fit().
indices_t indices;
|
||||
// Samples passed to fit().
samples_t X;
|
||||
// Labels passed to fit().
labels_t y;
|
||||
// Entropy / information gain helper; declared after y and indices.
Metrics metrics;
|
||||
// Cut points accumulated while fitting (may hold duplicates, unsorted).
cutPoints_t cutPoints;
|
||||
|
||||
// Returns the sample positions ordered by value.
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
// Recursive cut point search over a (start, end) range of sorted positions.
void computeCutPoints(size_t, size_t);
|
||||
// Alternative recursive cut point search over a (start, end) range.
void computeCutPointsAlternative(size_t, size_t);
|
||||
// Acceptance test for a cut; arguments are (start, cut, end).
bool mdlp(size_t, size_t, size_t);
|
||||
// Candidate cut position for a (start, end) range.
size_t getCandidate(size_t, size_t);
|
||||
// Cut value for a candidate; arguments are (start, idx).
precision_t halfWayValueCutPoint(size_t, size_t);
|
||||
// Cut value and adjusted cut index; arguments are (start, cut, end).
tuple<precision_t, size_t> completeValueCutPoint(size_t, size_t, size_t);
|
||||
public:
|
||||
// algorithm defaults to 0.
CPPFImdlp(int algorithm = 0);
|
||||
~CPPFImdlp();
|
||||
// Computes the cut points for the given samples and labels; returns *this.
CPPFImdlp& fit(samples_t&, labels_t&);
|
||||
// Returns the computed cut points.
samples_t getCutPoints();
|
||||
// Hard-coded version string.
inline string version() { return "1.0.0"; };
|
||||
};
|
||||
}
|
||||
#endif
|
85
Makefile
Normal file
85
Makefile
Normal file
@@ -0,0 +1,85 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: debug release install test conan-create viewcoverage
|
||||
lcov := lcov
|
||||
|
||||
f_debug = build_debug
|
||||
f_release = build_release
|
||||
genhtml = genhtml
|
||||
docscdir = docs
|
||||
|
||||
define build_target
|
||||
@echo ">>> Building the project for $(1)..."
|
||||
@if [ -d $(2) ]; then rm -fr $(2); fi
|
||||
@conan install . --build=missing -of $(2) -s build_type=$(1) $(4)
|
||||
@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
|
||||
@cmake --build $(2) --config $(1) -j 8
|
||||
endef
|
||||
|
||||
debug: ## Build Debug version of the library
|
||||
@$(call build_target,"Debug","$(f_debug)", "ENABLE_TESTING=ON", "-o enable_testing=True")
|
||||
|
||||
release: ## Build Release version of the library
|
||||
@$(call build_target,"Release","$(f_release)", "ENABLE_TESTING=OFF", "")
|
||||
|
||||
install: ## Install the library
|
||||
@echo ">>> Installing the project..."
|
||||
@cmake --build $(f_release) --target install -j 8
|
||||
|
||||
test: ## Build Debug version and run tests
|
||||
@echo ">>> Building Debug version and running tests..."
|
||||
@$(MAKE) debug;
|
||||
@cp -r tests/datasets $(f_debug)/tests/datasets
|
||||
@cd $(f_debug)/tests && ctest --output-on-failure -j 8
|
||||
@echo ">>> Generating coverage report..."
|
||||
@cd $(f_debug)/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --ignore-errors inconsistent --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'gtest/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info '*/.conan2/*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1;
|
||||
@genhtml $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(f_debug)/tests/coverage --title "Discretizer mdlp Coverage Report" -s -k -f --legend
|
||||
@echo "* Coverage report is generated at $(f_debug)/tests/coverage/index.html"
|
||||
@which python || (echo ">>> Please install python"; exit 1)
|
||||
@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
|
||||
echo ">>> No coverage.info file found!"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo ">>> Updating coverage badge..."
|
||||
@env python update_coverage.py $(f_debug)/tests
|
||||
@echo ">>> Done"
|
||||
|
||||
# Renders the coverage.info produced by the `test` target into
# $(docscdir)/coverage and opens the report in a browser.
viewcoverage: ## View the html coverage report
	@which $(genhtml) >/dev/null || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
	@if [ ! -d $(docscdir)/coverage ]; then mkdir -p $(docscdir)/coverage; fi
	@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
		echo ">>> No coverage.info file found. Run make test first!"; \
		exit 1; \
	fi
	@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(docscdir)/coverage --title "FImdlp Coverage Report" -s -k -f --legend >/dev/null 2>&1;
	@xdg-open $(docscdir)/coverage/index.html || open $(docscdir)/coverage/index.html 2>/dev/null
	@echo ">>> Done";
|
||||
|
||||
conan-create: ## Create the conan package
|
||||
@echo ">>> Creating the conan package..."
|
||||
conan create . --build=missing -tf "" -s:a build_type=Release
|
||||
conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_testing=False"
|
||||
@echo ">>> Done"
|
||||
|
||||
help: ## Show help message
|
||||
@IFS=$$'\n' ; \
|
||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||
printf "%s\n\n" "Usage: make [task]"; \
|
||||
printf "%-20s %s\n" "task" "help" ; \
|
||||
printf "%-20s %s\n" "------" "----" ; \
|
||||
for help_line in $${help_lines[@]}; do \
|
||||
IFS=$$':' ; \
|
||||
help_split=($$help_line) ; \
|
||||
help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||
help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||
printf '\033[36m'; \
|
||||
printf "%-20s %s" $$help_command ; \
|
||||
printf '\033[0m'; \
|
||||
printf "%s\n" $$help_info; \
|
||||
done
|
65
Metrics.cpp
65
Metrics.cpp
@@ -1,65 +0,0 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
// Bind the metrics helper to the caller's label and index containers and
// start with empty caches. The class count is computed over every index
// currently present.
Metrics::Metrics(labels_t& y_, indices_t& indices_)
    : y(y_),
    indices(indices_),
    numClasses(computeNumClasses(0, indices.size())),
    entropyCache(),
    igCache()
{
}
|
||||
// Count the distinct labels found at sorted positions [start, end).
int Metrics::computeNumClasses(size_t start, size_t end)
{
    set<int> distinct;
    size_t pos = start;
    while (pos < end) {
        distinct.insert(y[indices[pos]]);
        ++pos;
    }
    return distinct.size();
}
|
||||
// Replace the data the metrics work on and invalidate everything derived
// from the previous data.
void Metrics::setData(labels_t& y_, indices_t& indices_)
{
    // y and indices are reference members: these assignments copy the new
    // contents into the referred containers, they do not rebind anything.
    indices = indices_;
    y = y_;
    // cached values belong to the old data
    igCache.clear();
    entropyCache.clear();
    numClasses = computeNumClasses(0, indices.size());
}
|
||||
// Return the entropy of the labels at sorted positions [start, end).
// Intervals with fewer than two elements have entropy 0. Results are
// memoised in entropyCache, keyed by (start, end).
precision_t Metrics::entropy(size_t start, size_t end)
{
    if (end - start < 2)
        return 0;
    const auto key = make_tuple(start, end);
    // one lookup serves both the hit test and the value retrieval
    const auto cached = entropyCache.find(key);
    if (cached != entropyCache.end()) {
        return cached->second;
    }
    // Allocated only when the value really has to be computed.
    // NOTE(review): labels are used directly as positions in counts, so this
    // assumes every label lies in [0, numClasses] - confirm against callers.
    labels_t counts(numClasses + 1, 0);
    // Index loop instead of pointer arithmetic: &indices[end] would apply
    // operator[] one past the end whenever end == indices.size().
    for (size_t i = start; i < end; ++i) {
        counts[y[indices[i]]]++;
    }
    const int nElements = static_cast<int>(end - start);
    precision_t ventropy = 0;
    for (auto count : counts) {
        if (count > 0) {
            const precision_t p = (precision_t)count / nElements;
            ventropy -= p * log2(p);
        }
    }
    entropyCache[key] = ventropy;
    return ventropy;
}
|
||||
// Return the information gain of splitting [start, end) at cut: the entropy
// of the whole interval minus the size-weighted entropies of both halves.
// Results are memoised in igCache, keyed by (start, cut, end).
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
{
    const auto key = make_tuple(start, cut, end);
    const auto cached = igCache.find(key);
    if (cached != igCache.end()) {
        return cached->second;
    }
    const int sizeLeft = cut - start;
    const int sizeRight = end - cut;
    const int sizeAll = end - start;
    const precision_t whole = entropy(start, end);
    const precision_t left = entropy(start, cut);
    const precision_t right = entropy(cut, end);
    const precision_t gain = whole - ((precision_t)sizeLeft * left + (precision_t)sizeRight * right) / sizeAll;
    igCache[key] = gain;
    return gain;
}
|
||||
|
||||
}
|
20
Metrics.h
20
Metrics.h
@@ -1,20 +0,0 @@
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
#include "typesFImdlp.h"
|
||||
namespace mdlp {
|
||||
// Entropy and information gain calculations over labels addressed through
// an index container, with memoised results.
class Metrics {
|
||||
protected:
|
||||
// Reference member: refers to the label container supplied at construction.
labels_t& y;
|
||||
// Reference member: refers to the index container supplied at construction.
indices_t& indices;
|
||||
// Number of distinct labels over all indices.
int numClasses;
|
||||
// Memoised entropy() results.
cacheEnt_t entropyCache;
|
||||
// Memoised informationGain() results.
cacheIg_t igCache;
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
// Replaces the data and clears both caches.
void setData(labels_t&, indices_t&);
|
||||
// Distinct labels in positions (start, end).
int computeNumClasses(size_t, size_t);
|
||||
// Entropy of positions (start, end).
precision_t entropy(size_t, size_t);
|
||||
// Information gain of splitting (start, end) at cut; arguments are (start, cut, end).
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
38
README.md
38
README.md
@@ -1,4 +1,11 @@
|
||||
# mdlp
|
||||
[](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[](html/index.html)
|
||||
[](https://deepwiki.com/rmontanana/mdlp)
|
||||
[](https://doi.org/10.5281/zenodo.14245443)
|
||||
|
||||
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
||||
|
||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||
|
||||
@@ -7,26 +14,35 @@ The implementation tries to mitigate the problem of different label values with
|
||||
- Sorts the values of the variable using the label values as a tie-breaker
|
||||
- Once a valid candidate for the split is found, it checks whether the previous value is the same as the current one, and tries to use the previous one, or the next one if the former is not possible.
|
||||
|
||||
The algorithm returns the cut points for the variable.
|
||||
Other features:
|
||||
|
||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||
- Intervals have to have more than two examples to be evaluated (mdlp).
|
||||
|
||||
- The algorithm returns the cut points for the variable.
|
||||
|
||||
- The transform method uses the cut points to return the index of the interval each value belongs to, in the following way:
|
||||
|
||||
cut[i - 1] <= x < cut[i]
|
||||
|
||||
using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) method
|
||||
|
||||
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
|
||||
|
||||
## Sample
|
||||
|
||||
To run the sample, just execute the following commands:
|
||||
|
||||
```bash
|
||||
cd sample
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
./sample iris
|
||||
make build
|
||||
build_release/sample/sample -f iris -m 2
|
||||
build_release/sample/sample -h
|
||||
```
|
||||
|
||||
## Test
|
||||
|
||||
To run the tests, execute the following commands:
|
||||
To run the tests and see coverage (llvm with lcov and genhtml have to be installed), execute the following commands:
|
||||
|
||||
```bash
|
||||
cd tests
|
||||
./test
|
||||
make test
|
||||
```
|
||||
|
525
TECHNICAL_ANALYSIS_REPORT.md
Normal file
525
TECHNICAL_ANALYSIS_REPORT.md
Normal file
@@ -0,0 +1,525 @@
|
||||
# Technical Analysis Report: MDLP Discretization Library
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This document presents a comprehensive technical analysis of the MDLP (Minimum Description Length Principle) discretization library. The analysis covers project structure, code quality, architecture, testing methodology, documentation, and security assessment.
|
||||
|
||||
**Overall Rating: B+ (Good with Notable Issues)**
|
||||
|
||||
The library demonstrates solid software engineering practices with excellent test coverage and clean architectural design, but contains several security vulnerabilities and code quality issues that require attention before production deployment.
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Project Overview](#project-overview)
|
||||
2. [Architecture & Design Analysis](#architecture--design-analysis)
|
||||
3. [Code Quality Assessment](#code-quality-assessment)
|
||||
4. [Testing Framework Analysis](#testing-framework-analysis)
|
||||
5. [Security Analysis](#security-analysis)
|
||||
6. [Documentation & Maintainability](#documentation--maintainability)
|
||||
7. [Build System Evaluation](#build-system-evaluation)
|
||||
8. [Strengths & Weaknesses Summary](#strengths--weaknesses-summary)
|
||||
9. [Recommendations](#recommendations)
|
||||
10. [Risk Assessment](#risk-assessment)
|
||||
|
||||
---
|
||||
|
||||
## Project Overview
|
||||
|
||||
### Description
|
||||
The MDLP discretization library is a C++ implementation of Fayyad & Irani's Multi-Interval Discretization algorithm for continuous-valued attributes in classification learning. The library provides both traditional binning strategies and advanced MDLP-based discretization.
|
||||
|
||||
### Key Features
|
||||
- **MDLP Algorithm**: Implementation of information-theoretic discretization
|
||||
- **Multiple Strategies**: Uniform and quantile-based binning options
|
||||
- **PyTorch Integration**: Native support for PyTorch tensors
|
||||
- **High Performance**: Optimized algorithms with caching mechanisms
|
||||
- **Complete Testing**: 100% code coverage with comprehensive test suite
|
||||
|
||||
### Technology Stack
|
||||
- **Language**: C++17
|
||||
- **Build System**: CMake 3.20+
|
||||
- **Dependencies**: PyTorch (libtorch 2.7.0)
|
||||
- **Testing**: Google Test (GTest)
|
||||
- **Coverage**: lcov/genhtml
|
||||
- **Package Manager**: Conan
|
||||
|
||||
---
|
||||
|
||||
## Architecture & Design Analysis
|
||||
|
||||
### Class Hierarchy
|
||||
|
||||
```
|
||||
Discretizer (Abstract Base Class)
|
||||
├── CPPFImdlp (MDLP Implementation)
|
||||
└── BinDisc (Simple Binning)
|
||||
|
||||
Metrics (Standalone Utility Class)
|
||||
```
|
||||
|
||||
### Design Patterns Identified
|
||||
|
||||
#### ✅ **Well-Implemented Patterns**
|
||||
- **Template Method Pattern**: Base class provides `fit_transform()` while derived classes implement `fit()`
|
||||
- **Facade Pattern**: Unified interface for both C++ vectors and PyTorch tensors
|
||||
- **Composition**: `CPPFImdlp` composes `Metrics` for statistical calculations
|
||||
|
||||
#### ⚠️ **Pattern Issues**
|
||||
- **Strategy Pattern**: `BinDisc` uses enum-based strategy instead of proper object-oriented strategy pattern
|
||||
- **Interface Segregation**: `BinDisc.fit()` ignores `y` parameter, violating interface contract
|
||||
|
||||
### SOLID Principles Adherence
|
||||
|
||||
| Principle | Rating | Notes |
|
||||
|-----------|--------|-------|
|
||||
| **Single Responsibility** | ✅ Good | Each class has clear, focused responsibility |
|
||||
| **Open/Closed** | ✅ Good | Easy to extend with new discretization algorithms |
|
||||
| **Liskov Substitution** | ⚠️ Issues | `BinDisc` doesn't properly handle supervised interface |
|
||||
| **Interface Segregation** | ✅ Good | Focused interfaces, not overly broad |
|
||||
| **Dependency Inversion** | ✅ Good | Depends on abstractions, not implementations |
|
||||
|
||||
### Architectural Strengths
|
||||
- **Clean Separation**: Algorithm logic, metrics, and data handling well-separated
|
||||
- **Extensible Design**: Easy to add new discretization methods
|
||||
- **Multi-Interface Support**: Both C++ native and PyTorch integration
|
||||
- **Performance Optimized**: Caching and efficient data structures
|
||||
|
||||
### Architectural Weaknesses
|
||||
- **Interface Inconsistency**: Mixed supervised/unsupervised interface handling
|
||||
- **Complex Single Methods**: `computeCutPoints()` handles too many responsibilities
|
||||
- **Tight Coupling**: Direct access to internal data structures
|
||||
- **Limited Configuration**: Algorithm parameters scattered across classes
|
||||
|
||||
---
|
||||
|
||||
## Code Quality Assessment
|
||||
|
||||
### Code Style & Standards
|
||||
- **Consistent Naming**: Good use of camelCase and snake_case conventions
|
||||
- **Header Organization**: Proper SPDX licensing and copyright headers
|
||||
- **Type Safety**: Centralized type definitions in `typesFImdlp.h`
|
||||
- **Modern C++**: Good use of C++17 features
|
||||
|
||||
### Critical Code Issues
|
||||
|
||||
#### 🔴 **High Priority Issues**
|
||||
|
||||
**Memory Safety - Unsafe Pointer Operations**
|
||||
```cpp
|
||||
// Location: Discretizer.cpp:35-36
|
||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||
```
|
||||
- **Issue**: Direct pointer arithmetic without bounds checking
|
||||
- **Risk**: Buffer overflow if tensor data is malformed
|
||||
- **Fix**: Add tensor validation before pointer operations
|
||||
|
||||
#### 🟡 **Medium Priority Issues**
|
||||
|
||||
**Integer Underflow Risk**
|
||||
```cpp
|
||||
// Location: CPPFImdlp.cpp:98-100
|
||||
n = cut - 1 - idxPrev; // Could underflow if cut <= idxPrev
|
||||
m = idxNext - cut - 1; // Could underflow if idxNext <= cut
|
||||
```
|
||||
- **Issue**: Size arithmetic without underflow protection
|
||||
- **Risk**: Extremely large values from underflow
|
||||
- **Fix**: Add underflow validation
|
||||
|
||||
**Vector Access Without Bounds Checking**
|
||||
```cpp
|
||||
// Location: Multiple locations
|
||||
X[indices[idx]] // No bounds validation
|
||||
```
|
||||
- **Issue**: Direct vector access using potentially invalid indices
|
||||
- **Risk**: Out-of-bounds memory access
|
||||
- **Fix**: Use `at()` method or add explicit bounds checking
|
||||
|
||||
### Performance Considerations
|
||||
- **Caching Strategy**: Good use of entropy and information gain caching
|
||||
- **Memory Efficiency**: Smart use of indices to avoid data copying
|
||||
- **Algorithmic Complexity**: Efficient O(n log n) sorting with optimized cutpoint selection
|
||||
|
||||
---
|
||||
|
||||
## Testing Framework Analysis
|
||||
|
||||
### Test Organization
|
||||
|
||||
| Test File | Focus Area | Key Features |
|
||||
|-----------|------------|-------------|
|
||||
| `BinDisc_unittest.cpp` | Binning strategies | Parametric testing, multiple bin counts |
|
||||
| `Discretizer_unittest.cpp` | Base interface | PyTorch integration, transform methods |
|
||||
| `FImdlp_unittest.cpp` | MDLP algorithm | Real datasets, comprehensive scenarios |
|
||||
| `Metrics_unittest.cpp` | Statistical calculations | Entropy, information gain validation |
|
||||
|
||||
### Testing Strengths
|
||||
- **100% Code Coverage**: Complete line and branch coverage
|
||||
- **Real Dataset Testing**: Uses Iris, Diabetes, Glass datasets from ARFF files
|
||||
- **Edge Case Coverage**: Empty datasets, constant values, single elements
|
||||
- **Parametric Testing**: Multiple configurations and strategies
|
||||
- **Data-Driven Approach**: Systematic test generation with `tests.txt`
|
||||
- **Multiple APIs**: Tests both C++ vectors and PyTorch tensors
|
||||
|
||||
### Testing Methodology
|
||||
- **Framework**: Google Test with proper fixture usage
|
||||
- **Precision Testing**: Consistent floating-point comparison margins
|
||||
- **Exception Testing**: Proper error condition validation
|
||||
- **Integration Testing**: End-to-end algorithm validation
|
||||
|
||||
### Testing Gaps
|
||||
- **Performance Testing**: No benchmarks or performance regression tests
|
||||
- **Memory Testing**: Limited memory pressure or leak testing
|
||||
- **Thread Safety**: No concurrent access testing
|
||||
- **Fuzzing**: No randomized input testing
|
||||
|
||||
---
|
||||
|
||||
## Security Analysis
|
||||
|
||||
### Overall Security Risk: **MEDIUM**
|
||||
|
||||
### Critical Security Vulnerabilities
|
||||
|
||||
#### 🔴 **HIGH RISK - Memory Safety**
|
||||
|
||||
**Unsafe PyTorch Tensor Operations**
|
||||
- **Location**: `Discretizer.cpp:35-36, 42, 49-50`
|
||||
- **Vulnerability**: Direct pointer arithmetic without validation
|
||||
- **Impact**: Buffer overflow, memory corruption
|
||||
- **Exploit Scenario**: Malformed tensor data causing out-of-bounds access
|
||||
- **Mitigation**:
|
||||
```cpp
|
||||
if (!X_.is_contiguous() || !y_.is_contiguous()) {
|
||||
throw std::invalid_argument("Tensors must be contiguous");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32 || y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("Invalid tensor types");
|
||||
}
|
||||
```
|
||||
|
||||
#### 🟡 **MEDIUM RISK - Input Validation**
|
||||
|
||||
**Insufficient Parameter Validation**
|
||||
- **Location**: Multiple entry points
|
||||
- **Vulnerability**: Missing bounds checking on user inputs
|
||||
- **Impact**: Integer overflow, out-of-bounds access
|
||||
- **Examples**:
|
||||
- `proposed_cuts` parameter without overflow protection
|
||||
- Tensor dimensions not validated
|
||||
- Array indices not bounds-checked
|
||||
|
||||
**Thread Safety Issues**
|
||||
- **Location**: `Metrics` class cache containers
|
||||
- **Vulnerability**: Shared state without synchronization
|
||||
- **Impact**: Race conditions, data corruption
|
||||
- **Mitigation**: Add mutex protection or document thread requirements
|
||||
|
||||
#### 🟢 **LOW RISK - Information Disclosure**
|
||||
|
||||
**Debug Information Leakage**
|
||||
- **Location**: Sample code and test files
|
||||
- **Vulnerability**: Detailed internal data exposure
|
||||
- **Impact**: Minor information disclosure
|
||||
- **Mitigation**: Remove or conditionalize debug output
|
||||
|
||||
### Security Recommendations
|
||||
|
||||
#### Immediate Actions
|
||||
1. **Add Tensor Validation**: Comprehensive validation before pointer operations
|
||||
2. **Implement Bounds Checking**: Explicit validation for all array access
|
||||
3. **Add Overflow Protection**: Safe arithmetic operations
|
||||
|
||||
#### Short-term Actions
|
||||
1. **Enhance Input Validation**: Parameter validation at all public interfaces
|
||||
2. **Add Thread Safety**: Documentation or synchronization mechanisms
|
||||
3. **Update Dependencies**: Ensure PyTorch is current and secure
|
||||
|
||||
---
|
||||
|
||||
## Documentation & Maintainability
|
||||
|
||||
### Current Documentation Status
|
||||
|
||||
#### ✅ **Available Documentation**
|
||||
- **README.md**: Basic usage instructions and build commands
|
||||
- **Code Comments**: SPDX headers and licensing information
|
||||
- **Build Instructions**: CMake configuration and make targets
|
||||
|
||||
#### ❌ **Missing Documentation**
|
||||
- **API Documentation**: No comprehensive API reference
|
||||
- **Algorithm Documentation**: Limited explanation of MDLP implementation
|
||||
- **Usage Examples**: Minimal code examples beyond basic sample
|
||||
- **Configuration Guide**: No detailed parameter explanation
|
||||
- **Architecture Documentation**: No design document or UML diagrams
|
||||
|
||||
### Maintainability Assessment
|
||||
|
||||
#### Strengths
|
||||
- **Clear Code Structure**: Well-organized class hierarchy
|
||||
- **Consistent Style**: Uniform naming and formatting conventions
|
||||
- **Separation of Concerns**: Clear module boundaries
|
||||
- **Version Control**: Proper git repository with meaningful commits
|
||||
|
||||
#### Weaknesses
|
||||
- **Complex Methods**: Some functions handle multiple responsibilities
|
||||
- **Magic Numbers**: Hardcoded values without explanation
|
||||
- **Limited Comments**: Algorithm logic lacks explanatory comments
|
||||
- **Configuration Scattered**: Parameters spread across multiple classes
|
||||
|
||||
### Documentation Recommendations
|
||||
1. **Generate API Documentation**: Use Doxygen for comprehensive API docs
|
||||
2. **Add Algorithm Explanation**: Document MDLP implementation details
|
||||
3. **Create Usage Guide**: Comprehensive examples and tutorials
|
||||
4. **Architecture Document**: High-level design documentation
|
||||
5. **Configuration Reference**: Centralized parameter documentation
|
||||
|
||||
---
|
||||
|
||||
## Build System Evaluation
|
||||
|
||||
### CMake Configuration Analysis
|
||||
|
||||
#### Strengths
|
||||
- **Modern CMake**: Uses version 3.20+ with current best practices
|
||||
- **Multi-Configuration**: Separate debug/release builds
|
||||
- **Dependency Management**: Proper PyTorch integration
|
||||
- **Installation Support**: Complete install targets and package config
|
||||
- **Testing Integration**: CTest integration with coverage
|
||||
|
||||
#### Build Features
|
||||
```cmake
|
||||
# Key configurations
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
option(ENABLE_TESTING OFF)
|
||||
option(ENABLE_SAMPLE OFF)
|
||||
option(COVERAGE OFF)
|
||||
```
|
||||
|
||||
### Build System Issues
|
||||
|
||||
#### Security Concerns
|
||||
- **Debug Flags**: May affect release builds
|
||||
- **Dependency Versions**: Fixed PyTorch version without security updates
|
||||
|
||||
#### Usability Issues
|
||||
- **Complex Makefile**: Manual build directory management
|
||||
- **Coverage Complexity**: Complex lcov command chain
|
||||
|
||||
### Build Recommendations
|
||||
1. **Simplify Build Process**: Use CMake presets for common configurations
|
||||
2. **Improve Dependency Management**: Flexible version constraints
|
||||
3. **Add Build Validation**: Compiler and platform checks
|
||||
4. **Enhance Documentation**: Detailed build instructions
|
||||
|
||||
---
|
||||
|
||||
## Strengths & Weaknesses Summary
|
||||
|
||||
### 🏆 **Key Strengths**
|
||||
|
||||
#### Technical Excellence
|
||||
- **Algorithmic Correctness**: Faithful implementation of Fayyad & Irani algorithm
|
||||
- **Performance Optimization**: Efficient caching and data structures
|
||||
- **Code Coverage**: 100% test coverage with comprehensive edge cases
|
||||
- **Modern C++**: Good use of C++17 features and best practices
|
||||
|
||||
#### Software Engineering
|
||||
- **Clean Architecture**: Well-structured OOP design with clear separation
|
||||
- **SOLID Principles**: Generally good adherence to design principles
|
||||
- **Multi-Platform**: CMake-based build system for cross-platform support
|
||||
- **Professional Quality**: Proper licensing, version control, CI/CD integration
|
||||
|
||||
#### API Design
|
||||
- **Multiple Interfaces**: Both C++ native and PyTorch tensor support
|
||||
- **Sklearn-like API**: Familiar `fit()`/`transform()`/`fit_transform()` pattern
|
||||
- **Extensible**: Easy to add new discretization algorithms
|
||||
|
||||
### ⚠️ **Critical Weaknesses**
|
||||
|
||||
#### Security Issues
|
||||
- **Memory Safety**: Unsafe pointer operations in PyTorch integration
|
||||
- **Input Validation**: Insufficient bounds checking and parameter validation
|
||||
- **Thread Safety**: Shared state without proper synchronization
|
||||
|
||||
#### Code Quality
|
||||
- **Interface Consistency**: LSP violation in `BinDisc` class
|
||||
- **Method Complexity**: Some functions handle too many responsibilities
|
||||
- **Error Handling**: Inconsistent exception handling patterns
|
||||
|
||||
#### Documentation
|
||||
- **API Documentation**: Minimal inline documentation
|
||||
- **Usage Examples**: Limited practical examples
|
||||
- **Architecture Documentation**: No high-level design documentation
|
||||
|
||||
---
|
||||
|
||||
## Recommendations
|
||||
|
||||
### 🚨 **Immediate Actions (HIGH Priority)**
|
||||
|
||||
#### Security Fixes
|
||||
```cpp
|
||||
// 1. Add tensor validation in Discretizer::fit_t()
|
||||
void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_) {
|
||||
// Validate tensor properties
|
||||
if (!X_.is_contiguous() || !y_.is_contiguous()) {
|
||||
throw std::invalid_argument("Tensors must be contiguous");
|
||||
}
|
||||
if (X_.sizes().size() != 1 || y_.sizes().size() != 1) {
|
||||
throw std::invalid_argument("Only 1D tensors supported");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32 || y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("Invalid tensor types");
|
||||
}
|
||||
// ... rest of implementation
|
||||
}
|
||||
```
|
||||
|
||||
```cpp
|
||||
// 2. Add bounds checking for vector access
|
||||
inline precision_t safe_vector_access(const samples_t& vec, size_t idx) {
|
||||
if (idx >= vec.size()) {
|
||||
throw std::out_of_range("Vector index out of bounds");
|
||||
}
|
||||
return vec[idx];
|
||||
}
|
||||
```
|
||||
|
||||
```cpp
|
||||
// 3. Add underflow protection in arithmetic operations
|
||||
size_t safe_subtract(size_t a, size_t b) {
|
||||
if (b > a) {
|
||||
throw std::underflow_error("Subtraction would cause underflow");
|
||||
}
|
||||
return a - b;
|
||||
}
|
||||
```
|
||||
|
||||
### 📋 **Short-term Actions (MEDIUM Priority)**
|
||||
|
||||
#### Code Quality Improvements
|
||||
1. **Fix Interface Consistency**: Separate supervised/unsupervised interfaces
|
||||
2. **Refactor Complex Methods**: Break down `computeCutPoints()` function
|
||||
3. **Standardize Error Handling**: Consistent exception types and messages
|
||||
4. **Add Input Validation**: Comprehensive parameter checking
|
||||
|
||||
#### Thread Safety
|
||||
```cpp
|
||||
// Add thread safety to Metrics class
|
||||
class Metrics {
|
||||
private:
|
||||
mutable std::mutex cache_mutex;
|
||||
cacheEnt_t entropyCache;
|
||||
cacheIg_t igCache;
|
||||
|
||||
public:
|
||||
precision_t entropy(size_t start, size_t end) const {
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
// ... implementation
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### 📚 **Long-term Actions (LOW Priority)**
|
||||
|
||||
#### Documentation & Usability
|
||||
1. **API Documentation**: Generate comprehensive Doxygen documentation
|
||||
2. **Usage Examples**: Create detailed tutorial and example repository
|
||||
3. **Performance Testing**: Add benchmarking and regression tests
|
||||
4. **Architecture Documentation**: Create design documents and UML diagrams
|
||||
|
||||
#### Code Modernization
|
||||
1. **Strategy Pattern**: Proper implementation for `BinDisc` strategies
|
||||
2. **Configuration Management**: Centralized parameter handling
|
||||
3. **Factory Pattern**: Discretizer creation factory
|
||||
4. **Resource Management**: RAII patterns for memory safety
|
||||
|
||||
---
|
||||
|
||||
## Risk Assessment
|
||||
|
||||
### Risk Priority Matrix
|
||||
|
||||
| Risk Category | High | Medium | Low | Total |
|
||||
|---------------|------|--------|-----|-------|
|
||||
| **Security** | 1 | 7 | 2 | 10 |
|
||||
| **Code Quality** | 2 | 5 | 3 | 10 |
|
||||
| **Maintainability** | 0 | 3 | 4 | 7 |
|
||||
| **Performance** | 0 | 1 | 2 | 3 |
|
||||
| **Total** | **3** | **16** | **11** | **30** |
|
||||
|
||||
### Risk Impact Assessment
|
||||
|
||||
#### Critical Risks (Immediate Attention Required)
|
||||
1. **Memory Safety Vulnerabilities**: Could lead to crashes or security exploits
|
||||
2. **Interface Consistency Issues**: Violates expected behavior contracts
|
||||
3. **Input Validation Gaps**: Potential for crashes with malformed input
|
||||
|
||||
#### Moderate Risks (Address in Next Release)
|
||||
1. **Thread Safety Issues**: Problems in multi-threaded environments
|
||||
2. **Complex Method Design**: Maintenance and debugging difficulties
|
||||
3. **Documentation Gaps**: Reduced adoption and maintainability
|
||||
|
||||
#### Low Risks (Future Improvements)
|
||||
1. **Performance Optimization**: Minor efficiency improvements
|
||||
2. **Code Style Consistency**: Enhanced readability
|
||||
3. **Build System Enhancements**: Improved developer experience
|
||||
|
||||
---
|
||||
|
||||
## Conclusion
|
||||
|
||||
The MDLP discretization library represents a solid implementation of an important machine learning algorithm with excellent test coverage and clean architectural design. However, it requires attention to security vulnerabilities and code quality issues before production deployment.
|
||||
|
||||
### Final Verdict
|
||||
|
||||
**Rating: B+ (Good with Notable Issues)**
|
||||
|
||||
- **Core Algorithm**: Excellent implementation of MDLP with proper mathematical foundations
|
||||
- **Software Engineering**: Good OOP design following most best practices
|
||||
- **Testing**: Exemplary test coverage and methodology
|
||||
- **Security**: Notable vulnerabilities requiring immediate attention
|
||||
- **Documentation**: Adequate but could be significantly improved
|
||||
|
||||
### Deployment Recommendation
|
||||
|
||||
**Not Ready for Production** without addressing HIGH priority security issues, particularly around memory safety and input validation. Once these are resolved, the library would be suitable for production use in most contexts.
|
||||
|
||||
### Next Steps
|
||||
|
||||
1. **Security Audit**: Address all HIGH and MEDIUM priority security issues
|
||||
2. **Code Review**: Implement fixes for interface consistency and method complexity
|
||||
3. **Documentation**: Create comprehensive API documentation and usage guides
|
||||
4. **Testing**: Add performance benchmarks and stress testing
|
||||
5. **Release**: Prepare version 2.1.0 with security and quality improvements
|
||||
|
||||
---
|
||||
|
||||
## Appendix
|
||||
|
||||
### Files Analyzed
|
||||
- `src/CPPFImdlp.h` & `src/CPPFImdlp.cpp` - MDLP algorithm implementation
|
||||
- `src/Discretizer.h` & `src/Discretizer.cpp` - Base class and PyTorch integration
|
||||
- `src/BinDisc.h` & `src/BinDisc.cpp` - Simple binning strategies
|
||||
- `src/Metrics.h` & `src/Metrics.cpp` - Statistical calculations
|
||||
- `src/typesFImdlp.h` - Type definitions
|
||||
- `CMakeLists.txt` - Build configuration
|
||||
- `conanfile.py` - Dependency management
|
||||
- `tests/*` - Comprehensive test suite
|
||||
|
||||
### Analysis Date
|
||||
**Report Generated**: June 27, 2025
|
||||
|
||||
### Tools Used
|
||||
- **Static Analysis**: Manual code review with security focus
|
||||
- **Architecture Analysis**: SOLID principles and design pattern evaluation
|
||||
- **Test Analysis**: Coverage and methodology assessment
|
||||
- **Security Analysis**: Vulnerability assessment with risk prioritization
|
||||
|
||||
---
|
||||
|
||||
*This report provides a comprehensive technical analysis of the MDLP discretization library. For questions or clarifications, please refer to the project repository or contact the development team.*
|
16
conandata.yml
Normal file
16
conandata.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
sources:
|
||||
"2.1.0":
|
||||
url: "https://github.com/rmontanana/mdlp/archive/refs/tags/v2.1.0.tar.gz"
|
||||
sha256: "placeholder_sha256_hash"
|
||||
"2.0.1":
|
||||
url: "https://github.com/rmontanana/mdlp/archive/refs/tags/v2.0.1.tar.gz"
|
||||
sha256: "placeholder_sha256_hash"
|
||||
"2.0.0":
|
||||
url: "https://github.com/rmontanana/mdlp/archive/refs/tags/v2.0.0.tar.gz"
|
||||
sha256: "placeholder_sha256_hash"
|
||||
|
||||
patches:
|
||||
"2.1.0":
|
||||
- patch_file: "patches/001-cmake-fix.patch"
|
||||
patch_description: "Fix CMake configuration for Conan compatibility"
|
||||
patch_type: "portability"
|
111
conanfile.py
Normal file
111
conanfile.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import os
|
||||
import re
|
||||
from conan import ConanFile
|
||||
from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps
|
||||
from conan.tools.files import load, copy
|
||||
|
||||
|
||||
class FimdlpConan(ConanFile):
    """Conan recipe that builds, tests and packages the fimdlp library.

    The package version is not hard-coded here: ``set_version`` extracts it
    from the ``project(... VERSION x.y.z ...)`` call in CMakeLists.txt so the
    CMake project and the Conan package can never disagree.
    """

    name = "fimdlp"
    # Placeholder only; overwritten by set_version().
    version = "X.X.X"
    license = "MIT"
    author = "Ricardo Montañana <rmontanana@gmail.com>"
    url = "https://github.com/rmontanana/mdlp"
    description = "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
    topics = ("machine-learning", "discretization", "mdlp", "classification")

    # Package configuration
    settings = "os", "compiler", "build_type", "arch"
    options = {
        "shared": [True, False],
        "fPIC": [True, False],
        "enable_testing": [True, False],
        "enable_sample": [True, False],
    }
    default_options = {
        "shared": False,
        "fPIC": True,
        "enable_testing": False,
        "enable_sample": False,
    }

    # Sources live next to this recipe; export them together with it.
    exports_sources = "CMakeLists.txt", "src/*", "sample/*", "tests/*", "config/*", "fimdlpConfig.cmake.in"

    def set_version(self):
        """Set ``self.version`` from the VERSION declared in CMakeLists.txt.

        Raises:
            Exception: if no ``project(... VERSION x.y.z)`` call is found.
        """
        # Anchor the path to the recipe so the lookup does not depend on the
        # directory Conan happens to be invoked from.
        content = load(self, os.path.join(self.recipe_folder, "CMakeLists.txt"))
        version_pattern = re.compile(r'project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)', re.IGNORECASE | re.DOTALL)
        match = version_pattern.search(content)
        if not match:
            raise Exception("Version not found in CMakeLists.txt")
        self.version = match.group(1)

    def config_options(self):
        """Drop the fPIC option on Windows, where it does not apply."""
        if self.settings.os == "Windows":
            self.options.rm_safe("fPIC")

    def configure(self):
        """Drop the fPIC option for shared builds."""
        if self.options.shared:
            self.options.rm_safe("fPIC")

    def requirements(self):
        """Declare the dependencies that consumers of the package also need."""
        # PyTorch dependency for tensor operations
        self.requires("libtorch/2.7.1")

    def build_requirements(self):
        """Declare dependencies needed only while building this package."""
        # arff-files is used by the tests and the sample only, so it is a test
        # requirement: available to this build, not propagated to consumers.
        self.test_requires("arff-files/1.2.1")
        if self.options.enable_testing:
            self.test_requires("gtest/1.16.0")

    def layout(self):
        """Use the standard CMake folder layout."""
        cmake_layout(self)

    def generate(self):
        """Generate the CMake dependency files and the toolchain file."""
        deps = CMakeDeps(self)
        deps.generate()

        tc = CMakeToolchain(self)
        # Forward the recipe options to the CMake cache.
        tc.variables["ENABLE_TESTING"] = self.options.enable_testing
        tc.variables["ENABLE_SAMPLE"] = self.options.enable_sample
        tc.variables["BUILD_SHARED_LIBS"] = self.options.shared
        tc.generate()

    def build(self):
        """Configure and build with CMake; run the tests when enabled."""
        cmake = CMake(self)
        cmake.configure()
        cmake.build()

        if self.options.enable_testing:
            cmake.test()

    def package(self):
        """Install the CMake targets and copy the license into the package."""
        cmake = CMake(self)
        cmake.install()

        copy(self, "LICENSE", src=self.source_folder, dst=os.path.join(self.package_folder, "licenses"))

    def package_info(self):
        """Describe the packaged library to consumers."""
        # Library configuration
        self.cpp_info.libs = ["fimdlp"]
        self.cpp_info.includedirs = ["include"]

        # CMake package configuration
        self.cpp_info.set_property("cmake_file_name", "fimdlp")
        self.cpp_info.set_property("cmake_target_name", "fimdlp::fimdlp")

        # NOTE(review): `cppstd` does not look like a documented cpp_info
        # field; confirm this line has any effect on consumers.
        self.cpp_info.cppstd = "17"

        # System libraries (if needed)
        if self.settings.os in ["Linux", "FreeBSD"]:
            self.cpp_info.system_libs.append("m")  # Math library
            self.cpp_info.system_libs.append("pthread")  # Threading

        # Build information for consumers
        self.cpp_info.builddirs = ["lib/cmake/fimdlp"]
|
4
config/CMakeLists.txt
Normal file
4
config/CMakeLists.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
configure_file(
|
||||
"config.h.in"
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
|
||||
)
|
13
config/config.h.in
Normal file
13
config/config.h.in
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
// CMake's configure_file() only substitutes @VAR@ when there is no whitespace
// between the name and the closing '@'; keep these placeholders tight (and
// protect them from code formatters).
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
|
||||
|
||||
static constexpr std::string_view project_mdlp_name = "@PROJECT_NAME@";
|
||||
static constexpr std::string_view project_mdlp_version = "@PROJECT_VERSION@";
|
||||
static constexpr std::string_view project_mdlp_description = "@PROJECT_DESCRIPTION@";
|
||||
static constexpr std::string_view git_mdlp_sha = "@GIT_SHA@";
|
2
fimdlpConfig.cmake.in
Normal file
2
fimdlpConfig.cmake.in
Normal file
@@ -0,0 +1,2 @@
|
||||
@PACKAGE_INIT@
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/fimdlpTargets.cmake")
|
47
getversion.py
Normal file
47
getversion.py
Normal file
@@ -0,0 +1,47 @@
|
||||
|
||||
# read the version from the CMakeLists.txt file
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def get_version_from_cmakelists(cmakelists_path: "Path | str") -> "str | None":
    """Return the ``x.y.z`` version declared in a CMake ``project()`` call.

    Args:
        cmakelists_path: Path of the CMakeLists.txt file to inspect.

    Returns:
        The version string (e.g. ``"2.1.0"``), or ``None`` when the file has
        no ``project(... VERSION x.y.z ...)`` call.

    Exits the process with status 1 if the file cannot be read.
    """
    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # Undecodable bytes are replaced because only the ASCII version
        # number matters here.
        with open(cmakelists_path, "r", encoding="utf-8", errors="replace") as file:
            content = file.read()
    except OSError as e:
        # Diagnostics go to stderr so stdout carries only the version.
        print(f"Error reading {cmakelists_path}: {e}", file=sys.stderr)
        sys.exit(1)

    # Match `project(` (any case), skip anything up to the VERSION keyword
    # within the same call (possibly across lines), and capture x.y.z.
    version_pattern = re.compile(
        r'project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)', re.IGNORECASE | re.DOTALL
    )
    match = version_pattern.search(content)
    return match.group(1) if match else None
|
||||
|
||||
def main():
    """Print the version declared in the CMakeLists.txt next to this script.

    Writes ``Version: x.y.z`` to stdout on success. On failure, writes a
    message to stderr and exits with status 1.
    """
    cmakelists_path = Path(__file__).parent / "CMakeLists.txt"

    if not cmakelists_path.exists():
        print(f"Error: {cmakelists_path} does not exist.", file=sys.stderr)
        sys.exit(1)

    version = get_version_from_cmakelists(cmakelists_path)
    if not version:
        print("Version not found in CMakeLists.txt.", file=sys.stderr)
        sys.exit(1)

    print(f"Version: {version}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,6 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(main)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
find_package(arff-files REQUIRED)
|
||||
|
||||
add_executable(sample sample.cpp ../tests/ArffFiles.cpp ../Metrics.cpp ../CPPFImdlp.cpp)
|
||||
include_directories(
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
${arff-files_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
add_executable(sample sample.cpp)
|
||||
target_link_libraries(sample PRIVATE fimdlp torch::torch arff-files::arff-files)
|
||||
|
@@ -1,55 +1,226 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "../CPPFImdlp.h"
|
||||
#include "../tests/ArffFiles.h"
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <getopt.h>
|
||||
#include <torch/torch.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "Discretizer.h"
|
||||
#include "CPPFImdlp.h"
|
||||
#include "BinDisc.h"
|
||||
|
||||
const string PATH = "tests/datasets/";
|
||||
|
||||
/* print a description of all supported options */
|
||||
void usage(const char* path)
|
||||
{
|
||||
/* take only the last portion of the path */
|
||||
const char* basename = strrchr(path, '/');
|
||||
basename = basename ? basename + 1 : path;
|
||||
|
||||
std::cout << "usage: " << basename << "[OPTION]" << std::endl;
|
||||
std::cout << " -h, --help\t\t Print this help and exit." << std::endl;
|
||||
std::cout
|
||||
<< " -f, --file[=FILENAME]\t {all, diabetes, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors, test}."
|
||||
<< std::endl;
|
||||
std::cout << " -p, --path[=FILENAME]\t folder where the arff dataset is located, default " << PATH << std::endl;
|
||||
std::cout << " -m, --max_depth=INT\t max_depth pased to discretizer. Default = MAX_INT" << std::endl;
|
||||
std::cout
|
||||
<< " -c, --max_cutpoints=FLOAT\t percentage of lines expressed in decimal or integer number or cut points. Default = 0 -> any"
|
||||
<< std::endl;
|
||||
std::cout << " -n, --min_length=INT\t interval min_length pased to discretizer. Default = 3" << std::endl;
|
||||
}
|
||||
|
||||
// Parse the command line.
// Returns (file_name, path, max_depth, min_length, max_cutpoints).
// Prints the usage text and exits when --help is given, when an unknown
// option is found or when no --file option is supplied.
tuple<string, string, int, int, float> parse_arguments(int argc, char** argv)
{
    string file_name;
    string path = PATH;
    int max_depth = numeric_limits<int>::max();
    int min_length = 3;
    float max_cutpoints = 0;
    const vector<struct option> long_options = {
        {"help", no_argument, nullptr, 'h'},
        {"file", required_argument, nullptr, 'f'},
        {"path", required_argument, nullptr, 'p'},
        {"max_depth", required_argument, nullptr, 'm'},
        {"max_cutpoints", required_argument, nullptr, 'c'},
        {"min_length", required_argument, nullptr, 'n'},
        {nullptr, no_argument, nullptr, 0}
    };
    while (true) {
        const auto c = getopt_long(argc, argv, "hf:p:m:c:n:", long_options.data(), nullptr);
        if (c == -1)
            break;
        switch (c) {
            case 'h':
                usage(argv[0]);
                exit(0);
            case 'f':
                file_name = string(optarg);
                break;
            case 'm':
                max_depth = stoi(optarg);
                break;
            case 'n':
                min_length = stoi(optarg);
                break;
            case 'c':
                max_cutpoints = stof(optarg);
                break;
            case 'p':
                path = optarg;
                // back() on an empty string is undefined behaviour, so an
                // empty path (-p "") is left untouched.
                if (!path.empty() && path.back() != '/')
                    path += '/';
                break;
            case '?':
                usage(argv[0]);
                exit(1);
            default:
                abort();
        }
    }
    // The dataset name is the only mandatory option.
    if (file_name.empty()) {
        usage(argv[0]);
        exit(1);
    }
    return make_tuple(file_name, path, max_depth, min_length, max_cutpoints);
}
|
||||
|
||||
void process_file(const string& path, const string& file_name, bool class_last, int max_depth, int min_length,
|
||||
float max_cutpoints)
|
||||
{
|
||||
ArffFiles file;
|
||||
|
||||
file.load(path + file_name + ".arff", class_last);
|
||||
const auto attributes = file.getAttributes();
|
||||
const auto items = file.getSize();
|
||||
std::cout << "Number of lines: " << items << std::endl;
|
||||
std::cout << "Attributes: " << std::endl;
|
||||
for (auto attribute : attributes) {
|
||||
std::cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << std::endl;
|
||||
}
|
||||
std::cout << "Class name: " << file.getClassName() << std::endl;
|
||||
std::cout << "Class type: " << file.getClassType() << std::endl;
|
||||
std::cout << "Data: " << std::endl;
|
||||
std::vector<mdlp::samples_t>& X = file.getX();
|
||||
mdlp::labels_t& y = file.getY();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (auto feature : X) {
|
||||
std::cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
std::cout << y[i] << std::endl;
|
||||
}
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
size_t total = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto min_max = minmax_element(X[i].begin(), X[i].end());
|
||||
std::cout << "Cut points for feature " << get<0>(attributes[i]) << ": [" << setprecision(3);
|
||||
test.fit(X[i], y);
|
||||
auto cut_points = test.getCutPoints();
|
||||
for (auto item : cut_points) {
|
||||
std::cout << item;
|
||||
if (item != cut_points.back())
|
||||
std::cout << ", ";
|
||||
}
|
||||
total += test.getCutPoints().size();
|
||||
std::cout << "]" << std::endl;
|
||||
std::cout << "Min: " << *min_max.first << " Max: " << *min_max.second << std::endl;
|
||||
std::cout << "--------------------------" << std::endl;
|
||||
}
|
||||
std::cout << "Total cut points ...: " << total << std::endl;
|
||||
std::cout << "Total feature states: " << total + attributes.size() << std::endl;
|
||||
std::cout << "Version ............: " << test.version() << std::endl;
|
||||
std::cout << "Transformed data (vector)..: " << std::endl;
|
||||
test.fit(X[0], y);
|
||||
auto data = test.transform(X[0]);
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << X[0][i] << " " << data[i] << std::endl;
|
||||
}
|
||||
auto Xt = torch::tensor(X[0], torch::kFloat32);
|
||||
auto yt = torch::tensor(y, torch::kInt32);
|
||||
//test.fit_t(Xt, yt);
|
||||
auto result = test.fit_transform_t(Xt, yt);
|
||||
std::cout << "Transformed data (torch)...: " << std::endl;
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << result[i].item<int>() << std::endl;
|
||||
}
|
||||
auto disc = mdlp::BinDisc(3);
|
||||
auto res_v = disc.fit_transform(X[0], y);
|
||||
disc.fit_t(Xt, yt);
|
||||
auto res_t = disc.transform_t(Xt);
|
||||
std::cout << "Transformed data (BinDisc)...: " << std::endl;
|
||||
for (int i = 130; i < 135; i++) {
|
||||
std::cout << std::fixed << std::setprecision(1) << Xt[i].item<mdlp::precision_t>() << " " << res_v[i] << " " << res_t[i].item<int>() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void process_all_files(const map<string, bool>& datasets, const string& path, int max_depth, int min_length,
|
||||
float max_cutpoints)
|
||||
{
|
||||
std::cout << "Results: " << "Max_depth: " << max_depth << " Min_length: " << min_length << " Max_cutpoints: "
|
||||
<< max_cutpoints << std::endl << std::endl;
|
||||
printf("%-20s %4s %4s\n", "Dataset", "Feat", "Cuts Time(ms)");
|
||||
printf("==================== ==== ==== ========\n");
|
||||
for (const auto& dataset : datasets) {
|
||||
ArffFiles file;
|
||||
file.load(path + dataset.first + ".arff", dataset.second);
|
||||
auto attributes = file.getAttributes();
|
||||
std::vector<mdlp::samples_t>& X = file.getX();
|
||||
mdlp::labels_t& y = file.getY();
|
||||
size_t timing = 0;
|
||||
size_t cut_points = 0;
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
auto test = mdlp::CPPFImdlp(min_length, max_depth, max_cutpoints);
|
||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||
test.fit(X[i], y);
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
timing += std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
|
||||
cut_points += test.getCutPoints().size();
|
||||
}
|
||||
printf("%-20s %4lu %4zu %8zu\n", dataset.first.c_str(), attributes.size(), cut_points, timing);
|
||||
}
|
||||
}
|
||||
|
||||
using namespace std;
|
||||
using namespace mdlp;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ArffFiles file;
|
||||
vector<string> lines;
|
||||
string path = "../../tests/datasets/";
|
||||
map<string, bool > datasets = {
|
||||
{"mfeat-factors", true},
|
||||
{"iris", true},
|
||||
{"letter", true},
|
||||
{"kdd_JapaneseVowels", false}
|
||||
std::map<std::string, bool> datasets = {
|
||||
{"diabetes", true},
|
||||
{"glass", true},
|
||||
{"iris", true},
|
||||
{"kdd_JapaneseVowels", false},
|
||||
{"letter", true},
|
||||
{"liver-disorders", true},
|
||||
{"mfeat-factors", true},
|
||||
{"test", true}
|
||||
};
|
||||
if (argc != 2 || datasets.find(argv[1]) == datasets.end()) {
|
||||
cout << "Usage: " << argv[0] << " {mfeat-factors, iris, letter, kdd_JapaneseVowels}" << endl;
|
||||
return 1;
|
||||
std::string file_name;
|
||||
std::string path;
|
||||
int max_depth;
|
||||
int min_length;
|
||||
float max_cutpoints;
|
||||
tie(file_name, path, max_depth, min_length, max_cutpoints) = parse_arguments(argc, argv);
|
||||
if (datasets.find(file_name) == datasets.end() && file_name != "all") {
|
||||
std::cout << "Invalid file name: " << file_name << std::endl;
|
||||
usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
file.load(path + argv[1] + ".arff", datasets[argv[1]]);
|
||||
auto attributes = file.getAttributes();
|
||||
int items = file.getSize();
|
||||
cout << "Number of lines: " << items << endl;
|
||||
cout << "Attributes: " << endl;
|
||||
for (auto attribute : attributes) {
|
||||
cout << "Name: " << get<0>(attribute) << " Type: " << get<1>(attribute) << endl;
|
||||
}
|
||||
cout << "Class name: " << file.getClassName() << endl;
|
||||
cout << "Class type: " << file.getClassType() << endl;
|
||||
cout << "Data: " << endl;
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
for (auto feature : X) {
|
||||
cout << fixed << setprecision(1) << feature[i] << " ";
|
||||
}
|
||||
cout << y[i] << endl;
|
||||
}
|
||||
mdlp::CPPFImdlp test = mdlp::CPPFImdlp(0);
|
||||
for (auto i = 0; i < attributes.size(); i++) {
|
||||
cout << "Cut points for " << get<0>(attributes[i]) << endl;
|
||||
cout << "--------------------------" << setprecision(3) << endl;
|
||||
test.fit(X[i], y);
|
||||
for (auto item : test.getCutPoints()) {
|
||||
cout << item << endl;
|
||||
}
|
||||
if (file_name == "all")
|
||||
process_all_files(datasets, path, max_depth, min_length, max_cutpoints);
|
||||
else {
|
||||
process_file(path, file_name, datasets[file_name], max_depth, min_length, max_cutpoints);
|
||||
std::cout << "File name ....: " << file_name << std::endl;
|
||||
std::cout << "Max depth ....: " << max_depth << std::endl;
|
||||
std::cout << "Min length ...: " << min_length << std::endl;
|
||||
std::cout << "Max cutpoints : " << max_cutpoints << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
25
scripts/build_conan.sh
Executable file
25
scripts/build_conan.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash

# Build script for fimdlp using Conan.
# Usage: build_conan.sh [--test]
#   --test   run the test suite with ctest after a successful build
#
# Abort on the first failing command, on unset variables and on failures
# inside pipelines.
set -euo pipefail

echo "Building fimdlp with Conan..."

# Clean previous builds
rm -rf build_conan

# Install dependencies and build
conan install . --output-folder=build_conan --build=missing --profile:build=default --profile:host=default

# Build the project
cd build_conan
cmake .. -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release
cmake --build .

echo "Build completed successfully!"

# Run tests if requested (${1:-} keeps `set -u` happy when no argument is given)
if [ "${1:-}" = "--test" ]; then
    echo "Running tests..."
    ctest --output-on-failure
fi
|
33
scripts/create_package.sh
Executable file
33
scripts/create_package.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash

# Script to create, test and upload the fimdlp Conan package.
set -e

PACKAGE_NAME="fimdlp"
# NOTE(review): conanfile.py derives the version from CMakeLists.txt; keep
# this value in sync with it.
PACKAGE_VERSION="2.1.0"
REMOTE_NAME="cimmeria"

echo "Creating Conan package for $PACKAGE_NAME/$PACKAGE_VERSION..."

# Create the package
conan create . --profile:build=default --profile:host=default

echo "Package created successfully!"

# Test the package
echo "Testing package..."
conan test test_package "$PACKAGE_NAME/$PACKAGE_VERSION@" --profile:build=default --profile:host=default

echo "Package tested successfully!"

# Upload to Cimmeria (if remote is configured).
# `conan remote list` prints "<name>: <url> ..."; anchor the match so that a
# remote whose name merely contains $REMOTE_NAME is not mistaken for it.
if conan remote list | grep -q "^$REMOTE_NAME:"; then
    echo "Uploading package to $REMOTE_NAME..."
    # Conan 2 uploads the recipe and its binaries by default; the Conan 1
    # `--all` flag is not accepted by `conan upload` any more.
    conan upload "$PACKAGE_NAME/$PACKAGE_VERSION" --remote="$REMOTE_NAME"
    echo "Package uploaded to $REMOTE_NAME successfully!"
else
    echo "Remote '$REMOTE_NAME' not configured. To upload the package:"
    echo "1. Add the remote: conan remote add $REMOTE_NAME <cimmeria-url>"
    echo "2. Login: conan remote login $REMOTE_NAME <username>"
    echo "3. Upload: conan upload $PACKAGE_NAME/$PACKAGE_VERSION --remote=$REMOTE_NAME"
fi
|
14
sonar-project.properties
Normal file
14
sonar-project.properties
Normal file
@@ -0,0 +1,14 @@
|
||||
sonar.projectKey=rmontanana_mdlp
|
||||
sonar.organization=rmontanana
|
||||
|
||||
# This is the name and version displayed in the SonarCloud UI.
|
||||
sonar.projectName=mdlp
|
||||
sonar.projectVersion=2.0.1
|
||||
# sonar.test.exclusions=tests/**
|
||||
# sonar.tests=tests/
|
||||
# sonar.coverage.exclusions=tests/**,sample/**
|
||||
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
|
||||
#sonar.sources=.
|
||||
|
||||
# Encoding of the source code. Default is default system encoding
|
||||
sonar.sourceEncoding=UTF-8
|
125
src/BinDisc.cpp
Normal file
125
src/BinDisc.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include "BinDisc.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
BinDisc::BinDisc(int n_bins, strategy_t strategy) :
|
||||
Discretizer(), n_bins{ n_bins }, strategy{ strategy }
|
||||
{
|
||||
if (n_bins < 3) {
|
||||
throw std::invalid_argument("n_bins must be greater than 2");
|
||||
}
|
||||
}
|
||||
BinDisc::~BinDisc() = default;
|
||||
void BinDisc::fit(samples_t& X)
|
||||
{
|
||||
// Input validation
|
||||
if (X.empty()) {
|
||||
throw std::invalid_argument("Input data X cannot be empty");
|
||||
}
|
||||
if (X.size() < static_cast<size_t>(n_bins)) {
|
||||
throw std::invalid_argument("Input data size must be at least equal to n_bins");
|
||||
}
|
||||
|
||||
cutPoints.clear();
|
||||
if (strategy == strategy_t::QUANTILE) {
|
||||
direction = bound_dir_t::RIGHT;
|
||||
fit_quantile(X);
|
||||
} else if (strategy == strategy_t::UNIFORM) {
|
||||
direction = bound_dir_t::RIGHT;
|
||||
fit_uniform(X);
|
||||
}
|
||||
}
|
||||
void BinDisc::fit(samples_t& X, labels_t& y)
|
||||
{
|
||||
if (X.empty()) {
|
||||
throw std::invalid_argument("X cannot be empty");
|
||||
}
|
||||
|
||||
// BinDisc is inherently unsupervised, but we validate inputs for consistency
|
||||
// Note: y parameter is validated but not used in binning strategy
|
||||
fit(X);
|
||||
}
|
||||
std::vector<precision_t> BinDisc::linspace(precision_t start, precision_t end, int num)
|
||||
{
|
||||
// Input validation
|
||||
if (num < 2) {
|
||||
throw std::invalid_argument("Number of points must be at least 2 for linspace");
|
||||
}
|
||||
if (std::isnan(start) || std::isnan(end)) {
|
||||
throw std::invalid_argument("Start and end values cannot be NaN");
|
||||
}
|
||||
if (std::isinf(start) || std::isinf(end)) {
|
||||
throw std::invalid_argument("Start and end values cannot be infinite");
|
||||
}
|
||||
|
||||
if (start == end) {
|
||||
return { start, end };
|
||||
}
|
||||
precision_t delta = (end - start) / static_cast<precision_t>(num - 1);
|
||||
std::vector<precision_t> linspc;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
precision_t val = start + delta * static_cast<precision_t>(i);
|
||||
linspc.push_back(val);
|
||||
}
|
||||
return linspc;
|
||||
}
|
||||
size_t clip(const size_t n, const size_t lower, const size_t upper)
|
||||
{
|
||||
return std::max(lower, std::min(n, upper));
|
||||
}
|
||||
// Computes the requested percentiles (expressed in 0..100) of `data` by linear
// interpolation between neighbouring elements. `data` is indexed positionally, so the
// caller is expected to pass it already sorted (fit_quantile sorts before calling).
// A value equal to the previously emitted one is dropped, so the result may hold fewer
// entries than `percentiles`.
// Throws std::invalid_argument when `data` or `percentiles` is empty.
std::vector<precision_t> BinDisc::percentile(samples_t& data, const std::vector<precision_t>& percentiles)
{
    // Input validation
    if (data.empty()) {
        throw std::invalid_argument("Data cannot be empty for percentile calculation");
    }
    if (percentiles.empty()) {
        throw std::invalid_argument("Percentiles cannot be empty");
    }
    if (data.size() == 1) {
        // With a single sample there is no neighbour to interpolate with (size() - 2 would
        // wrap around); every percentile equals that sample and duplicates are dropped.
        return { data.front() };
    }

    // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html
    std::vector<precision_t> results;
    results.reserve(percentiles.size());
    for (auto percentile : percentiles) {
        const auto i = static_cast<size_t>(std::floor(static_cast<precision_t>(data.size() - 1) * percentile / 100.));
        const auto indexLower = clip(i, 0, data.size() - 2);
        const precision_t percentI = static_cast<precision_t>(indexLower) / static_cast<precision_t>(data.size() - 1);
        const precision_t fraction =
            (percentile / 100.0 - percentI) /
            (static_cast<precision_t>(indexLower + 1) / static_cast<precision_t>(data.size() - 1) - percentI);
        const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction;
        // empty() must be tested first: back() on an empty vector is undefined behaviour
        if (results.empty() || value != results.back()) {
            results.push_back(value);
        }
    }
    return results;
}
|
||||
void BinDisc::fit_quantile(const samples_t& X)
|
||||
{
|
||||
auto quantiles = linspace(0.0, 100.0, n_bins + 1);
|
||||
auto data = X;
|
||||
std::sort(data.begin(), data.end());
|
||||
if (data.front() == data.back() || data.size() == 1) {
|
||||
// if X is constant, pass any two given points that shall be ignored in transform
|
||||
cutPoints.push_back(data.front());
|
||||
cutPoints.push_back(data.front());
|
||||
return;
|
||||
}
|
||||
cutPoints = percentile(data, quantiles);
|
||||
}
|
||||
void BinDisc::fit_uniform(const samples_t& X)
|
||||
{
|
||||
auto [vmin, vmax] = std::minmax_element(X.begin(), X.end());
|
||||
cutPoints = linspace(*vmin, *vmax, n_bins + 1);
|
||||
}
|
||||
}
|
36
src/BinDisc.h
Normal file
36
src/BinDisc.h
Normal file
@@ -0,0 +1,36 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef BINDISC_H
|
||||
#define BINDISC_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include "Discretizer.h"
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
enum class strategy_t {
|
||||
UNIFORM,
|
||||
QUANTILE
|
||||
};
|
||||
class BinDisc : public Discretizer {
|
||||
public:
|
||||
BinDisc(int n_bins = 3, strategy_t strategy = strategy_t::UNIFORM);
|
||||
~BinDisc();
|
||||
// y is included for compatibility with the Discretizer interface
|
||||
void fit(samples_t& X_, labels_t& y) override;
|
||||
void fit(samples_t& X);
|
||||
protected:
|
||||
std::vector<precision_t> linspace(precision_t start, precision_t end, int num);
|
||||
std::vector<precision_t> percentile(samples_t& data, const std::vector<precision_t>& percentiles);
|
||||
private:
|
||||
void fit_uniform(const samples_t&);
|
||||
void fit_quantile(const samples_t&);
|
||||
int n_bins;
|
||||
strategy_t strategy;
|
||||
};
|
||||
}
|
||||
#endif
|
237
src/CPPFImdlp.cpp
Normal file
237
src/CPPFImdlp.cpp
Normal file
@@ -0,0 +1,237 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
|
||||
Discretizer(),
|
||||
min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed)
|
||||
{
|
||||
// Input validation for constructor parameters
|
||||
if (min_length_ < 3) {
|
||||
throw std::invalid_argument("min_length must be greater than 2");
|
||||
}
|
||||
if (max_depth_ < 1) {
|
||||
throw std::invalid_argument("max_depth must be greater than 0");
|
||||
}
|
||||
if (proposed < 0.0f) {
|
||||
throw std::invalid_argument("proposed_cuts must be non-negative");
|
||||
}
|
||||
|
||||
direction = bound_dir_t::RIGHT;
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||
{
|
||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||
if (proposed_cuts == 0) {
|
||||
return numeric_limits<size_t>::max();
|
||||
}
|
||||
if (proposed_cuts > static_cast<precision_t>(X.size())) {
|
||||
throw invalid_argument("wrong proposed num_cuts value");
|
||||
}
|
||||
if (proposed_cuts < 1)
|
||||
return static_cast<size_t>(round(static_cast<precision_t>(X.size()) * proposed_cuts));
|
||||
return static_cast<size_t>(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added
|
||||
}
|
||||
|
||||
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
X = X_;
|
||||
y = y_;
|
||||
num_cut_points = compute_max_num_cut_points();
|
||||
depth = 0;
|
||||
discretizedData.clear();
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw std::invalid_argument("X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size()));
|
||||
}
|
||||
if (X.empty() || y.empty()) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
computeCutPoints(0, X.size(), 1);
|
||||
sort(cutPoints.begin(), cutPoints.end());
|
||||
if (num_cut_points > 0) {
|
||||
// Select the best (with lower entropy) cut points
|
||||
while (cutPoints.size() > num_cut_points) {
|
||||
resizeCutPoints();
|
||||
}
|
||||
}
|
||||
// Insert first & last X value to the cutpoints as them shall be ignored in transform
|
||||
auto [vmin, vmax] = std::minmax_element(X.begin(), X.end());
|
||||
cutPoints.push_back(*vmax);
|
||||
cutPoints.insert(cutPoints.begin(), *vmin);
|
||||
}
|
||||
|
||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t n;
|
||||
size_t m;
|
||||
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||
bool backWall; // true if duplicates reach beginning of the interval
|
||||
precision_t previous;
|
||||
precision_t actual;
|
||||
precision_t next;
|
||||
previous = safe_X_access(idxPrev);
|
||||
actual = safe_X_access(cut);
|
||||
next = safe_X_access(idxNext);
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
// get the first equal value of X in the interval
|
||||
while (idxPrev > start && actual == previous) {
|
||||
--idxPrev;
|
||||
previous = safe_X_access(idxPrev);
|
||||
}
|
||||
backWall = idxPrev == start && actual == previous;
|
||||
// get the last equal value of X in the interval
|
||||
while (idxNext < end - 1 && actual == next) {
|
||||
++idxNext;
|
||||
next = safe_X_access(idxNext);
|
||||
}
|
||||
// # of duplicates before cutpoint
|
||||
n = safe_subtract(safe_subtract(cut, 1), idxPrev);
|
||||
// # of duplicates after cutpoint
|
||||
m = idxNext - cut - 1;
|
||||
// Decide which values to use
|
||||
if (backWall) {
|
||||
m = int(idxNext - cut - 1) < 0 ? 0 : m; // Ensure m right
|
||||
cut = cut + m + 1;
|
||||
} else {
|
||||
cut = safe_subtract(cut, n);
|
||||
}
|
||||
actual = safe_X_access(cut);
|
||||
return { (actual + previous) / 2, cut };
|
||||
}
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
|
||||
{
|
||||
size_t cut;
|
||||
pair<precision_t, size_t> result;
|
||||
// Check if the interval length and the depth are Ok
|
||||
if (end < start || safe_subtract(end, start) < min_length || depth_ > max_depth)
|
||||
return;
|
||||
depth = depth_ > depth ? depth_ : depth;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut, depth_ + 1);
|
||||
computeCutPoints(cut, end, depth_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max();
|
||||
size_t elements = safe_subtract(end, start);
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left;
|
||||
precision_t entropy_right;
|
||||
precision_t minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (safe_X_access(idx) != safe_X_access(start)) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (safe_y_access(idx) == safe_y_access(idx - 1))
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k;
|
||||
int k1;
|
||||
int k2;
|
||||
precision_t ig;
|
||||
precision_t delta;
|
||||
precision_t ent;
|
||||
precision_t ent1;
|
||||
precision_t ent2;
|
||||
auto N = precision_t(safe_subtract(end, start));
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||
if (i1 >= X_.size() || i2 >= X_.size() || i1 >= y_.size() || i2 >= y_.size()) {
|
||||
throw std::out_of_range("Index out of bounds in sort comparison");
|
||||
}
|
||||
if (X_[i1] == X_[i2])
|
||||
return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
void CPPFImdlp::resizeCutPoints()
|
||||
{
|
||||
//Compute entropy of each of the whole cutpoint set and discards the biggest value
|
||||
precision_t maxEntropy = 0;
|
||||
precision_t entropy;
|
||||
size_t maxEntropyIdx = 0;
|
||||
size_t begin = 0;
|
||||
size_t end;
|
||||
for (size_t idx = 0; idx < cutPoints.size(); idx++) {
|
||||
end = begin;
|
||||
while (end < indices.size() && safe_X_access(end) < cutPoints[idx] && end < X.size())
|
||||
end++;
|
||||
entropy = metrics.entropy(begin, end);
|
||||
if (entropy > maxEntropy) {
|
||||
maxEntropy = entropy;
|
||||
maxEntropyIdx = idx;
|
||||
}
|
||||
begin = end;
|
||||
}
|
||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||
}
|
||||
|
||||
}
|
73
src/CPPFImdlp.h
Normal file
73
src/CPPFImdlp.h
Normal file
@@ -0,0 +1,73 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "Metrics.h"
|
||||
#include "Discretizer.h"
|
||||
|
||||
namespace mdlp {
|
||||
class CPPFImdlp : public Discretizer {
|
||||
public:
|
||||
CPPFImdlp() = default;
|
||||
CPPFImdlp(size_t min_length_, int max_depth_, float proposed);
|
||||
virtual ~CPPFImdlp() = default;
|
||||
void fit(samples_t& X_, labels_t& y_) override;
|
||||
inline int get_depth() const { return depth; };
|
||||
protected:
|
||||
size_t min_length = 3;
|
||||
int depth = 0;
|
||||
int max_depth = numeric_limits<int>::max();
|
||||
float proposed_cuts = 0;
|
||||
indices_t indices = indices_t();
|
||||
samples_t X = samples_t();
|
||||
labels_t y = labels_t();
|
||||
Metrics metrics = Metrics(y, indices);
|
||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
void computeCutPoints(size_t, size_t, int);
|
||||
void resizeCutPoints();
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
size_t compute_max_num_cut_points() const;
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
inline precision_t safe_X_access(size_t idx) const
|
||||
{
|
||||
if (idx >= indices.size()) {
|
||||
throw std::out_of_range("Index out of bounds for indices array");
|
||||
}
|
||||
size_t real_idx = indices[idx];
|
||||
if (real_idx >= X.size()) {
|
||||
throw std::out_of_range("Index out of bounds for X array");
|
||||
}
|
||||
return X[real_idx];
|
||||
}
|
||||
inline label_t safe_y_access(size_t idx) const
|
||||
{
|
||||
if (idx >= indices.size()) {
|
||||
throw std::out_of_range("Index out of bounds for indices array");
|
||||
}
|
||||
size_t real_idx = indices[idx];
|
||||
if (real_idx >= y.size()) {
|
||||
throw std::out_of_range("Index out of bounds for y array");
|
||||
}
|
||||
return y[real_idx];
|
||||
}
|
||||
inline size_t safe_subtract(size_t a, size_t b) const
|
||||
{
|
||||
if (b > a) {
|
||||
throw std::underflow_error("Subtraction would cause underflow");
|
||||
}
|
||||
return a - b;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
107
src/Discretizer.cpp
Normal file
107
src/Discretizer.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "Discretizer.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
labels_t& Discretizer::transform(const samples_t& data)
|
||||
{
|
||||
// Input validation
|
||||
if (data.empty()) {
|
||||
throw std::invalid_argument("Data for transformation cannot be empty");
|
||||
}
|
||||
if (cutPoints.size() < 2) {
|
||||
throw std::runtime_error("Discretizer not fitted yet or no valid cut points found");
|
||||
}
|
||||
|
||||
discretizedData.clear();
|
||||
discretizedData.reserve(data.size());
|
||||
// CutPoints always have at least two items
|
||||
// Have to ignore first and last cut points provided
|
||||
auto first = cutPoints.begin() + 1;
|
||||
auto last = cutPoints.end() - 1;
|
||||
auto bound = direction == bound_dir_t::LEFT ? std::lower_bound<std::vector<precision_t>::iterator, precision_t> : std::upper_bound<std::vector<precision_t>::iterator, precision_t>;
|
||||
for (const precision_t& item : data) {
|
||||
auto pos = bound(first, last, item);
|
||||
auto number = pos - first;
|
||||
discretizedData.push_back(static_cast<label_t>(number));
|
||||
}
|
||||
return discretizedData;
|
||||
}
|
||||
labels_t& Discretizer::fit_transform(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
fit(X_, y_);
|
||||
return transform(X_);
|
||||
}
|
||||
void Discretizer::fit_t(const torch::Tensor& X_, const torch::Tensor& y_)
|
||||
{
|
||||
// Validate tensor properties for security
|
||||
if (X_.sizes().size() != 1 || y_.sizes().size() != 1) {
|
||||
throw std::invalid_argument("Only 1D tensors supported");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32) {
|
||||
throw std::invalid_argument("X tensor must be Float32 type");
|
||||
}
|
||||
if (y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("y tensor must be Int32 type");
|
||||
}
|
||||
if (X_.numel() != y_.numel()) {
|
||||
throw std::invalid_argument("X and y tensors must have same number of elements");
|
||||
}
|
||||
if (X_.numel() == 0) {
|
||||
throw std::invalid_argument("Tensors cannot be empty");
|
||||
}
|
||||
|
||||
auto num_elements = X_.numel();
|
||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||
fit(X, y);
|
||||
}
|
||||
torch::Tensor Discretizer::transform_t(const torch::Tensor& X_)
|
||||
{
|
||||
// Validate tensor properties for security
|
||||
if (X_.sizes().size() != 1) {
|
||||
throw std::invalid_argument("Only 1D tensors supported");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32) {
|
||||
throw std::invalid_argument("X tensor must be Float32 type");
|
||||
}
|
||||
if (X_.numel() == 0) {
|
||||
throw std::invalid_argument("Tensor cannot be empty");
|
||||
}
|
||||
|
||||
auto num_elements = X_.numel();
|
||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||
auto result = transform(X);
|
||||
return torch::tensor(result, torch_label_t);
|
||||
}
|
||||
torch::Tensor Discretizer::fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_)
|
||||
{
|
||||
// Validate tensor properties for security
|
||||
if (X_.sizes().size() != 1 || y_.sizes().size() != 1) {
|
||||
throw std::invalid_argument("Only 1D tensors supported");
|
||||
}
|
||||
if (X_.dtype() != torch::kFloat32) {
|
||||
throw std::invalid_argument("X tensor must be Float32 type");
|
||||
}
|
||||
if (y_.dtype() != torch::kInt32) {
|
||||
throw std::invalid_argument("y tensor must be Int32 type");
|
||||
}
|
||||
if (X_.numel() != y_.numel()) {
|
||||
throw std::invalid_argument("X and y tensors must have same number of elements");
|
||||
}
|
||||
if (X_.numel() == 0) {
|
||||
throw std::invalid_argument("Tensors cannot be empty");
|
||||
}
|
||||
|
||||
auto num_elements = X_.numel();
|
||||
samples_t X(X_.data_ptr<precision_t>(), X_.data_ptr<precision_t>() + num_elements);
|
||||
labels_t y(y_.data_ptr<int>(), y_.data_ptr<int>() + num_elements);
|
||||
auto result = fit_transform(X, y);
|
||||
return torch::tensor(result, torch_label_t);
|
||||
}
|
||||
}
|
40
src/Discretizer.h
Normal file
40
src/Discretizer.h
Normal file
@@ -0,0 +1,40 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef DISCRETIZER_H
|
||||
#define DISCRETIZER_H
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "typesFImdlp.h"
|
||||
#include <torch/torch.h>
|
||||
#include "config.h"
|
||||
|
||||
namespace mdlp {
|
||||
enum class bound_dir_t {
|
||||
LEFT,
|
||||
RIGHT
|
||||
};
|
||||
const auto torch_label_t = torch::kInt32;
|
||||
class Discretizer {
|
||||
public:
|
||||
Discretizer() = default;
|
||||
virtual ~Discretizer() = default;
|
||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||
virtual void fit(samples_t& X_, labels_t& y_) = 0;
|
||||
labels_t& transform(const samples_t& data);
|
||||
labels_t& fit_transform(samples_t& X_, labels_t& y_);
|
||||
void fit_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||
torch::Tensor transform_t(const torch::Tensor& X_);
|
||||
torch::Tensor fit_transform_t(const torch::Tensor& X_, const torch::Tensor& y_);
|
||||
static inline std::string version() { return { project_mdlp_version.begin(), project_mdlp_version.end() }; };
|
||||
protected:
|
||||
labels_t discretizedData = labels_t();
|
||||
cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform
|
||||
bound_dir_t direction; // used in transform
|
||||
};
|
||||
}
|
||||
#endif
|
112
src/Metrics.cpp
Normal file
112
src/Metrics.cpp
Normal file
@@ -0,0 +1,112 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_) : y(y_), indices(indices_),
|
||||
numClasses(computeNumClasses(0, indices_.size()))
|
||||
{
|
||||
}
|
||||
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
|
||||
void Metrics::setData(const labels_t& y_, const indices_t& indices_)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
numClasses = computeNumClasses(0, indices.size());
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
|
||||
// Check cache first with read lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute entropy outside of lock
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
nElements++;
|
||||
}
|
||||
for (auto count : counts) {
|
||||
if (count > 0) {
|
||||
p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
|
||||
// Update cache with write lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
}
|
||||
|
||||
return ventropy;
|
||||
}
|
||||
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
// Check cache first with read lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute information gain outside of lock
|
||||
precision_t iGain;
|
||||
precision_t entropyInterval;
|
||||
precision_t entropyLeft;
|
||||
precision_t entropyRight;
|
||||
size_t nElementsLeft = cut - start;
|
||||
size_t nElementsRight = end - cut;
|
||||
size_t nElements = end - start;
|
||||
|
||||
entropyInterval = entropy(start, end);
|
||||
entropyLeft = entropy(start, cut);
|
||||
entropyRight = entropy(cut, end);
|
||||
iGain = entropyInterval -
|
||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||
static_cast<precision_t>(nElements);
|
||||
|
||||
// Update cache with write lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||
igCache[make_tuple(start, cut, end)] = iGain;
|
||||
}
|
||||
|
||||
return iGain;
|
||||
}
|
||||
|
||||
}
|
30
src/Metrics.h
Normal file
30
src/Metrics.h
Normal file
@@ -0,0 +1,30 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
mutable std::mutex cache_mutex;
|
||||
cacheEnt_t entropyCache = cacheEnt_t();
|
||||
cacheIg_t igCache = cacheIg_t();
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(const labels_t&, const indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
25
src/typesFImdlp.h
Normal file
25
src/typesFImdlp.h
Normal file
@@ -0,0 +1,25 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
typedef float precision_t;
|
||||
typedef int label_t;
|
||||
typedef std::vector<precision_t> samples_t;
|
||||
typedef std::vector<label_t> labels_t;
|
||||
typedef std::vector<size_t> indices_t;
|
||||
typedef std::vector<precision_t> cutPoints_t;
|
||||
typedef std::map<std::pair<int, int>, precision_t> cacheEnt_t;
|
||||
typedef std::map<std::tuple<int, int, int>, precision_t> cacheIg_t;
|
||||
}
|
||||
#endif
|
9
test_consumer/CMakeLists.txt
Normal file
9
test_consumer/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(test_fimdlp)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
find_package(fimdlp REQUIRED)
|
||||
|
||||
add_executable(test_fimdlp test_fimdlp.cpp)
|
||||
target_link_libraries(test_fimdlp fimdlp::fimdlp)
|
9
test_consumer/CMakeUserPresets.json
Normal file
9
test_consumer/CMakeUserPresets.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/Release/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
9
test_consumer/conanfile.txt
Normal file
9
test_consumer/conanfile.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
[requires]
|
||||
fimdlp/2.0.1
|
||||
|
||||
[generators]
|
||||
CMakeDeps
|
||||
CMakeToolchain
|
||||
|
||||
[layout]
|
||||
cmake_layout
|
39
test_consumer/test_fimdlp.cpp
Normal file
39
test_consumer/test_fimdlp.cpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <fimdlp/BinDisc.h>
|
||||
|
||||
// Smoke test for the installed package: fits both discretizers on a small data set and
// prints the cut points each one produces.
int main() {
    std::cout << "Testing FIMDLP package..." << std::endl;

    // Test data - simple continuous values with binary classification
    mdlp::samples_t data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
    mdlp::labels_t labels = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1};

    std::cout << "Created test data with " << data.size() << " samples" << std::endl;

    // Test MDLP discretizer
    mdlp::CPPFImdlp discretizer;
    discretizer.fit(data, labels);

    const auto cut_points = discretizer.getCutPoints();
    std::cout << "MDLP found " << cut_points.size() << " cut points" << std::endl;

    size_t position = 0;
    for (const auto& point : cut_points) {
        std::cout << "Cut point " << position << ": " << point << std::endl;
        ++position;
    }

    // Test BinDisc discretizer
    mdlp::BinDisc bin_discretizer(3, mdlp::strategy_t::UNIFORM); // 3 bins, uniform strategy
    bin_discretizer.fit(data, labels);

    const auto bin_cut_points = bin_discretizer.getCutPoints();
    std::cout << "BinDisc found " << bin_cut_points.size() << " cut points" << std::endl;

    position = 0;
    for (const auto& point : bin_cut_points) {
        std::cout << "Bin cut point " << position << ": " << point << std::endl;
        ++position;
    }

    std::cout << "FIMDLP package test completed successfully!" << std::endl;
    return 0;
}
|
9
test_package/CMakeLists.txt
Normal file
9
test_package/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(test_fimdlp)
|
||||
|
||||
find_package(fimdlp REQUIRED)
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
add_executable(test_fimdlp src/test_fimdlp.cpp)
|
||||
target_link_libraries(test_fimdlp fimdlp::fimdlp torch::torch)
|
||||
target_compile_features(test_fimdlp PRIVATE cxx_std_17)
|
10
test_package/CMakeUserPresets.json
Normal file
10
test_package/CMakeUserPresets.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/gcc-14-x86_64-gnu17-release/generators/CMakePresets.json",
|
||||
"build/gcc-14-x86_64-gnu17-debug/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
28
test_package/conanfile.py
Normal file
28
test_package/conanfile.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
from conan import ConanFile
|
||||
from conan.tools.cmake import CMake, cmake_layout
|
||||
from conan.tools.build import can_run
|
||||
|
||||
|
||||
class FimdlpTestConan(ConanFile):
    # Conan test_package recipe: builds the small consumer program with CMake
    # and runs it against the package under test.
    settings = ("os", "compiler", "build_type", "arch")
    # VirtualBuildEnv and VirtualRunEnv can be avoided if "tools.env:CONAN_RUN_TESTS" is false
    generators = ("CMakeDeps", "CMakeToolchain", "VirtualRunEnv")
    apply_env = False  # avoid the default VirtualBuildEnv from the base class
    test_type = "explicit"

    def requirements(self):
        # Depend on exactly the reference being tested
        self.requires(self.tested_reference_str)

    def layout(self):
        cmake_layout(self)

    def build(self):
        builder = CMake(self)
        builder.configure()
        builder.build()

    def test(self):
        # Skip execution when can_run() reports the binary cannot run here
        if not can_run(self):
            return
        binary = os.path.join(self.cpp.build.bindir, "test_fimdlp")
        self.run(binary, env="conanrun")
|
27
test_package/src/test_fimdlp.cpp
Normal file
27
test_package/src/test_fimdlp.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <fimdlp/Metrics.h>
|
||||
|
||||
int main() {
|
||||
std::cout << "Testing fimdlp library..." << std::endl;
|
||||
|
||||
// Simple test of the library
|
||||
try {
|
||||
// Test Metrics class
|
||||
Metrics metrics;
|
||||
std::vector<int> labels = {0, 0, 1, 1, 0, 1};
|
||||
double entropy = metrics.entropy(labels);
|
||||
std::cout << "Entropy calculated: " << entropy << std::endl;
|
||||
|
||||
// Test CPPFImdlp creation
|
||||
CPPFImdlp discretizer;
|
||||
std::cout << "CPPFImdlp instance created successfully" << std::endl;
|
||||
|
||||
std::cout << "fimdlp library test completed successfully!" << std::endl;
|
||||
return 0;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Error testing fimdlp library: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
@@ -1,116 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor: nothing to initialise explicitly, every member is
// default-constructed.
ArffFiles::ArffFiles() = default;
|
||||
// Returns a copy of the stored data lines.
vector<string> ArffFiles::getLines()
{
    return this->lines;
}
|
||||
unsigned long int ArffFiles::getSize()
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
// Returns a copy of the stored (name, type) attribute pairs.
vector<tuple<string, string>> ArffFiles::getAttributes()
{
    return this->attributes;
}
|
||||
// Returns the name of the class attribute.
string ArffFiles::getClassName()
{
    return this->className;
}
|
||||
// Returns the declared type of the class attribute.
string ArffFiles::getClassType()
{
    return this->classType;
}
|
||||
// Returns a reference to the feature matrix (no copy is made).
vector<vector<float>>& ArffFiles::getX()
{
    return this->X;
}
|
||||
// Returns a reference to the integer-coded labels (no copy is made).
vector<int>& ArffFiles::getY()
{
    return this->y;
}
|
||||
// Loads an ARFF file: collects the @attribute declarations and the data
// lines, selects the class attribute and builds X / y via generateDataset().
//   fileName  - path of the file to read
//   classLast - true: the class is the last declared attribute;
//               false: the class is the first declared attribute
// Throws invalid_argument when the file cannot be opened or when it declares
// no attributes.
void ArffFiles::load(string fileName, bool classLast)
{
    ifstream file(fileName);
    if (!file.is_open()) {
        throw invalid_argument("Unable to open file");
    }
    // Start from a clean state so that a second load() does not append to the
    // lines and attributes of the previously loaded file.
    lines.clear();
    attributes.clear();
    string line;
    while (getline(file, line)) {
        // Skip blank lines and % comments (emptiness is checked before line[0])
        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
            continue;
        }
        if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
            // Fresh tokens per line: an incomplete declaration must not
            // inherit the name/type parsed from an earlier line.
            string keyword, attribute, type;
            stringstream ss(line);
            ss >> keyword >> attribute >> type;
            attributes.push_back(make_tuple(attribute, type));
            continue;
        }
        // Every other @ directive is ignored
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty()) {
        throw invalid_argument("No attributes found");
    }
    // Detach the class attribute from the feature attributes
    if (classLast) {
        className = get<0>(attributes.back());
        classType = get<1>(attributes.back());
        attributes.pop_back();
    } else {
        className = get<0>(attributes.front());
        classType = get<1>(attributes.front());
        attributes.erase(attributes.begin());
    }
    generateDataset(classLast);
}
|
||||
void ArffFiles::generateDataset(bool classLast)
|
||||
{
|
||||
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
|
||||
vector<string> yy = vector<string>(lines.size(), "");
|
||||
int labelIndex = classLast ? attributes.size() : 0;
|
||||
for (int i = 0; i < lines.size(); i++) {
|
||||
stringstream ss(lines[i]);
|
||||
string value;
|
||||
int pos = 0, xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
// Returns a copy of source without leading and trailing spaces, newlines,
// carriage returns and tabs.
string ArffFiles::trim(const string& source)
{
    const char* const blanks = " \n\r\t";
    string trimmed(source);
    trimmed.erase(0, trimmed.find_first_not_of(blanks));
    trimmed.erase(trimmed.find_last_not_of(blanks) + 1);
    return trimmed;
}
|
||||
// Maps every distinct label to an integer code, numbered from 0 in order of
// first appearance, and returns the code of each input label.
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
{
    vector<int> codes;
    codes.reserve(labels_t.size());
    map<string, int> code_of;
    int next_code = 0;
    for (const string& label : labels_t) {
        // emplace only inserts when the label has not been seen before
        auto inserted = code_of.emplace(label, next_code);
        if (inserted.second) {
            ++next_code;
        }
        codes.push_back(inserted.first->second);
    }
    return codes;
}
|
@@ -1,28 +0,0 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
using namespace std;
|
||||
class ArffFiles {
|
||||
private:
|
||||
vector<string> lines;
|
||||
vector<tuple<string, string>> attributes;
|
||||
string className, classType;
|
||||
vector<vector<float>> X;
|
||||
vector<int> y;
|
||||
void generateDataset(bool);
|
||||
public:
|
||||
ArffFiles();
|
||||
void load(string, bool = true);
|
||||
vector<string> getLines();
|
||||
unsigned long int getSize();
|
||||
string getClassName();
|
||||
string getClassType();
|
||||
string trim(const string&);
|
||||
vector<vector<float>>& getX();
|
||||
vector<int>& getY();
|
||||
vector<tuple<string, string>> getAttributes();
|
||||
vector<int> factorize(const vector<string>& labels_t);
|
||||
};
|
||||
#endif
|
473
tests/BinDisc_unittest.cpp
Normal file
473
tests/BinDisc_unittest.cpp
Normal file
@@ -0,0 +1,473 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "gtest/gtest.h"
|
||||
#include <ArffFiles.hpp>
|
||||
#include "BinDisc.h"
|
||||
#include "Experiments.hpp"
|
||||
#include <cmath>
|
||||
|
||||
// Checks that `stmt` throws an exception of type `etype` whose what() text
// equals `whatstring`. The inner handler compares the message and rethrows,
// so the enclosing EXPECT_THROW still verifies the exception type.
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring)      \
    EXPECT_THROW(                                               \
        try {                                                   \
            stmt;                                               \
        } catch (const etype& caught) {                         \
            EXPECT_EQ(whatstring, std::string(caught.what()));  \
            throw;                                              \
        },                                                      \
        etype)
|
||||
|
||||
namespace mdlp {
|
||||
// Absolute tolerance used when comparing cut points
const float margin = 1e-4;
// Returns the dataset directory: "datasets/" when iris.arff can be opened
// from there, "tests/datasets/" otherwise.
static std::string set_data_path()
{
    std::string local_dir = "datasets/";
    std::ifstream probe(local_dir + "iris.arff");
    if (!probe.is_open()) {
        return "tests/datasets/";
    }
    probe.close();
    return local_dir;
}
const std::string data_path = set_data_path();
|
||||
// Fixture: a BinDisc with the UNIFORM strategy, 3 bins by default
class TestBinDisc3U : public BinDisc, public testing::Test {
public:
    TestBinDisc3U(int n_bins = 3) : BinDisc(n_bins, strategy_t::UNIFORM) {}
};
|
||||
// Fixture: a BinDisc with the QUANTILE strategy, 3 bins by default
class TestBinDisc3Q : public BinDisc, public testing::Test {
public:
    TestBinDisc3Q(int n_bins = 3) : BinDisc(n_bins, strategy_t::QUANTILE) {}
};
|
||||
// Fixture: a BinDisc with the UNIFORM strategy, 4 bins by default
class TestBinDisc4U : public BinDisc, public testing::Test {
public:
    TestBinDisc4U(int n_bins = 4) : BinDisc(n_bins, strategy_t::UNIFORM) {}
};
|
||||
// Fixture: a BinDisc with the QUANTILE strategy, 4 bins by default
class TestBinDisc4Q : public BinDisc, public testing::Test {
public:
    TestBinDisc4Q(int n_bins = 4) : BinDisc(n_bins, strategy_t::QUANTILE) {}
};
|
||||
TEST_F(TestBinDisc3U, Easy3BinsUniform)
{
    // Nine consecutive values, fitted through the fit(X, y) overload with empty labels
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    labels_t no_labels;
    fit(samples, no_labels);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 3.66667, 6.33333, 9.0 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3Q, Easy3BinsQuantile)
{
    // Nine consecutive values discretized with the quantile strategy
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 3.666667, 6.333333, 9 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points[i], margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3U, X10BinsUniform)
{
    // Ten consecutive values in three uniform bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 4.0, 7.0, 10.0 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3Q, X10BinsQuantile)
{
    // Ten consecutive values in three quantile bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 4.0, 7.0, 10.0 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3U, X11BinsUniform)
{
    // Eleven consecutive values in three uniform bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 4.33333, 7.66667, 11.0 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
// Eleven consecutive values in three quantile bins.
// Runs on the QUANTILE fixture: the test previously used TestBinDisc3U, which
// made it a duplicate of X11BinsUniform and left the quantile strategy
// untested for this input. For evenly spaced data the expected cut points
// coincide with the uniform ones.
TEST_F(TestBinDisc3Q, X11BinsQuantile)
{
    samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 };
    fit(X);
    auto cuts = getCutPoints();
    ASSERT_EQ(4, cuts.size());
    EXPECT_NEAR(1, cuts.at(0), margin);
    EXPECT_NEAR(4.33333, cuts.at(1), margin);
    EXPECT_NEAR(7.66667, cuts.at(2), margin);
    EXPECT_NEAR(11.0, cuts.at(3), margin);
    auto labels = transform(X);
    labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
    EXPECT_EQ(expected, labels);
}
|
||||
TEST_F(TestBinDisc3U, ConstantUniform)
{
    // All samples equal: two identical cut points and a single label are expected
    samples_t samples = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(2, cut_points.size());
    for (size_t i = 0; i < 2; ++i) {
        EXPECT_NEAR(1, cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 0, 0, 0 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3Q, ConstantQuantile)
{
    // All samples equal: two identical cut points and a single label are expected
    samples_t samples = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(2, cut_points.size());
    for (size_t i = 0; i < 2; ++i) {
        EXPECT_NEAR(1, cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 0, 0, 0 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc3U, EmptyUniform)
{
    // Fitting an empty sample vector must be rejected
    samples_t no_samples = {};
    EXPECT_THROW(fit(no_samples), std::invalid_argument);
}
|
||||
TEST_F(TestBinDisc3Q, EmptyQuantile)
{
    // Fitting an empty sample vector must be rejected
    samples_t no_samples = {};
    EXPECT_THROW(fit(no_samples), std::invalid_argument);
}
|
||||
TEST(TestBinDisc3, ExceptionNumberBins)
{
    // Constructing a BinDisc with 2 bins is expected to throw
    EXPECT_THROW(BinDisc(2), std::invalid_argument);
}
|
||||
TEST_F(TestBinDisc3U, EasyRepeated)
{
    // Unsorted input holding only the values 1 and 3
    samples_t samples = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(4, cut_points.size());
    const double expected_cuts[] = { 1, 1.66667, 2.33333, 3.0 };
    for (size_t i = 0; i < 4; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 2, 0, 0, 2, 0, 0, 2, 0, 0 };
    EXPECT_EQ(expected_labels, discretized);
    // The input vector must be left untouched by fit/transform
    ASSERT_EQ(3.0, samples[0]);
}
|
||||
TEST_F(TestBinDisc3Q, EasyRepeated)
{
    // Unsorted input holding only the values 1 and 3; only three cut points are expected
    samples_t samples = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(3, cut_points.size());
    const double expected_cuts[] = { 1, 1.66667, 3.0 };
    for (size_t i = 0; i < 3; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 1, 0, 0, 1, 0, 0, 1, 0, 0 };
    EXPECT_EQ(expected_labels, discretized);
    // The input vector must be left untouched by fit/transform
    ASSERT_EQ(3.0, samples[0]);
}
|
||||
TEST_F(TestBinDisc4U, Easy4BinsUniform)
{
    // Twelve consecutive values in four uniform bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 3.75, 6.5, 9.25, 12.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4Q, Easy4BinsQuantile)
{
    // Twelve consecutive values in four quantile bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 3.75, 6.5, 9.25, 12.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4U, X13BinsUniform)
{
    // Thirteen consecutive values in four uniform bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.0, 7.0, 10.0, 13.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4Q, X13BinsQuantile)
{
    // Thirteen consecutive values in four quantile bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.0, 7.0, 10.0, 13.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4U, X14BinsUniform)
{
    // Fourteen consecutive values in four uniform bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.25, 7.5, 10.75, 14.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4Q, X14BinsQuantile)
{
    // Fourteen consecutive values in four quantile bins
    samples_t samples = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.25, 7.5, 10.75, 14.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4U, X15BinsUniform)
{
    // The values 1..15 in shuffled order, four uniform bins
    samples_t samples = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.5, 8, 11.5, 15.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4Q, X15BinsQuantile)
{
    // The values 1..15 in shuffled order, four quantile bins
    samples_t samples = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 1.0, 4.5, 8, 11.5, 15.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4U, RepeatedValuesUniform)
{
    // Ten samples (indices 0..9) drawn from the values 0..4 with repetitions
    samples_t samples = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 0.0, 1.0, 2.0, 3.0, 4.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST_F(TestBinDisc4Q, RepeatedValuesQuantile)
{
    // Ten samples (indices 0..9) drawn from the values 0..4 with repetitions
    samples_t samples = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 };
    fit(samples);
    auto cut_points = getCutPoints();
    ASSERT_EQ(5, cut_points.size());
    const double expected_cuts[] = { 0.0, 1.0, 2.0, 3.0, 4.0 };
    for (size_t i = 0; i < 5; ++i) {
        EXPECT_NEAR(expected_cuts[i], cut_points.at(i), margin);
    }
    auto discretized = transform(samples);
    const labels_t expected_labels = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 };
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST(TestBinDiscGeneric, Fileset)
{
    // Replays every experiment listed in tests.txt: fits a BinDisc with the
    // experiment's settings and compares the cut points and, for the
    // experiments numbered below 40, the discretized values as well.
    Experiments exps(data_path + "tests.txt");
    int num = 0;
    while (exps.is_next()) {
        ++num;
        Experiment exp = exps.next();
        const auto strategy = exp.strategy_[0] == 'Q' ? strategy_t::QUANTILE : strategy_t::UNIFORM;
        BinDisc disc(exp.n_bins_, strategy);
        // Input data: either the stored dataset or a generated range
        std::vector<precision_t> test;
        if (exp.type_ != experiment_t::RANGE) {
            test = exp.dataset_;
        } else {
            for (float value = exp.from_; value < exp.to_; value += exp.step_) {
                test.push_back(value);
            }
        }
        auto empty = std::vector<int>();
        auto Xt = disc.fit_transform(test, empty);
        auto cuts = disc.getCutPoints();
        EXPECT_EQ(exp.discretized_data_.size(), Xt.size());
        bool reported = false;
        size_t n_errors = 0;
        if (num < 40) {
            // Later experiments are not compared value by value because the
            // same codification cannot be ensured due to precision problems
            for (size_t i = 0; i < exp.discretized_data_.size(); ++i) {
                if (exp.discretized_data_.at(i) == Xt.at(i)) {
                    continue;
                }
                ++n_errors;
                if (reported) {
                    continue;
                }
                // Describe the experiment only for its first mismatch
                if (exp.type_ == experiment_t::RANGE) {
                    std::cout << "+Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl;
                } else {
                    std::cout << "+Exp #: " << num << " strategy: " << exp.strategy_ << " " << " n_bins: " << exp.n_bins_ << " ";
                    show_vector(exp.dataset_, "Dataset");
                }
                show_vector(cuts, "Cuts");
                std::cout << "Error at " << i << " test[i]=" << test.at(i) << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl;
                reported = true;
                EXPECT_EQ(exp.discretized_data_.at(i), Xt.at(i));
            }
            if (reported) {
                std::cout << "*** Found " << n_errors << " mistakes in this experiment dataset" << std::endl;
            }
        }
        EXPECT_EQ(exp.cutpoints_.size(), cuts.size());
        for (size_t i = 0; i < exp.cutpoints_.size(); ++i) {
            EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin);
        }
    }
}
|
||||
|
||||
TEST_F(TestBinDisc3U, FitDataSizeTooSmall)
{
    // Two samples cannot fill three bins: fit must refuse them
    samples_t too_few = { 1.0, 2.0 };
    EXPECT_THROW_WITH_MESSAGE(fit(too_few), std::invalid_argument, "Input data size must be at least equal to n_bins");
}
|
||||
|
||||
TEST_F(TestBinDisc3Q, FitDataSizeTooSmall)
{
    // Two samples cannot fill three bins: fit must refuse them
    samples_t too_few = { 1.0, 2.0 };
    EXPECT_THROW_WITH_MESSAGE(fit(too_few), std::invalid_argument, "Input data size must be at least equal to n_bins");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, FitWithYEmptyX)
{
    // fit(X, y) with labels but no samples must be rejected
    samples_t no_samples = {};
    labels_t some_labels = { 1, 2, 3 };
    EXPECT_THROW_WITH_MESSAGE(fit(no_samples, some_labels), std::invalid_argument, "X cannot be empty");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, LinspaceInvalidNumPoints)
{
    // linspace needs at least two points; asking for one must throw
    EXPECT_THROW_WITH_MESSAGE(linspace(0.0f, 1.0f, 1), std::invalid_argument, "Number of points must be at least 2 for linspace");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, LinspaceNaNValues)
{
    // A NaN at either end of the interval must be rejected
    const float not_a_number = std::numeric_limits<float>::quiet_NaN();
    EXPECT_THROW_WITH_MESSAGE(linspace(not_a_number, 1.0f, 3), std::invalid_argument, "Start and end values cannot be NaN");
    EXPECT_THROW_WITH_MESSAGE(linspace(0.0f, not_a_number, 3), std::invalid_argument, "Start and end values cannot be NaN");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, LinspaceInfiniteValues)
{
    // An infinite value at either end of the interval must be rejected
    const float infinite = std::numeric_limits<float>::infinity();
    EXPECT_THROW_WITH_MESSAGE(linspace(infinite, 1.0f, 3), std::invalid_argument, "Start and end values cannot be infinite");
    EXPECT_THROW_WITH_MESSAGE(linspace(0.0f, infinite, 3), std::invalid_argument, "Start and end values cannot be infinite");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, PercentileEmptyData)
{
    // percentile() must refuse an empty data vector
    samples_t no_data = {};
    std::vector<precision_t> wanted = { 25.0f, 50.0f, 75.0f };
    EXPECT_THROW_WITH_MESSAGE(percentile(no_data, wanted), std::invalid_argument, "Data cannot be empty for percentile calculation");
}
|
||||
|
||||
TEST_F(TestBinDisc3U, PercentileEmptyPercentiles)
{
    // percentile() must refuse an empty list of percentiles
    samples_t some_data = { 1.0f, 2.0f, 3.0f };
    std::vector<precision_t> nothing_wanted = {};
    EXPECT_THROW_WITH_MESSAGE(percentile(some_data, nothing_wanted), std::invalid_argument, "Percentiles cannot be empty");
}
|
||||
}
|
@@ -1,32 +1,40 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(FImdlp)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
include(FetchContent)
|
||||
find_package(arff-files REQUIRED)
|
||||
find_package(GTest REQUIRED)
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
|
||||
include_directories(
|
||||
${libtorch_INCLUDE_DIRS_DEBUG}
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${arff-files_INCLUDE_DIRS}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
enable_testing()
|
||||
|
||||
add_executable(Metrics_unittest ../Metrics.cpp Metrics_unittest.cpp)
|
||||
add_executable(FImdlp_unittest ../CPPFImdlp.cpp ../ArffFiles.cpp ../Metrics.cpp FImdlp_unittest.cpp)
|
||||
add_executable(Metrics_unittest ${fimdlp_SOURCE_DIR}/src/Metrics.cpp Metrics_unittest.cpp)
|
||||
target_link_libraries(Metrics_unittest GTest::gtest_main)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main)
|
||||
target_compile_options(Metrics_unittest PRIVATE --coverage)
|
||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||
target_link_options(Metrics_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(FImdlp_unittest FImdlp_unittest.cpp
|
||||
${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||
target_link_libraries(FImdlp_unittest GTest::gtest_main torch::torch)
|
||||
target_compile_options(FImdlp_unittest PRIVATE --coverage)
|
||||
target_link_options(FImdlp_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(BinDisc_unittest BinDisc_unittest.cpp ${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp)
|
||||
target_link_libraries(BinDisc_unittest GTest::gtest_main torch::torch)
|
||||
target_compile_options(BinDisc_unittest PRIVATE --coverage)
|
||||
target_link_options(BinDisc_unittest PRIVATE --coverage)
|
||||
|
||||
add_executable(Discretizer_unittest Discretizer_unittest.cpp
|
||||
${fimdlp_SOURCE_DIR}/src/BinDisc.cpp ${fimdlp_SOURCE_DIR}/src/CPPFImdlp.cpp ${fimdlp_SOURCE_DIR}/src/Metrics.cpp ${fimdlp_SOURCE_DIR}/src/Discretizer.cpp )
|
||||
target_link_libraries(Discretizer_unittest GTest::gtest_main torch::torch)
|
||||
target_compile_options(Discretizer_unittest PRIVATE --coverage)
|
||||
target_link_options(Discretizer_unittest PRIVATE --coverage)
|
||||
|
||||
include(GoogleTest)
|
||||
|
||||
gtest_discover_tests(Metrics_unittest)
|
||||
gtest_discover_tests(FImdlp_unittest)
|
||||
|
||||
gtest_discover_tests(BinDisc_unittest)
|
||||
gtest_discover_tests(Discretizer_unittest)
|
388
tests/Discretizer_unittest.cpp
Normal file
388
tests/Discretizer_unittest.cpp
Normal file
@@ -0,0 +1,388 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "gtest/gtest.h"
|
||||
#include "Discretizer.h"
|
||||
#include "BinDisc.h"
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
// Checks that `stmt` throws an exception of type `etype` whose what() text
// equals `whatstring`. The inner handler compares the message and rethrows,
// so the enclosing EXPECT_THROW still verifies the exception type.
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring)      \
    EXPECT_THROW(                                               \
        try {                                                   \
            stmt;                                               \
        } catch (const etype& caught) {                         \
            EXPECT_EQ(whatstring, std::string(caught.what()));  \
            throw;                                              \
        },                                                      \
        etype)
|
||||
|
||||
namespace mdlp {
|
||||
// Absolute tolerance for floating point comparisons
const float margin = 1e-4;
// Returns the dataset directory: "tests/datasets/" when iris.arff can be
// opened from there, "datasets/" otherwise.
static std::string set_data_path()
{
    std::string repo_dir = "tests/datasets/";
    std::ifstream probe(repo_dir + "iris.arff");
    if (!probe.is_open()) {
        return "datasets/";
    }
    probe.close();
    return repo_dir;
}
const std::string data_path = set_data_path();
|
||||
const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
|
||||
TEST(Discretizer, Version)
{
    // The version string is read through the Discretizer base pointer
    Discretizer* discretizer = new BinDisc(4, strategy_t::UNIFORM);
    auto reported_version = discretizer->version();
    delete discretizer;
    EXPECT_EQ("2.1.1", reported_version);
}
|
||||
TEST(Discretizer, BinIrisUniform)
{
    // Discretizes the first iris feature into 4 uniform bins through the base class interface
    ArffFiles file;
    Discretizer* discretizer = new BinDisc(4, strategy_t::UNIFORM);
    file.load(data_path + "iris.arff", true);
    auto& features = file.getX();
    labels_t no_labels;
    discretizer->fit(features[0], no_labels);
    auto discretized = discretizer->transform(features[0]);
    const labels_t expected_labels = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
    delete discretizer;
    EXPECT_EQ(expected_labels, discretized);
}
|
||||
TEST(Discretizer, BinIrisQuantile)
{
    // Discretizes the first iris feature into 4 quantile bins through the base class interface
    ArffFiles file;
    Discretizer* discretizer = new BinDisc(4, strategy_t::QUANTILE);
    file.load(data_path + "iris.arff", true);
    auto& features = file.getX();
    labels_t no_labels;
    discretizer->fit(features[0], no_labels);
    auto discretized = discretizer->transform(features[0]);
    delete discretizer;
    EXPECT_EQ(iris_quantile, discretized);
}
|
||||
|
||||
TEST(Discretizer, BinIrisQuantileTorch)
{
    // Same quantile discretization of the first iris feature, but through the tensor API (fit_t / transform_t)
    ArffFiles file;
    Discretizer* discretizer = new BinDisc(4, strategy_t::QUANTILE);
    file.load(data_path + "iris.arff", true);
    auto features = file.getX();
    auto targets = file.getY();
    auto feature_tensor = torch::tensor(features[0], torch::kFloat32);
    auto target_tensor = torch::tensor(targets, torch::kInt32);
    discretizer->fit_t(feature_tensor, target_tensor);
    torch::Tensor discretized = discretizer->transform_t(feature_tensor);
    delete discretizer;
    EXPECT_EQ(iris_quantile.size(), discretized.size(0));
    const int n_expected = static_cast<int>(iris_quantile.size());
    for (int i = 0; i < n_expected; ++i) {
        EXPECT_EQ(iris_quantile.at(i), discretized[i].item<int>());
    }
}
|
||||
TEST(Discretizer, BinIrisQuantileTorchFit_transform)
{
    // Quantile discretization of the first iris feature through the single-call fit_transform_t
    ArffFiles file;
    Discretizer* discretizer = new BinDisc(4, strategy_t::QUANTILE);
    file.load(data_path + "iris.arff", true);
    auto features = file.getX();
    auto targets = file.getY();
    auto feature_tensor = torch::tensor(features[0], torch::kFloat32);
    auto target_tensor = torch::tensor(targets, torch::kInt32);
    torch::Tensor discretized = discretizer->fit_transform_t(feature_tensor, target_tensor);
    delete discretizer;
    EXPECT_EQ(iris_quantile.size(), discretized.size(0));
    const int n_expected = static_cast<int>(iris_quantile.size());
    for (int i = 0; i < n_expected; ++i) {
        EXPECT_EQ(iris_quantile.at(i), discretized[i].item<int>());
    }
}
|
||||
|
||||
// Fits a CPPFImdlp on the second iris feature (X[1]) with the class labels
// and checks the discretized values through the Discretizer base pointer.
// The former local `labelsq` list was never read and has been removed.
TEST(Discretizer, FImdlpIris)
{
    labels_t expected = {
        5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
        5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
        3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
        3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
        4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
        1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
    };
    ArffFiles file;
    Discretizer* disc = new CPPFImdlp();
    file.load(data_path + "iris.arff", true);
    vector<samples_t>& X = file.getX();
    labels_t& y = file.getY();
    disc->fit(X[1], y);
    auto computed = disc->transform(X[1]);
    delete disc;
    // ASSERT (not EXPECT): on a size mismatch the loop below would index
    // `expected` out of range, so stop the test here instead.
    ASSERT_EQ(computed.size(), expected.size());
    for (unsigned long i = 0; i < computed.size(); i++) {
        EXPECT_EQ(computed[i], expected[i]);
    }
}
|
||||
|
||||
// Expects transform() on an empty sample vector to throw
// std::invalid_argument with the exact message checked below.
TEST(Discretizer, TransformEmptyData)
{
    // Automatic storage instead of new/delete: the discretizer is released on
    // every exit path, and the call still goes through the Discretizer
    // interface via the base-class reference.
    BinDisc bin_disc(4, strategy_t::UNIFORM);
    Discretizer& disc = bin_disc;
    samples_t empty_data = {};
    EXPECT_THROW_WITH_MESSAGE(disc.transform(empty_data), std::invalid_argument, "Data for transformation cannot be empty");
}
|
||||
|
||||
// Expects transform() on a discretizer that was never fitted to throw
// std::runtime_error with the exact message checked below.
TEST(Discretizer, TransformNotFitted)
{
    // Automatic storage instead of new/delete: the discretizer is released on
    // every exit path, and the call still goes through the Discretizer
    // interface via the base-class reference.
    BinDisc bin_disc(4, strategy_t::UNIFORM);
    Discretizer& disc = bin_disc;
    samples_t data = { 1.0f, 2.0f, 3.0f };
    EXPECT_THROW_WITH_MESSAGE(disc.transform(data), std::runtime_error, "Discretizer not fitted yet or no valid cut points found");
}
|
||||
|
||||
// Exercises the argument checks of fit_t(): each call below passes one
// malformed tensor and expects std::invalid_argument with a specific message.
TEST(Discretizer, TensorValidationFit)
{
    // Automatic storage instead of new/delete: nothing leaks if a statement
    // outside the EXPECT_THROW wrappers throws, and the calls still go through
    // the Discretizer interface via the base-class reference.
    BinDisc bin_disc(4, strategy_t::UNIFORM);
    Discretizer& disc = bin_disc;

    // Well-formed reference tensors; each case swaps exactly one of them.
    auto X = torch::tensor({ 1.0f, 2.0f, 3.0f }, torch::kFloat32);
    auto y = torch::tensor({ 1, 2, 3 }, torch::kInt32);

    // Test non-1D tensors
    auto X_2d = torch::tensor({ {1.0f, 2.0f}, {3.0f, 4.0f} }, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X_2d, y), std::invalid_argument, "Only 1D tensors supported");

    auto y_2d = torch::tensor({ {1, 2}, {3, 4} }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X, y_2d), std::invalid_argument, "Only 1D tensors supported");

    // Test wrong tensor types
    auto X_int = torch::tensor({ 1, 2, 3 }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X_int, y), std::invalid_argument, "X tensor must be Float32 type");

    auto y_float = torch::tensor({ 1.0f, 2.0f, 3.0f }, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X, y_float), std::invalid_argument, "y tensor must be Int32 type");

    // Test mismatched sizes
    auto y_short = torch::tensor({ 1, 2 }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X, y_short), std::invalid_argument, "X and y tensors must have same number of elements");

    // Test empty tensors
    auto X_empty = torch::tensor({}, torch::kFloat32);
    auto y_empty = torch::tensor({}, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_t(X_empty, y_empty), std::invalid_argument, "Tensors cannot be empty");
}
|
||||
|
||||
// Exercises the argument checks of transform_t() on a fitted discretizer:
// each call passes one malformed tensor and expects std::invalid_argument
// with a specific message.
TEST(Discretizer, TensorValidationTransform)
{
    // Automatic storage instead of new/delete: fit_t() below runs outside any
    // EXPECT_THROW wrapper, so with a raw pointer an unexpected exception
    // there would skip the delete. The calls still go through the Discretizer
    // interface via the base-class reference.
    BinDisc bin_disc(4, strategy_t::UNIFORM);
    Discretizer& disc = bin_disc;

    // First fit with valid data
    auto X_fit = torch::tensor({ 1.0f, 2.0f, 3.0f, 4.0f }, torch::kFloat32);
    auto y_fit = torch::tensor({ 1, 2, 3, 4 }, torch::kInt32);
    disc.fit_t(X_fit, y_fit);

    // Test non-1D tensor
    auto X_2d = torch::tensor({ {1.0f, 2.0f}, {3.0f, 4.0f} }, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.transform_t(X_2d), std::invalid_argument, "Only 1D tensors supported");

    // Test wrong tensor type
    auto X_int = torch::tensor({ 1, 2, 3 }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.transform_t(X_int), std::invalid_argument, "X tensor must be Float32 type");

    // Test empty tensor
    auto X_empty = torch::tensor({}, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.transform_t(X_empty), std::invalid_argument, "Tensor cannot be empty");
}
|
||||
|
||||
// Exercises the argument checks of fit_transform_t(): each call below passes
// one malformed tensor and expects std::invalid_argument with a specific
// message.
TEST(Discretizer, TensorValidationFitTransform)
{
    // Automatic storage instead of new/delete: nothing leaks if a statement
    // outside the EXPECT_THROW wrappers throws, and the calls still go through
    // the Discretizer interface via the base-class reference.
    BinDisc bin_disc(4, strategy_t::UNIFORM);
    Discretizer& disc = bin_disc;

    // Well-formed reference tensors; each case swaps exactly one of them.
    auto X = torch::tensor({ 1.0f, 2.0f, 3.0f }, torch::kFloat32);
    auto y = torch::tensor({ 1, 2, 3 }, torch::kInt32);

    // Test non-1D tensors
    auto X_2d = torch::tensor({ {1.0f, 2.0f}, {3.0f, 4.0f} }, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X_2d, y), std::invalid_argument, "Only 1D tensors supported");

    auto y_2d = torch::tensor({ {1, 2}, {3, 4} }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X, y_2d), std::invalid_argument, "Only 1D tensors supported");

    // Test wrong tensor types
    auto X_int = torch::tensor({ 1, 2, 3 }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X_int, y), std::invalid_argument, "X tensor must be Float32 type");

    auto y_float = torch::tensor({ 1.0f, 2.0f, 3.0f }, torch::kFloat32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X, y_float), std::invalid_argument, "y tensor must be Int32 type");

    // Test mismatched sizes
    auto y_short = torch::tensor({ 1, 2 }, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X, y_short), std::invalid_argument, "X and y tensors must have same number of elements");

    // Test empty tensors
    auto X_empty = torch::tensor({}, torch::kFloat32);
    auto y_empty = torch::tensor({}, torch::kInt32);
    EXPECT_THROW_WITH_MESSAGE(disc.fit_transform_t(X_empty, y_empty), std::invalid_argument, "Tensors cannot be empty");
}
|
||||
}
|
139
tests/Experiments.hpp
Normal file
139
tests/Experiments.hpp
Normal file
@@ -0,0 +1,139 @@
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#ifndef EXPERIMENTS_HPP
|
||||
#define EXPERIMENTS_HPP
|
||||
#include<sstream>
|
||||
#include<iostream>
|
||||
#include<string>
|
||||
#include<fstream>
|
||||
#include<vector>
|
||||
#include<tuple>
|
||||
#include "typesFImdlp.h"
|
||||
|
||||
// Writes one line to std::cout: `title`, then ": ", then the elements of
// `data` joined by ", ". The line is terminated with std::endl, which also
// flushes the stream. An empty vector prints just the title prefix.
template <typename T>
void show_vector(const std::vector<T>& data, std::string title)
{
    std::cout << title << ": ";
    for (auto it = data.begin(); it != data.end(); ++it) {
        // Separator goes before every element except the first.
        if (it != data.begin()) {
            std::cout << ", ";
        }
        std::cout << *it;
    }
    std::cout << std::endl;
}
|
||||
// Tags which kind of input an Experiment object was built from.
enum class experiment_t {
|
||||
// Set by the Experiment constructor that takes from/to/step values.
RANGE,
|
||||
// Set by the Experiment constructor that takes an explicit dataset vector.
VECTOR
|
||||
};
|
||||
class Experiment {
|
||||
public:
|
||||
Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||
from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::RANGE }
|
||||
{
|
||||
validate_strategy();
|
||||
|
||||
}
|
||||
Experiment(std::vector<mdlp::precision_t> dataset, int n_bins, std::string strategy, std::vector<int> data_discretized, std::vector<mdlp::precision_t> cutpoints) :
|
||||
n_bins_{ n_bins }, strategy_{ strategy }, dataset_{ dataset }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::VECTOR }
|
||||
{
|
||||
validate_strategy();
|
||||
}
|
||||
void validate_strategy()
|
||||
{
|
||||
if (strategy_ != "Q" && strategy_ != "U") {
|
||||
throw std::invalid_argument("Invalid strategy " + strategy_);
|
||||
}
|
||||
}
|
||||
float from_;
|
||||
float to_;
|
||||
float step_;
|
||||
int n_bins_;
|
||||
std::string strategy_;
|
||||
std::vector<mdlp::precision_t> dataset_;
|
||||
std::vector<int> discretized_data_;
|
||||
std::vector<mdlp::precision_t> cutpoints_;
|
||||
experiment_t type_;
|
||||
};
|
||||
class Experiments {
|
||||
public:
|
||||
Experiments(const std::string filename) : filename{ filename }
|
||||
{
|
||||
test_file.open(filename);
|
||||
if (!test_file.is_open()) {
|
||||
throw std::runtime_error("File " + filename + " not found");
|
||||
}
|
||||
exp_end = false;
|
||||
}
|
||||
~Experiments()
|
||||
{
|
||||
test_file.close();
|
||||
}
|
||||
bool end() const
|
||||
{
|
||||
return exp_end;
|
||||
}
|
||||
bool is_next()
|
||||
{
|
||||
while (std::getline(test_file, line) && line[0] == '#');
|
||||
if (test_file.eof()) {
|
||||
exp_end = true;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
Experiment next()
|
||||
{
|
||||
return parse_experiment(line);
|
||||
}
|
||||
private:
|
||||
std::tuple<float, float, float, int, std::string> parse_header(const std::string& line)
|
||||
{
|
||||
std::istringstream iss(line);
|
||||
std::string from_, to_, step_, n_bins, strategy;
|
||||
iss >> from_ >> to_ >> step_ >> n_bins >> strategy;
|
||||
return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy };
|
||||
}
|
||||
template <typename T>
|
||||
std::vector<T> parse_vector(const std::string& line)
|
||||
{
|
||||
std::istringstream iss(line);
|
||||
std::vector<T> data;
|
||||
std::string d;
|
||||
while (iss >> d) {
|
||||
data.push_back(std::is_same<T, float>::value ? std::stof(d) : std::stoi(d));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
Experiment parse_experiment(std::string& line)
|
||||
{
|
||||
// Read experiment lines
|
||||
std::string experiment, data, cuts, strategy;
|
||||
std::getline(test_file, experiment);
|
||||
std::getline(test_file, data);
|
||||
std::getline(test_file, cuts);
|
||||
// split data into variables
|
||||
float from_, to_, step_;
|
||||
int n_bins;
|
||||
std::vector<mdlp::precision_t> dataset;
|
||||
auto data_discretized = parse_vector<int>(data);
|
||||
auto cutpoints = parse_vector<mdlp::precision_t>(cuts);
|
||||
if (line == "RANGE") {
|
||||
tie(from_, to_, step_, n_bins, strategy) = parse_header(experiment);
|
||||
return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints };
|
||||
}
|
||||
strategy = experiment.substr(0, 1);
|
||||
n_bins = std::stoi(experiment.substr(1, 1));
|
||||
data = experiment.substr(3, experiment.size() - 4);
|
||||
dataset = parse_vector<mdlp::precision_t>(data);
|
||||
return Experiment(dataset, n_bins, strategy, data_discretized, cutpoints);
|
||||
}
|
||||
std::ifstream test_file;
|
||||
std::string filename;
|
||||
std::string line;
|
||||
bool exp_end;
|
||||
};
|
||||
#endif
|
@@ -1,186 +1,427 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
#include "../CPPFImdlp.h"
|
||||
#include "ArffFiles.h"
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "gtest/gtest.h"
|
||||
#include "Metrics.h"
|
||||
#include "CPPFImdlp.h"
|
||||
|
||||
#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) EXPECT_THROW( \
|
||||
try { \
|
||||
stmt; \
|
||||
} catch (const etype& ex) { \
|
||||
EXPECT_EQ(whatstring, std::string(ex.what())); \
|
||||
throw; \
|
||||
} \
|
||||
, etype)
|
||||
|
||||
namespace mdlp {
|
||||
class TestFImdlp: public CPPFImdlp, public testing::Test {
|
||||
class TestFImdlp : public CPPFImdlp, public testing::Test {
|
||||
public:
|
||||
precision_t precision = 0.000001;
|
||||
TestFImdlp(): CPPFImdlp() {}
|
||||
void SetUp()
|
||||
precision_t precision = 0.000001f;
|
||||
|
||||
TestFImdlp() : CPPFImdlp() {}
|
||||
|
||||
string data_path;
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
X = { 4.7, 4.7, 4.7, 4.7, 4.8, 4.8, 4.8, 4.8, 4.9, 4.95, 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
X = { 4.7f, 4.7f, 4.7f, 4.7f, 4.8f, 4.8f, 4.8f, 4.8f, 4.9f, 4.95f, 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f,
|
||||
6.0f, 5.1f, 5.9f };
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
algorithm = false;
|
||||
fit(X, y);
|
||||
data_path = set_data_path();
|
||||
}
|
||||
void setalgorithm(bool value)
|
||||
|
||||
static string set_data_path()
|
||||
{
|
||||
algorithm = value;
|
||||
string path = "datasets/";
|
||||
ifstream file(path + "iris.arff");
|
||||
if (file.is_open()) {
|
||||
file.close();
|
||||
return path;
|
||||
}
|
||||
return "tests/datasets/";
|
||||
}
|
||||
|
||||
void checkSortedVector()
|
||||
{
|
||||
indices_t testSortedIndices = sortIndices(X, y);
|
||||
precision_t prev = X[testSortedIndices[0]];
|
||||
for (auto i = 0; i < X.size(); ++i) {
|
||||
for (unsigned long i = 0; i < X.size(); ++i) {
|
||||
EXPECT_EQ(testSortedIndices[i], indices[i]);
|
||||
EXPECT_LE(prev, X[testSortedIndices[i]]);
|
||||
prev = X[testSortedIndices[i]];
|
||||
}
|
||||
}
|
||||
void checkCutPoints(cutPoints_t& expected)
|
||||
|
||||
void checkCutPoints(cutPoints_t& computed, cutPoints_t& expected) const
|
||||
{
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(cutPoints.size(), expectedSize);
|
||||
for (auto i = 0; i < cutPoints.size(); i++) {
|
||||
EXPECT_NEAR(cutPoints[i], expected[i], precision);
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
// cout << "(" << computed[i] << ", " << expected[i] << ") ";
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
template<typename T, typename A>
|
||||
void checkVectors(std::vector<T, A> const& expected, std::vector<T, A> const& computed)
|
||||
|
||||
bool test_result(const samples_t& X_, size_t cut, float midPoint, size_t limit, const string& title)
|
||||
{
|
||||
EXPECT_EQ(expected.size(), computed.size());
|
||||
ASSERT_EQ(expected.size(), computed.size());
|
||||
for (auto i = 0; i < expected.size(); i++) {
|
||||
EXPECT_NEAR(expected[i], computed[i], precision);
|
||||
pair<precision_t, size_t> result;
|
||||
labels_t y_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
X = X_;
|
||||
y = y_;
|
||||
indices = sortIndices(X, y);
|
||||
// cout << "* " << title << endl;
|
||||
result = valueCutPoint(0, cut, 10);
|
||||
EXPECT_NEAR(result.first, midPoint, precision);
|
||||
EXPECT_EQ(result.second, limit);
|
||||
return true;
|
||||
}
|
||||
|
||||
void test_dataset(CPPFImdlp& test, const string& filename, vector<cutPoints_t>& expected,
|
||||
vector<int>& depths) const
|
||||
{
|
||||
ArffFiles file;
|
||||
file.load(data_path + filename + ".arff", true);
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
test.fit(X[feature], y);
|
||||
EXPECT_EQ(test.get_depth(), depths[feature]);
|
||||
auto computed = test.getCutPoints();
|
||||
// cout << "Feature " << feature << ": ";
|
||||
checkCutPoints(computed, expected[feature]);
|
||||
// cout << endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorEmptyDataset)
|
||||
{
|
||||
X = samples_t();
|
||||
y = labels_t();
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorIncorrectAlgorithm)
|
||||
{
|
||||
algorithm = 2;
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have at least one element");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorDifferentSize)
|
||||
{
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2 };
|
||||
EXPECT_THROW(fit(X, y), std::invalid_argument);
|
||||
EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size()));
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorMinLength)
|
||||
{
|
||||
EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(2, 10, 0), invalid_argument, "min_length must be greater than 2");
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorMaxDepth)
|
||||
{
|
||||
EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 0, 0), invalid_argument, "max_depth must be greater than 0");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, JoinFit)
|
||||
{
|
||||
samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
|
||||
labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
|
||||
cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 };
|
||||
fit(X_, y_);
|
||||
auto computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
checkCutPoints(computed, expected);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, FitErrorMinCutPoints)
|
||||
{
|
||||
EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 10, -1), invalid_argument, "proposed_cuts must be non-negative");
|
||||
}
|
||||
TEST_F(TestFImdlp, FitErrorMaxCutPoints)
|
||||
{
|
||||
auto test = CPPFImdlp(3, 1, 8);
|
||||
samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
|
||||
labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
|
||||
EXPECT_THROW_WITH_MESSAGE(test.fit(X_, y_), invalid_argument, "wrong proposed num_cuts value");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SortIndices)
|
||||
{
|
||||
X = { 5.7, 5.3, 5.2, 5.1, 5.0, 5.6, 5.1, 6.0, 5.1, 5.9 };
|
||||
X = { 5.7f, 5.3f, 5.2f, 5.1f, 5.0f, 5.6f, 5.1f, 6.0f, 5.1f, 5.9f };
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 4, 3, 6, 8, 2, 1, 5, 0, 9, 7 };
|
||||
checkSortedVector();
|
||||
X = { 5.77, 5.88, 5.99 };
|
||||
X = { 5.77f, 5.88f, 5.99f };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 0, 1, 2 };
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.11 };
|
||||
X = { 5.33f, 5.22f, 5.11f };
|
||||
y = { 1, 2, 1 };
|
||||
indices = { 2, 1, 0 };
|
||||
checkSortedVector();
|
||||
X = { 5.33, 5.22, 5.33 };
|
||||
X = { 5.33f, 5.22f, 5.33f };
|
||||
y = { 2, 2, 1 };
|
||||
indices = { 1, 2, 0 };
|
||||
}
|
||||
TEST_F(TestFImdlp, TestArtificialDatasetAlternative)
|
||||
|
||||
TEST_F(TestFImdlp, SortIndicesOutOfBounds)
|
||||
{
|
||||
algorithm = 1;
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
// Test for out of bounds exception in sortIndices
|
||||
samples_t X_long = { 1.0f, 2.0f, 3.0f };
|
||||
labels_t y_short = { 1, 2 };
|
||||
EXPECT_THROW_WITH_MESSAGE(sortIndices(X_long, y_short), std::out_of_range, "Index out of bounds in sort comparison");
|
||||
}
|
||||
|
||||
|
||||
TEST_F(TestFImdlp, TestShortDatasets)
|
||||
{
|
||||
vector<precision_t> computed;
|
||||
X = { 1 };
|
||||
y = { 1 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 1, 3 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 2, 4 };
|
||||
y = { 1, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 2);
|
||||
X = { 1, 2, 3 };
|
||||
y = { 1, 2, 2 };
|
||||
fit(X, y);
|
||||
computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), 3);
|
||||
EXPECT_NEAR(computed[0], 1, precision);
|
||||
EXPECT_NEAR(computed[1], 1.5, precision);
|
||||
EXPECT_NEAR(computed[2], 3, precision);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestArtificialDataset)
|
||||
{
|
||||
algorithm = 0;
|
||||
fit(X, y);
|
||||
computeCutPoints(0, 20);
|
||||
cutPoints_t expected = { 5.0500001907348633 };
|
||||
cutPoints_t expected = { 4.7, 5.05, 6.0 };
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
computed = getCutPoints();
|
||||
int expectedSize = expected.size();
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[i], precision);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, TestIris)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 6.25 },
|
||||
{ 2.8499999046325684, 3, 3.0499999523162842, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75, 5.0500001907348633 },
|
||||
{ 0.80000001192092896, 1.4500000476837158, 1.75 }
|
||||
{4.3, 5.45f, 5.75f, 7.9},
|
||||
{2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4},
|
||||
{1, 2.45f, 4.75f, 5.05f, 6.9},
|
||||
{0.1, 0.8f, 1.75f, 2.5}
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 0;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
vector<int> depths = { 3, 5, 4, 3 };
|
||||
auto test = CPPFImdlp();
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
TEST_F(TestFImdlp, TestIrisAlternative)
|
||||
{
|
||||
ArffFiles file;
|
||||
string path = "../datasets/";
|
||||
|
||||
file.load(path + "iris.arff", true);
|
||||
int items = file.getSize();
|
||||
vector<samples_t>& X = file.getX();
|
||||
vector<cutPoints_t> expected = {
|
||||
{ 5.4499998092651367, 5.75 },
|
||||
{ 2.8499999046325684, 3.3499999046325684 },
|
||||
{ 2.4500000476837158, 4.75 },
|
||||
{ 0.80000001192092896, 1.75 }
|
||||
};
|
||||
labels_t& y = file.getY();
|
||||
auto attributes = file.getAttributes();
|
||||
algorithm = 1;
|
||||
for (auto feature = 0; feature < attributes.size(); feature++) {
|
||||
fit(X[feature], y);
|
||||
vector<precision_t> computed = getCutPoints();
|
||||
EXPECT_EQ(computed.size(), expected[feature].size());
|
||||
for (auto i = 0; i < computed.size(); i++) {
|
||||
EXPECT_NEAR(computed[i], expected[feature][i], precision);
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsGCase)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
algorithm = 0;
|
||||
expected = { 1.5 };
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
expected = { 0, 1.5, 2 };
|
||||
samples_t X_ = { 0, 1, 2, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
auto computed = getCutPoints();
|
||||
checkCutPoints(computed, expected);
|
||||
}
|
||||
TEST_F(TestFImdlp, ComputeCutPointsAlternativeGCase)
|
||||
|
||||
TEST_F(TestFImdlp, ValueCutPoint)
|
||||
{
|
||||
cutPoints_t expected;
|
||||
expected = { 1.5 };
|
||||
algorithm = true;
|
||||
samples_t X_ = { 0, 1, 2, 2 };
|
||||
labels_t y_ = { 1, 1, 1, 2 };
|
||||
fit(X_, y_);
|
||||
checkCutPoints(expected);
|
||||
// Case titles as stated in the doc
|
||||
samples_t X1a{ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X1a, 6, 7.3f / 2, 6, "1a");
|
||||
samples_t X2a = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X2a, 6, 7.1f / 2, 4, "2a");
|
||||
samples_t X2b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X2b, 6, 7.5f / 2, 7, "2b");
|
||||
samples_t X3a = { 3.f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X3a, 4, 7.1f / 2, 4, "3a");
|
||||
samples_t X3b = { 3.1f, 3.2f, 3.3f, 3.4f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
|
||||
test_result(X3b, 4, 7.1f / 2, 4, "3b");
|
||||
samples_t X4a = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.9f, 4.0f };
|
||||
test_result(X4a, 4, 6.9f / 2, 2, "4a");
|
||||
samples_t X4b = { 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.8f, 3.9f, 4.0f };
|
||||
test_result(X4b, 4, 7.5f / 2, 7, "4b");
|
||||
samples_t X4c = { 3.1f, 3.2f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f, 3.7f };
|
||||
test_result(X4c, 4, 6.9f / 2, 2, "4c");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxDepth)
|
||||
{
|
||||
// Set max_depth to 1
|
||||
auto test = CPPFImdlp(3, 1, 0);
|
||||
vector<cutPoints_t> expected = {
|
||||
{4.3, 5.45f, 7.9},
|
||||
{2, 3.35f, 4.4},
|
||||
{1, 2.45f, 6.9},
|
||||
{0.1, 0.8f, 2.5}
|
||||
};
|
||||
vector<int> depths = { 1, 1, 1, 1 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MinLength)
|
||||
{
|
||||
auto test = CPPFImdlp(75, 100, 0);
|
||||
// Set min_length to 75
|
||||
vector<cutPoints_t> expected = {
|
||||
{4.3, 5.45f, 5.75f, 7.9},
|
||||
{2, 2.85f, 3.35f, 4.4},
|
||||
{1, 2.45f, 4.75f, 6.9},
|
||||
{0.1, 0.8f, 1.75f, 2.5}
|
||||
};
|
||||
vector<int> depths = { 3, 2, 2, 2 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MinLengthMaxDepth)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2, 0);
|
||||
vector<cutPoints_t> expected = {
|
||||
{4.3, 5.45f, 5.75f, 7.9},
|
||||
{2, 2.85f, 3.35f, 4.4},
|
||||
{1, 2.45f, 4.75f, 6.9},
|
||||
{0.1, 0.8f, 1.75f, 2.5}
|
||||
};
|
||||
vector<int> depths = { 2, 2, 2, 2 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxCutPointsInteger)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2, 1);
|
||||
vector<cutPoints_t> expected = {
|
||||
{4.3, 5.45f, 7.9},
|
||||
{2, 2.85f, 4.4},
|
||||
{1, 2.45f, 6.9},
|
||||
{0.1, 0.8f, 2.5}
|
||||
};
|
||||
vector<int> depths = { 2, 2, 2, 2 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, MaxCutPointsFloat)
|
||||
{
|
||||
// Set min_length to 75
|
||||
auto test = CPPFImdlp(75, 2, 0.2f);
|
||||
vector<cutPoints_t> expected = {
|
||||
{4.3, 5.45f, 5.75f, 7.9},
|
||||
{2, 2.85f, 3.35f, 4.4},
|
||||
{1, 2.45f, 4.75f, 6.9},
|
||||
{0.1, 0.8f, 1.75f, 2.5}
|
||||
};
|
||||
vector<int> depths = { 2, 2, 2, 2 };
|
||||
test_dataset(test, "iris", expected, depths);
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, ProposedCuts)
|
||||
{
|
||||
vector<pair<float, size_t>> proposed_list = { {0.1f, 2},
|
||||
{0.5f, 10},
|
||||
{0.07f, 1},
|
||||
{1.0f, 1},
|
||||
{2.0f, 2} };
|
||||
size_t expected;
|
||||
size_t computed;
|
||||
for (auto proposed_item : proposed_list) {
|
||||
tie(proposed_cuts, expected) = proposed_item;
|
||||
computed = compute_max_num_cut_points();
|
||||
ASSERT_EQ(expected, computed);
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, TransformTest)
|
||||
{
|
||||
labels_t expected = {
|
||||
5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5,
|
||||
5, 3, 5, 4, 5, 4, 4, 4, 4, 0, 1, 1, 4, 0, 2, 0, 0, 3, 0, 2, 2, 4,
|
||||
3, 0, 0, 0, 4, 1, 0, 1, 2, 3, 1, 3, 2, 0, 0, 0, 0, 0, 3, 5, 4, 0,
|
||||
3, 0, 0, 3, 0, 0, 0, 3, 2, 2, 0, 1, 4, 0, 3, 2, 3, 3, 0, 2, 0, 5,
|
||||
4, 0, 3, 0, 1, 4, 3, 5, 0, 0, 4, 1, 1, 0, 4, 4, 1, 3, 1, 3, 1, 5,
|
||||
1, 1, 0, 3, 5, 4, 3, 4, 4, 4, 0, 4, 4, 3, 0, 3, 5, 3
|
||||
};
|
||||
ArffFiles file;
|
||||
file.load(data_path + "iris.arff", true);
|
||||
vector<samples_t>& X = file.getX();
|
||||
labels_t& y = file.getY();
|
||||
fit(X[1], y);
|
||||
auto computed = transform(X[1]);
|
||||
EXPECT_EQ(computed.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed.size(); i++) {
|
||||
EXPECT_EQ(computed[i], expected[i]);
|
||||
}
|
||||
auto computed_ft = fit_transform(X[1], y);
|
||||
EXPECT_EQ(computed_ft.size(), expected.size());
|
||||
for (unsigned long i = 0; i < computed_ft.size(); i++) {
|
||||
EXPECT_EQ(computed_ft[i], expected[i]);
|
||||
}
|
||||
}
|
||||
TEST_F(TestFImdlp, SafeXAccessIndexOutOfBounds)
|
||||
{
|
||||
// Test safe_X_access with index out of bounds for indices array
|
||||
X = { 1.0f, 2.0f, 3.0f };
|
||||
y = { 1, 2, 3 };
|
||||
indices = { 0, 1 }; // shorter than expected
|
||||
|
||||
// This should trigger the first exception in safe_X_access (idx >= indices.size())
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_X_access(2), std::out_of_range, "Index out of bounds for indices array");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeXAccessXOutOfBounds)
|
||||
{
|
||||
// Test safe_X_access with real_idx out of bounds for X array
|
||||
X = { 1.0f, 2.0f }; // shorter array
|
||||
y = { 1, 2, 3 };
|
||||
indices = { 0, 1, 5 }; // indices[2] = 5 is out of bounds for X
|
||||
|
||||
// This should trigger the second exception in safe_X_access (real_idx >= X.size())
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_X_access(2), std::out_of_range, "Index out of bounds for X array");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeYAccessIndexOutOfBounds)
|
||||
{
|
||||
// Test safe_y_access with index out of bounds for indices array
|
||||
X = { 1.0f, 2.0f, 3.0f };
|
||||
y = { 1, 2, 3 };
|
||||
indices = { 0, 1 }; // shorter than expected
|
||||
|
||||
// This should trigger the first exception in safe_y_access (idx >= indices.size())
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_y_access(2), std::out_of_range, "Index out of bounds for indices array");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeYAccessYOutOfBounds)
|
||||
{
|
||||
// Test safe_y_access with real_idx out of bounds for y array
|
||||
X = { 1.0f, 2.0f, 3.0f };
|
||||
y = { 1, 2 }; // shorter array
|
||||
indices = { 0, 1, 5 }; // indices[2] = 5 is out of bounds for y
|
||||
|
||||
// This should trigger the second exception in safe_y_access (real_idx >= y.size())
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_y_access(2), std::out_of_range, "Index out of bounds for y array");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeSubtractUnderflow)
|
||||
{
|
||||
// Test safe_subtract with underflow condition (b > a)
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_subtract(3, 5), std::underflow_error, "Subtraction would cause underflow");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@@ -1,23 +1,27 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "../Metrics.h"
|
||||
// ****************************************************************
|
||||
// SPDX - FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX - FileType: SOURCE
|
||||
// SPDX - License - Identifier: MIT
|
||||
// ****************************************************************
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
class TestMetrics: public Metrics, public testing::Test {
|
||||
class TestMetrics : public Metrics, public testing::Test {
|
||||
public:
|
||||
labels_t y;
|
||||
samples_t X;
|
||||
indices_t indices;
|
||||
precision_t precision = 0.000001;
|
||||
labels_t y_ = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices_t indices_ = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
precision_t precision = 1e-6;
|
||||
|
||||
TestMetrics(): Metrics(y, indices) {}
|
||||
void SetUp()
|
||||
TestMetrics() : Metrics(y_, indices_) {};
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 };
|
||||
indices = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
setData(y, indices);
|
||||
setData(y_, indices_);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestMetrics, NumClasses)
|
||||
{
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
@@ -25,19 +29,31 @@ namespace mdlp {
|
||||
EXPECT_EQ(2, computeNumClasses(0, 10));
|
||||
EXPECT_EQ(2, computeNumClasses(8, 10));
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, Entropy)
|
||||
{
|
||||
EXPECT_EQ(1, entropy(0, 10));
|
||||
EXPECT_EQ(0, entropy(0, 5));
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.468996, entropy(0, 10), precision);
|
||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, EntropyDouble)
|
||||
{
|
||||
y = { 0, 0, 1, 2, 3 };
|
||||
samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 };
|
||||
for (auto idx = 0; idx < y.size(); ++idx) {
|
||||
ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, InformationGain)
|
||||
{
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(1, informationGain(0, 5, 10), precision); // For cache
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032, informationGain(0, 5, 10), precision);
|
||||
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
||||
}
|
||||
}
|
||||
|
@@ -1,4 +0,0 @@
|
||||
rm -fr lcoverage/*
|
||||
lcov --capture --directory ./ --output-file lcoverage/main_coverage.info
|
||||
genhtml lcoverage/main_coverage.info --output-directory lcoverage
|
||||
open lcoverage/index.html
|
863
tests/datasets/diabetes.arff
Executable file
863
tests/datasets/diabetes.arff
Executable file
@@ -0,0 +1,863 @@
|
||||
% 1. Title: Pima Indians Diabetes Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Original owners: National Institute of Diabetes and Digestive and
|
||||
% Kidney Diseases
|
||||
% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)
|
||||
% Research Center, RMI Group Leader
|
||||
% Applied Physics Laboratory
|
||||
% The Johns Hopkins University
|
||||
% Johns Hopkins Road
|
||||
% Laurel, MD 20707
|
||||
% (301) 953-6231
|
||||
% (c) Date received: 9 May 1990
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \&
|
||||
% Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast
|
||||
% the onset of diabetes mellitus. In {\it Proceedings of the Symposium
|
||||
% on Computer Applications and Medical Care} (pp. 261--265). IEEE
|
||||
% Computer Society Press.
|
||||
%
|
||||
% The diagnostic, binary-valued variable investigated is whether the
|
||||
% patient shows signs of diabetes according to World Health Organization
|
||||
% criteria (i.e., if the 2 hour post-load plasma glucose was at least
|
||||
% 200 mg/dl at any survey examination or if found during routine medical
|
||||
% care). The population lives near Phoenix, Arizona, USA.
|
||||
%
|
||||
% Results: Their ADAP algorithm makes a real-valued prediction between
|
||||
% 0 and 1. This was transformed into a binary decision using a cutoff of
|
||||
% 0.448. Using 576 training instances, the sensitivity and specificity
|
||||
% of their algorithm was 76% on the remaining 192 instances.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% Several constraints were placed on the selection of these instances from
|
||||
% a larger database. In particular, all patients here are females at
|
||||
% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning
|
||||
% routine that generates and executes digital analogs of perceptron-like
|
||||
% devices. It is a unique algorithm; see the paper for details.
|
||||
%
|
||||
% 5. Number of Instances: 768
|
||||
%
|
||||
% 6. Number of Attributes: 8 plus class
|
||||
%
|
||||
% 7. For Each Attribute: (all numeric-valued)
|
||||
% 1. Number of times pregnant
|
||||
% 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test
|
||||
% 3. Diastolic blood pressure (mm Hg)
|
||||
% 4. Triceps skin fold thickness (mm)
|
||||
% 5. 2-Hour serum insulin (mu U/ml)
|
||||
% 6. Body mass index (weight in kg/(height in m)^2)
|
||||
% 7. Diabetes pedigree function
|
||||
% 8. Age (years)
|
||||
% 9. Class variable (0 or 1)
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% 9. Class Distribution: (class value 1 is interpreted as "tested positive for
|
||||
% diabetes")
|
||||
%
|
||||
% Class Value Number of instances
|
||||
% 0 500
|
||||
% 1 268
|
||||
%
|
||||
% 10. Brief statistical analysis:
|
||||
%
|
||||
% Attribute number: Mean: Standard Deviation:
|
||||
% 1. 3.8 3.4
|
||||
% 2. 120.9 32.0
|
||||
% 3. 69.1 19.4
|
||||
% 4. 20.5 16.0
|
||||
% 5. 79.8 115.2
|
||||
% 6. 32.0 7.9
|
||||
% 7. 0.5 0.3
|
||||
% 8. 33.2 11.8
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'class'
|
||||
% From: 0 To: tested_negative
|
||||
% From: 1 To: tested_positive
|
||||
%
|
||||
@relation pima_diabetes
|
||||
@attribute 'preg' real
|
||||
@attribute 'plas' real
|
||||
@attribute 'pres' real
|
||||
@attribute 'skin' real
|
||||
@attribute 'insu' real
|
||||
@attribute 'mass' real
|
||||
@attribute 'pedi' real
|
||||
@attribute 'age' real
|
||||
@attribute 'class' { tested_negative, tested_positive}
|
||||
@data
|
||||
6,148,72,35,0,33.6,0.627,50,tested_positive
|
||||
1,85,66,29,0,26.6,0.351,31,tested_negative
|
||||
8,183,64,0,0,23.3,0.672,32,tested_positive
|
||||
1,89,66,23,94,28.1,0.167,21,tested_negative
|
||||
0,137,40,35,168,43.1,2.288,33,tested_positive
|
||||
5,116,74,0,0,25.6,0.201,30,tested_negative
|
||||
3,78,50,32,88,31,0.248,26,tested_positive
|
||||
10,115,0,0,0,35.3,0.134,29,tested_negative
|
||||
2,197,70,45,543,30.5,0.158,53,tested_positive
|
||||
8,125,96,0,0,0,0.232,54,tested_positive
|
||||
4,110,92,0,0,37.6,0.191,30,tested_negative
|
||||
10,168,74,0,0,38,0.537,34,tested_positive
|
||||
10,139,80,0,0,27.1,1.441,57,tested_negative
|
||||
1,189,60,23,846,30.1,0.398,59,tested_positive
|
||||
5,166,72,19,175,25.8,0.587,51,tested_positive
|
||||
7,100,0,0,0,30,0.484,32,tested_positive
|
||||
0,118,84,47,230,45.8,0.551,31,tested_positive
|
||||
7,107,74,0,0,29.6,0.254,31,tested_positive
|
||||
1,103,30,38,83,43.3,0.183,33,tested_negative
|
||||
1,115,70,30,96,34.6,0.529,32,tested_positive
|
||||
3,126,88,41,235,39.3,0.704,27,tested_negative
|
||||
8,99,84,0,0,35.4,0.388,50,tested_negative
|
||||
7,196,90,0,0,39.8,0.451,41,tested_positive
|
||||
9,119,80,35,0,29,0.263,29,tested_positive
|
||||
11,143,94,33,146,36.6,0.254,51,tested_positive
|
||||
10,125,70,26,115,31.1,0.205,41,tested_positive
|
||||
7,147,76,0,0,39.4,0.257,43,tested_positive
|
||||
1,97,66,15,140,23.2,0.487,22,tested_negative
|
||||
13,145,82,19,110,22.2,0.245,57,tested_negative
|
||||
5,117,92,0,0,34.1,0.337,38,tested_negative
|
||||
5,109,75,26,0,36,0.546,60,tested_negative
|
||||
3,158,76,36,245,31.6,0.851,28,tested_positive
|
||||
3,88,58,11,54,24.8,0.267,22,tested_negative
|
||||
6,92,92,0,0,19.9,0.188,28,tested_negative
|
||||
10,122,78,31,0,27.6,0.512,45,tested_negative
|
||||
4,103,60,33,192,24,0.966,33,tested_negative
|
||||
11,138,76,0,0,33.2,0.42,35,tested_negative
|
||||
9,102,76,37,0,32.9,0.665,46,tested_positive
|
||||
2,90,68,42,0,38.2,0.503,27,tested_positive
|
||||
4,111,72,47,207,37.1,1.39,56,tested_positive
|
||||
3,180,64,25,70,34,0.271,26,tested_negative
|
||||
7,133,84,0,0,40.2,0.696,37,tested_negative
|
||||
7,106,92,18,0,22.7,0.235,48,tested_negative
|
||||
9,171,110,24,240,45.4,0.721,54,tested_positive
|
||||
7,159,64,0,0,27.4,0.294,40,tested_negative
|
||||
0,180,66,39,0,42,1.893,25,tested_positive
|
||||
1,146,56,0,0,29.7,0.564,29,tested_negative
|
||||
2,71,70,27,0,28,0.586,22,tested_negative
|
||||
7,103,66,32,0,39.1,0.344,31,tested_positive
|
||||
7,105,0,0,0,0,0.305,24,tested_negative
|
||||
1,103,80,11,82,19.4,0.491,22,tested_negative
|
||||
1,101,50,15,36,24.2,0.526,26,tested_negative
|
||||
5,88,66,21,23,24.4,0.342,30,tested_negative
|
||||
8,176,90,34,300,33.7,0.467,58,tested_positive
|
||||
7,150,66,42,342,34.7,0.718,42,tested_negative
|
||||
1,73,50,10,0,23,0.248,21,tested_negative
|
||||
7,187,68,39,304,37.7,0.254,41,tested_positive
|
||||
0,100,88,60,110,46.8,0.962,31,tested_negative
|
||||
0,146,82,0,0,40.5,1.781,44,tested_negative
|
||||
0,105,64,41,142,41.5,0.173,22,tested_negative
|
||||
2,84,0,0,0,0,0.304,21,tested_negative
|
||||
8,133,72,0,0,32.9,0.27,39,tested_positive
|
||||
5,44,62,0,0,25,0.587,36,tested_negative
|
||||
2,141,58,34,128,25.4,0.699,24,tested_negative
|
||||
7,114,66,0,0,32.8,0.258,42,tested_positive
|
||||
5,99,74,27,0,29,0.203,32,tested_negative
|
||||
0,109,88,30,0,32.5,0.855,38,tested_positive
|
||||
2,109,92,0,0,42.7,0.845,54,tested_negative
|
||||
1,95,66,13,38,19.6,0.334,25,tested_negative
|
||||
4,146,85,27,100,28.9,0.189,27,tested_negative
|
||||
2,100,66,20,90,32.9,0.867,28,tested_positive
|
||||
5,139,64,35,140,28.6,0.411,26,tested_negative
|
||||
13,126,90,0,0,43.4,0.583,42,tested_positive
|
||||
4,129,86,20,270,35.1,0.231,23,tested_negative
|
||||
1,79,75,30,0,32,0.396,22,tested_negative
|
||||
1,0,48,20,0,24.7,0.14,22,tested_negative
|
||||
7,62,78,0,0,32.6,0.391,41,tested_negative
|
||||
5,95,72,33,0,37.7,0.37,27,tested_negative
|
||||
0,131,0,0,0,43.2,0.27,26,tested_positive
|
||||
2,112,66,22,0,25,0.307,24,tested_negative
|
||||
3,113,44,13,0,22.4,0.14,22,tested_negative
|
||||
2,74,0,0,0,0,0.102,22,tested_negative
|
||||
7,83,78,26,71,29.3,0.767,36,tested_negative
|
||||
0,101,65,28,0,24.6,0.237,22,tested_negative
|
||||
5,137,108,0,0,48.8,0.227,37,tested_positive
|
||||
2,110,74,29,125,32.4,0.698,27,tested_negative
|
||||
13,106,72,54,0,36.6,0.178,45,tested_negative
|
||||
2,100,68,25,71,38.5,0.324,26,tested_negative
|
||||
15,136,70,32,110,37.1,0.153,43,tested_positive
|
||||
1,107,68,19,0,26.5,0.165,24,tested_negative
|
||||
1,80,55,0,0,19.1,0.258,21,tested_negative
|
||||
4,123,80,15,176,32,0.443,34,tested_negative
|
||||
7,81,78,40,48,46.7,0.261,42,tested_negative
|
||||
4,134,72,0,0,23.8,0.277,60,tested_positive
|
||||
2,142,82,18,64,24.7,0.761,21,tested_negative
|
||||
6,144,72,27,228,33.9,0.255,40,tested_negative
|
||||
2,92,62,28,0,31.6,0.13,24,tested_negative
|
||||
1,71,48,18,76,20.4,0.323,22,tested_negative
|
||||
6,93,50,30,64,28.7,0.356,23,tested_negative
|
||||
1,122,90,51,220,49.7,0.325,31,tested_positive
|
||||
1,163,72,0,0,39,1.222,33,tested_positive
|
||||
1,151,60,0,0,26.1,0.179,22,tested_negative
|
||||
0,125,96,0,0,22.5,0.262,21,tested_negative
|
||||
1,81,72,18,40,26.6,0.283,24,tested_negative
|
||||
2,85,65,0,0,39.6,0.93,27,tested_negative
|
||||
1,126,56,29,152,28.7,0.801,21,tested_negative
|
||||
1,96,122,0,0,22.4,0.207,27,tested_negative
|
||||
4,144,58,28,140,29.5,0.287,37,tested_negative
|
||||
3,83,58,31,18,34.3,0.336,25,tested_negative
|
||||
0,95,85,25,36,37.4,0.247,24,tested_positive
|
||||
3,171,72,33,135,33.3,0.199,24,tested_positive
|
||||
8,155,62,26,495,34,0.543,46,tested_positive
|
||||
1,89,76,34,37,31.2,0.192,23,tested_negative
|
||||
4,76,62,0,0,34,0.391,25,tested_negative
|
||||
7,160,54,32,175,30.5,0.588,39,tested_positive
|
||||
4,146,92,0,0,31.2,0.539,61,tested_positive
|
||||
5,124,74,0,0,34,0.22,38,tested_positive
|
||||
5,78,48,0,0,33.7,0.654,25,tested_negative
|
||||
4,97,60,23,0,28.2,0.443,22,tested_negative
|
||||
4,99,76,15,51,23.2,0.223,21,tested_negative
|
||||
0,162,76,56,100,53.2,0.759,25,tested_positive
|
||||
6,111,64,39,0,34.2,0.26,24,tested_negative
|
||||
2,107,74,30,100,33.6,0.404,23,tested_negative
|
||||
5,132,80,0,0,26.8,0.186,69,tested_negative
|
||||
0,113,76,0,0,33.3,0.278,23,tested_positive
|
||||
1,88,30,42,99,55,0.496,26,tested_positive
|
||||
3,120,70,30,135,42.9,0.452,30,tested_negative
|
||||
1,118,58,36,94,33.3,0.261,23,tested_negative
|
||||
1,117,88,24,145,34.5,0.403,40,tested_positive
|
||||
0,105,84,0,0,27.9,0.741,62,tested_positive
|
||||
4,173,70,14,168,29.7,0.361,33,tested_positive
|
||||
9,122,56,0,0,33.3,1.114,33,tested_positive
|
||||
3,170,64,37,225,34.5,0.356,30,tested_positive
|
||||
8,84,74,31,0,38.3,0.457,39,tested_negative
|
||||
2,96,68,13,49,21.1,0.647,26,tested_negative
|
||||
2,125,60,20,140,33.8,0.088,31,tested_negative
|
||||
0,100,70,26,50,30.8,0.597,21,tested_negative
|
||||
0,93,60,25,92,28.7,0.532,22,tested_negative
|
||||
0,129,80,0,0,31.2,0.703,29,tested_negative
|
||||
5,105,72,29,325,36.9,0.159,28,tested_negative
|
||||
3,128,78,0,0,21.1,0.268,55,tested_negative
|
||||
5,106,82,30,0,39.5,0.286,38,tested_negative
|
||||
2,108,52,26,63,32.5,0.318,22,tested_negative
|
||||
10,108,66,0,0,32.4,0.272,42,tested_positive
|
||||
4,154,62,31,284,32.8,0.237,23,tested_negative
|
||||
0,102,75,23,0,0,0.572,21,tested_negative
|
||||
9,57,80,37,0,32.8,0.096,41,tested_negative
|
||||
2,106,64,35,119,30.5,1.4,34,tested_negative
|
||||
5,147,78,0,0,33.7,0.218,65,tested_negative
|
||||
2,90,70,17,0,27.3,0.085,22,tested_negative
|
||||
1,136,74,50,204,37.4,0.399,24,tested_negative
|
||||
4,114,65,0,0,21.9,0.432,37,tested_negative
|
||||
9,156,86,28,155,34.3,1.189,42,tested_positive
|
||||
1,153,82,42,485,40.6,0.687,23,tested_negative
|
||||
8,188,78,0,0,47.9,0.137,43,tested_positive
|
||||
7,152,88,44,0,50,0.337,36,tested_positive
|
||||
2,99,52,15,94,24.6,0.637,21,tested_negative
|
||||
1,109,56,21,135,25.2,0.833,23,tested_negative
|
||||
2,88,74,19,53,29,0.229,22,tested_negative
|
||||
17,163,72,41,114,40.9,0.817,47,tested_positive
|
||||
4,151,90,38,0,29.7,0.294,36,tested_negative
|
||||
7,102,74,40,105,37.2,0.204,45,tested_negative
|
||||
0,114,80,34,285,44.2,0.167,27,tested_negative
|
||||
2,100,64,23,0,29.7,0.368,21,tested_negative
|
||||
0,131,88,0,0,31.6,0.743,32,tested_positive
|
||||
6,104,74,18,156,29.9,0.722,41,tested_positive
|
||||
3,148,66,25,0,32.5,0.256,22,tested_negative
|
||||
4,120,68,0,0,29.6,0.709,34,tested_negative
|
||||
4,110,66,0,0,31.9,0.471,29,tested_negative
|
||||
3,111,90,12,78,28.4,0.495,29,tested_negative
|
||||
6,102,82,0,0,30.8,0.18,36,tested_positive
|
||||
6,134,70,23,130,35.4,0.542,29,tested_positive
|
||||
2,87,0,23,0,28.9,0.773,25,tested_negative
|
||||
1,79,60,42,48,43.5,0.678,23,tested_negative
|
||||
2,75,64,24,55,29.7,0.37,33,tested_negative
|
||||
8,179,72,42,130,32.7,0.719,36,tested_positive
|
||||
6,85,78,0,0,31.2,0.382,42,tested_negative
|
||||
0,129,110,46,130,67.1,0.319,26,tested_positive
|
||||
5,143,78,0,0,45,0.19,47,tested_negative
|
||||
5,130,82,0,0,39.1,0.956,37,tested_positive
|
||||
6,87,80,0,0,23.2,0.084,32,tested_negative
|
||||
0,119,64,18,92,34.9,0.725,23,tested_negative
|
||||
1,0,74,20,23,27.7,0.299,21,tested_negative
|
||||
5,73,60,0,0,26.8,0.268,27,tested_negative
|
||||
4,141,74,0,0,27.6,0.244,40,tested_negative
|
||||
7,194,68,28,0,35.9,0.745,41,tested_positive
|
||||
8,181,68,36,495,30.1,0.615,60,tested_positive
|
||||
1,128,98,41,58,32,1.321,33,tested_positive
|
||||
8,109,76,39,114,27.9,0.64,31,tested_positive
|
||||
5,139,80,35,160,31.6,0.361,25,tested_positive
|
||||
3,111,62,0,0,22.6,0.142,21,tested_negative
|
||||
9,123,70,44,94,33.1,0.374,40,tested_negative
|
||||
7,159,66,0,0,30.4,0.383,36,tested_positive
|
||||
11,135,0,0,0,52.3,0.578,40,tested_positive
|
||||
8,85,55,20,0,24.4,0.136,42,tested_negative
|
||||
5,158,84,41,210,39.4,0.395,29,tested_positive
|
||||
1,105,58,0,0,24.3,0.187,21,tested_negative
|
||||
3,107,62,13,48,22.9,0.678,23,tested_positive
|
||||
4,109,64,44,99,34.8,0.905,26,tested_positive
|
||||
4,148,60,27,318,30.9,0.15,29,tested_positive
|
||||
0,113,80,16,0,31,0.874,21,tested_negative
|
||||
1,138,82,0,0,40.1,0.236,28,tested_negative
|
||||
0,108,68,20,0,27.3,0.787,32,tested_negative
|
||||
2,99,70,16,44,20.4,0.235,27,tested_negative
|
||||
6,103,72,32,190,37.7,0.324,55,tested_negative
|
||||
5,111,72,28,0,23.9,0.407,27,tested_negative
|
||||
8,196,76,29,280,37.5,0.605,57,tested_positive
|
||||
5,162,104,0,0,37.7,0.151,52,tested_positive
|
||||
1,96,64,27,87,33.2,0.289,21,tested_negative
|
||||
7,184,84,33,0,35.5,0.355,41,tested_positive
|
||||
2,81,60,22,0,27.7,0.29,25,tested_negative
|
||||
0,147,85,54,0,42.8,0.375,24,tested_negative
|
||||
7,179,95,31,0,34.2,0.164,60,tested_negative
|
||||
0,140,65,26,130,42.6,0.431,24,tested_positive
|
||||
9,112,82,32,175,34.2,0.26,36,tested_positive
|
||||
12,151,70,40,271,41.8,0.742,38,tested_positive
|
||||
5,109,62,41,129,35.8,0.514,25,tested_positive
|
||||
6,125,68,30,120,30,0.464,32,tested_negative
|
||||
5,85,74,22,0,29,1.224,32,tested_positive
|
||||
5,112,66,0,0,37.8,0.261,41,tested_positive
|
||||
0,177,60,29,478,34.6,1.072,21,tested_positive
|
||||
2,158,90,0,0,31.6,0.805,66,tested_positive
|
||||
7,119,0,0,0,25.2,0.209,37,tested_negative
|
||||
7,142,60,33,190,28.8,0.687,61,tested_negative
|
||||
1,100,66,15,56,23.6,0.666,26,tested_negative
|
||||
1,87,78,27,32,34.6,0.101,22,tested_negative
|
||||
0,101,76,0,0,35.7,0.198,26,tested_negative
|
||||
3,162,52,38,0,37.2,0.652,24,tested_positive
|
||||
4,197,70,39,744,36.7,2.329,31,tested_negative
|
||||
0,117,80,31,53,45.2,0.089,24,tested_negative
|
||||
4,142,86,0,0,44,0.645,22,tested_positive
|
||||
6,134,80,37,370,46.2,0.238,46,tested_positive
|
||||
1,79,80,25,37,25.4,0.583,22,tested_negative
|
||||
4,122,68,0,0,35,0.394,29,tested_negative
|
||||
3,74,68,28,45,29.7,0.293,23,tested_negative
|
||||
4,171,72,0,0,43.6,0.479,26,tested_positive
|
||||
7,181,84,21,192,35.9,0.586,51,tested_positive
|
||||
0,179,90,27,0,44.1,0.686,23,tested_positive
|
||||
9,164,84,21,0,30.8,0.831,32,tested_positive
|
||||
0,104,76,0,0,18.4,0.582,27,tested_negative
|
||||
1,91,64,24,0,29.2,0.192,21,tested_negative
|
||||
4,91,70,32,88,33.1,0.446,22,tested_negative
|
||||
3,139,54,0,0,25.6,0.402,22,tested_positive
|
||||
6,119,50,22,176,27.1,1.318,33,tested_positive
|
||||
2,146,76,35,194,38.2,0.329,29,tested_negative
|
||||
9,184,85,15,0,30,1.213,49,tested_positive
|
||||
10,122,68,0,0,31.2,0.258,41,tested_negative
|
||||
0,165,90,33,680,52.3,0.427,23,tested_negative
|
||||
9,124,70,33,402,35.4,0.282,34,tested_negative
|
||||
1,111,86,19,0,30.1,0.143,23,tested_negative
|
||||
9,106,52,0,0,31.2,0.38,42,tested_negative
|
||||
2,129,84,0,0,28,0.284,27,tested_negative
|
||||
2,90,80,14,55,24.4,0.249,24,tested_negative
|
||||
0,86,68,32,0,35.8,0.238,25,tested_negative
|
||||
12,92,62,7,258,27.6,0.926,44,tested_positive
|
||||
1,113,64,35,0,33.6,0.543,21,tested_positive
|
||||
3,111,56,39,0,30.1,0.557,30,tested_negative
|
||||
2,114,68,22,0,28.7,0.092,25,tested_negative
|
||||
1,193,50,16,375,25.9,0.655,24,tested_negative
|
||||
11,155,76,28,150,33.3,1.353,51,tested_positive
|
||||
3,191,68,15,130,30.9,0.299,34,tested_negative
|
||||
3,141,0,0,0,30,0.761,27,tested_positive
|
||||
4,95,70,32,0,32.1,0.612,24,tested_negative
|
||||
3,142,80,15,0,32.4,0.2,63,tested_negative
|
||||
4,123,62,0,0,32,0.226,35,tested_positive
|
||||
5,96,74,18,67,33.6,0.997,43,tested_negative
|
||||
0,138,0,0,0,36.3,0.933,25,tested_positive
|
||||
2,128,64,42,0,40,1.101,24,tested_negative
|
||||
0,102,52,0,0,25.1,0.078,21,tested_negative
|
||||
2,146,0,0,0,27.5,0.24,28,tested_positive
|
||||
10,101,86,37,0,45.6,1.136,38,tested_positive
|
||||
2,108,62,32,56,25.2,0.128,21,tested_negative
|
||||
3,122,78,0,0,23,0.254,40,tested_negative
|
||||
1,71,78,50,45,33.2,0.422,21,tested_negative
|
||||
13,106,70,0,0,34.2,0.251,52,tested_negative
|
||||
2,100,70,52,57,40.5,0.677,25,tested_negative
|
||||
7,106,60,24,0,26.5,0.296,29,tested_positive
|
||||
0,104,64,23,116,27.8,0.454,23,tested_negative
|
||||
5,114,74,0,0,24.9,0.744,57,tested_negative
|
||||
2,108,62,10,278,25.3,0.881,22,tested_negative
|
||||
0,146,70,0,0,37.9,0.334,28,tested_positive
|
||||
10,129,76,28,122,35.9,0.28,39,tested_negative
|
||||
7,133,88,15,155,32.4,0.262,37,tested_negative
|
||||
7,161,86,0,0,30.4,0.165,47,tested_positive
|
||||
2,108,80,0,0,27,0.259,52,tested_positive
|
||||
7,136,74,26,135,26,0.647,51,tested_negative
|
||||
5,155,84,44,545,38.7,0.619,34,tested_negative
|
||||
1,119,86,39,220,45.6,0.808,29,tested_positive
|
||||
4,96,56,17,49,20.8,0.34,26,tested_negative
|
||||
5,108,72,43,75,36.1,0.263,33,tested_negative
|
||||
0,78,88,29,40,36.9,0.434,21,tested_negative
|
||||
0,107,62,30,74,36.6,0.757,25,tested_positive
|
||||
2,128,78,37,182,43.3,1.224,31,tested_positive
|
||||
1,128,48,45,194,40.5,0.613,24,tested_positive
|
||||
0,161,50,0,0,21.9,0.254,65,tested_negative
|
||||
6,151,62,31,120,35.5,0.692,28,tested_negative
|
||||
2,146,70,38,360,28,0.337,29,tested_positive
|
||||
0,126,84,29,215,30.7,0.52,24,tested_negative
|
||||
14,100,78,25,184,36.6,0.412,46,tested_positive
|
||||
8,112,72,0,0,23.6,0.84,58,tested_negative
|
||||
0,167,0,0,0,32.3,0.839,30,tested_positive
|
||||
2,144,58,33,135,31.6,0.422,25,tested_positive
|
||||
5,77,82,41,42,35.8,0.156,35,tested_negative
|
||||
5,115,98,0,0,52.9,0.209,28,tested_positive
|
||||
3,150,76,0,0,21,0.207,37,tested_negative
|
||||
2,120,76,37,105,39.7,0.215,29,tested_negative
|
||||
10,161,68,23,132,25.5,0.326,47,tested_positive
|
||||
0,137,68,14,148,24.8,0.143,21,tested_negative
|
||||
0,128,68,19,180,30.5,1.391,25,tested_positive
|
||||
2,124,68,28,205,32.9,0.875,30,tested_positive
|
||||
6,80,66,30,0,26.2,0.313,41,tested_negative
|
||||
0,106,70,37,148,39.4,0.605,22,tested_negative
|
||||
2,155,74,17,96,26.6,0.433,27,tested_positive
|
||||
3,113,50,10,85,29.5,0.626,25,tested_negative
|
||||
7,109,80,31,0,35.9,1.127,43,tested_positive
|
||||
2,112,68,22,94,34.1,0.315,26,tested_negative
|
||||
3,99,80,11,64,19.3,0.284,30,tested_negative
|
||||
3,182,74,0,0,30.5,0.345,29,tested_positive
|
||||
3,115,66,39,140,38.1,0.15,28,tested_negative
|
||||
6,194,78,0,0,23.5,0.129,59,tested_positive
|
||||
4,129,60,12,231,27.5,0.527,31,tested_negative
|
||||
3,112,74,30,0,31.6,0.197,25,tested_positive
|
||||
0,124,70,20,0,27.4,0.254,36,tested_positive
|
||||
13,152,90,33,29,26.8,0.731,43,tested_positive
|
||||
2,112,75,32,0,35.7,0.148,21,tested_negative
|
||||
1,157,72,21,168,25.6,0.123,24,tested_negative
|
||||
1,122,64,32,156,35.1,0.692,30,tested_positive
|
||||
10,179,70,0,0,35.1,0.2,37,tested_negative
|
||||
2,102,86,36,120,45.5,0.127,23,tested_positive
|
||||
6,105,70,32,68,30.8,0.122,37,tested_negative
|
||||
8,118,72,19,0,23.1,1.476,46,tested_negative
|
||||
2,87,58,16,52,32.7,0.166,25,tested_negative
|
||||
1,180,0,0,0,43.3,0.282,41,tested_positive
|
||||
12,106,80,0,0,23.6,0.137,44,tested_negative
|
||||
1,95,60,18,58,23.9,0.26,22,tested_negative
|
||||
0,165,76,43,255,47.9,0.259,26,tested_negative
|
||||
0,117,0,0,0,33.8,0.932,44,tested_negative
|
||||
5,115,76,0,0,31.2,0.343,44,tested_positive
|
||||
9,152,78,34,171,34.2,0.893,33,tested_positive
|
||||
7,178,84,0,0,39.9,0.331,41,tested_positive
|
||||
1,130,70,13,105,25.9,0.472,22,tested_negative
|
||||
1,95,74,21,73,25.9,0.673,36,tested_negative
|
||||
1,0,68,35,0,32,0.389,22,tested_negative
|
||||
5,122,86,0,0,34.7,0.29,33,tested_negative
|
||||
8,95,72,0,0,36.8,0.485,57,tested_negative
|
||||
8,126,88,36,108,38.5,0.349,49,tested_negative
|
||||
1,139,46,19,83,28.7,0.654,22,tested_negative
|
||||
3,116,0,0,0,23.5,0.187,23,tested_negative
|
||||
3,99,62,19,74,21.8,0.279,26,tested_negative
|
||||
5,0,80,32,0,41,0.346,37,tested_positive
|
||||
4,92,80,0,0,42.2,0.237,29,tested_negative
|
||||
4,137,84,0,0,31.2,0.252,30,tested_negative
|
||||
3,61,82,28,0,34.4,0.243,46,tested_negative
|
||||
1,90,62,12,43,27.2,0.58,24,tested_negative
|
||||
3,90,78,0,0,42.7,0.559,21,tested_negative
|
||||
9,165,88,0,0,30.4,0.302,49,tested_positive
|
||||
1,125,50,40,167,33.3,0.962,28,tested_positive
|
||||
13,129,0,30,0,39.9,0.569,44,tested_positive
|
||||
12,88,74,40,54,35.3,0.378,48,tested_negative
|
||||
1,196,76,36,249,36.5,0.875,29,tested_positive
|
||||
5,189,64,33,325,31.2,0.583,29,tested_positive
|
||||
5,158,70,0,0,29.8,0.207,63,tested_negative
|
||||
5,103,108,37,0,39.2,0.305,65,tested_negative
|
||||
4,146,78,0,0,38.5,0.52,67,tested_positive
|
||||
4,147,74,25,293,34.9,0.385,30,tested_negative
|
||||
5,99,54,28,83,34,0.499,30,tested_negative
|
||||
6,124,72,0,0,27.6,0.368,29,tested_positive
|
||||
0,101,64,17,0,21,0.252,21,tested_negative
|
||||
3,81,86,16,66,27.5,0.306,22,tested_negative
|
||||
1,133,102,28,140,32.8,0.234,45,tested_positive
|
||||
3,173,82,48,465,38.4,2.137,25,tested_positive
|
||||
0,118,64,23,89,0,1.731,21,tested_negative
|
||||
0,84,64,22,66,35.8,0.545,21,tested_negative
|
||||
2,105,58,40,94,34.9,0.225,25,tested_negative
|
||||
2,122,52,43,158,36.2,0.816,28,tested_negative
|
||||
12,140,82,43,325,39.2,0.528,58,tested_positive
|
||||
0,98,82,15,84,25.2,0.299,22,tested_negative
|
||||
1,87,60,37,75,37.2,0.509,22,tested_negative
|
||||
4,156,75,0,0,48.3,0.238,32,tested_positive
|
||||
0,93,100,39,72,43.4,1.021,35,tested_negative
|
||||
1,107,72,30,82,30.8,0.821,24,tested_negative
|
||||
0,105,68,22,0,20,0.236,22,tested_negative
|
||||
1,109,60,8,182,25.4,0.947,21,tested_negative
|
||||
1,90,62,18,59,25.1,1.268,25,tested_negative
|
||||
1,125,70,24,110,24.3,0.221,25,tested_negative
|
||||
1,119,54,13,50,22.3,0.205,24,tested_negative
|
||||
5,116,74,29,0,32.3,0.66,35,tested_positive
|
||||
8,105,100,36,0,43.3,0.239,45,tested_positive
|
||||
5,144,82,26,285,32,0.452,58,tested_positive
|
||||
3,100,68,23,81,31.6,0.949,28,tested_negative
|
||||
1,100,66,29,196,32,0.444,42,tested_negative
|
||||
5,166,76,0,0,45.7,0.34,27,tested_positive
|
||||
1,131,64,14,415,23.7,0.389,21,tested_negative
|
||||
4,116,72,12,87,22.1,0.463,37,tested_negative
|
||||
4,158,78,0,0,32.9,0.803,31,tested_positive
|
||||
2,127,58,24,275,27.7,1.6,25,tested_negative
|
||||
3,96,56,34,115,24.7,0.944,39,tested_negative
|
||||
0,131,66,40,0,34.3,0.196,22,tested_positive
|
||||
3,82,70,0,0,21.1,0.389,25,tested_negative
|
||||
3,193,70,31,0,34.9,0.241,25,tested_positive
|
||||
4,95,64,0,0,32,0.161,31,tested_positive
|
||||
6,137,61,0,0,24.2,0.151,55,tested_negative
|
||||
5,136,84,41,88,35,0.286,35,tested_positive
|
||||
9,72,78,25,0,31.6,0.28,38,tested_negative
|
||||
5,168,64,0,0,32.9,0.135,41,tested_positive
|
||||
2,123,48,32,165,42.1,0.52,26,tested_negative
|
||||
4,115,72,0,0,28.9,0.376,46,tested_positive
|
||||
0,101,62,0,0,21.9,0.336,25,tested_negative
|
||||
8,197,74,0,0,25.9,1.191,39,tested_positive
|
||||
1,172,68,49,579,42.4,0.702,28,tested_positive
|
||||
6,102,90,39,0,35.7,0.674,28,tested_negative
|
||||
1,112,72,30,176,34.4,0.528,25,tested_negative
|
||||
1,143,84,23,310,42.4,1.076,22,tested_negative
|
||||
1,143,74,22,61,26.2,0.256,21,tested_negative
|
||||
0,138,60,35,167,34.6,0.534,21,tested_positive
|
||||
3,173,84,33,474,35.7,0.258,22,tested_positive
|
||||
1,97,68,21,0,27.2,1.095,22,tested_negative
|
||||
4,144,82,32,0,38.5,0.554,37,tested_positive
|
||||
1,83,68,0,0,18.2,0.624,27,tested_negative
|
||||
3,129,64,29,115,26.4,0.219,28,tested_positive
|
||||
1,119,88,41,170,45.3,0.507,26,tested_negative
|
||||
2,94,68,18,76,26,0.561,21,tested_negative
|
||||
0,102,64,46,78,40.6,0.496,21,tested_negative
|
||||
2,115,64,22,0,30.8,0.421,21,tested_negative
|
||||
8,151,78,32,210,42.9,0.516,36,tested_positive
|
||||
4,184,78,39,277,37,0.264,31,tested_positive
|
||||
0,94,0,0,0,0,0.256,25,tested_negative
|
||||
1,181,64,30,180,34.1,0.328,38,tested_positive
|
||||
0,135,94,46,145,40.6,0.284,26,tested_negative
|
||||
1,95,82,25,180,35,0.233,43,tested_positive
|
||||
2,99,0,0,0,22.2,0.108,23,tested_negative
|
||||
3,89,74,16,85,30.4,0.551,38,tested_negative
|
||||
1,80,74,11,60,30,0.527,22,tested_negative
|
||||
2,139,75,0,0,25.6,0.167,29,tested_negative
|
||||
1,90,68,8,0,24.5,1.138,36,tested_negative
|
||||
0,141,0,0,0,42.4,0.205,29,tested_positive
|
||||
12,140,85,33,0,37.4,0.244,41,tested_negative
|
||||
5,147,75,0,0,29.9,0.434,28,tested_negative
|
||||
1,97,70,15,0,18.2,0.147,21,tested_negative
|
||||
6,107,88,0,0,36.8,0.727,31,tested_negative
|
||||
0,189,104,25,0,34.3,0.435,41,tested_positive
|
||||
2,83,66,23,50,32.2,0.497,22,tested_negative
|
||||
4,117,64,27,120,33.2,0.23,24,tested_negative
|
||||
8,108,70,0,0,30.5,0.955,33,tested_positive
|
||||
4,117,62,12,0,29.7,0.38,30,tested_positive
|
||||
0,180,78,63,14,59.4,2.42,25,tested_positive
|
||||
1,100,72,12,70,25.3,0.658,28,tested_negative
|
||||
0,95,80,45,92,36.5,0.33,26,tested_negative
|
||||
0,104,64,37,64,33.6,0.51,22,tested_positive
|
||||
0,120,74,18,63,30.5,0.285,26,tested_negative
|
||||
1,82,64,13,95,21.2,0.415,23,tested_negative
|
||||
2,134,70,0,0,28.9,0.542,23,tested_positive
|
||||
0,91,68,32,210,39.9,0.381,25,tested_negative
|
||||
2,119,0,0,0,19.6,0.832,72,tested_negative
|
||||
2,100,54,28,105,37.8,0.498,24,tested_negative
|
||||
14,175,62,30,0,33.6,0.212,38,tested_positive
|
||||
1,135,54,0,0,26.7,0.687,62,tested_negative
|
||||
5,86,68,28,71,30.2,0.364,24,tested_negative
|
||||
10,148,84,48,237,37.6,1.001,51,tested_positive
|
||||
9,134,74,33,60,25.9,0.46,81,tested_negative
|
||||
9,120,72,22,56,20.8,0.733,48,tested_negative
|
||||
1,71,62,0,0,21.8,0.416,26,tested_negative
|
||||
8,74,70,40,49,35.3,0.705,39,tested_negative
|
||||
5,88,78,30,0,27.6,0.258,37,tested_negative
|
||||
10,115,98,0,0,24,1.022,34,tested_negative
|
||||
0,124,56,13,105,21.8,0.452,21,tested_negative
|
||||
0,74,52,10,36,27.8,0.269,22,tested_negative
|
||||
0,97,64,36,100,36.8,0.6,25,tested_negative
|
||||
8,120,0,0,0,30,0.183,38,tested_positive
|
||||
6,154,78,41,140,46.1,0.571,27,tested_negative
|
||||
1,144,82,40,0,41.3,0.607,28,tested_negative
|
||||
0,137,70,38,0,33.2,0.17,22,tested_negative
|
||||
0,119,66,27,0,38.8,0.259,22,tested_negative
|
||||
7,136,90,0,0,29.9,0.21,50,tested_negative
|
||||
4,114,64,0,0,28.9,0.126,24,tested_negative
|
||||
0,137,84,27,0,27.3,0.231,59,tested_negative
|
||||
2,105,80,45,191,33.7,0.711,29,tested_positive
|
||||
7,114,76,17,110,23.8,0.466,31,tested_negative
|
||||
8,126,74,38,75,25.9,0.162,39,tested_negative
|
||||
4,132,86,31,0,28,0.419,63,tested_negative
|
||||
3,158,70,30,328,35.5,0.344,35,tested_positive
|
||||
0,123,88,37,0,35.2,0.197,29,tested_negative
|
||||
4,85,58,22,49,27.8,0.306,28,tested_negative
|
||||
0,84,82,31,125,38.2,0.233,23,tested_negative
|
||||
0,145,0,0,0,44.2,0.63,31,tested_positive
|
||||
0,135,68,42,250,42.3,0.365,24,tested_positive
|
||||
1,139,62,41,480,40.7,0.536,21,tested_negative
|
||||
0,173,78,32,265,46.5,1.159,58,tested_negative
|
||||
4,99,72,17,0,25.6,0.294,28,tested_negative
|
||||
8,194,80,0,0,26.1,0.551,67,tested_negative
|
||||
2,83,65,28,66,36.8,0.629,24,tested_negative
|
||||
2,89,90,30,0,33.5,0.292,42,tested_negative
|
||||
4,99,68,38,0,32.8,0.145,33,tested_negative
|
||||
4,125,70,18,122,28.9,1.144,45,tested_positive
|
||||
3,80,0,0,0,0,0.174,22,tested_negative
|
||||
6,166,74,0,0,26.6,0.304,66,tested_negative
|
||||
5,110,68,0,0,26,0.292,30,tested_negative
|
||||
2,81,72,15,76,30.1,0.547,25,tested_negative
|
||||
7,195,70,33,145,25.1,0.163,55,tested_positive
|
||||
6,154,74,32,193,29.3,0.839,39,tested_negative
|
||||
2,117,90,19,71,25.2,0.313,21,tested_negative
|
||||
3,84,72,32,0,37.2,0.267,28,tested_negative
|
||||
6,0,68,41,0,39,0.727,41,tested_positive
|
||||
7,94,64,25,79,33.3,0.738,41,tested_negative
|
||||
3,96,78,39,0,37.3,0.238,40,tested_negative
|
||||
10,75,82,0,0,33.3,0.263,38,tested_negative
|
||||
0,180,90,26,90,36.5,0.314,35,tested_positive
|
||||
1,130,60,23,170,28.6,0.692,21,tested_negative
|
||||
2,84,50,23,76,30.4,0.968,21,tested_negative
|
||||
8,120,78,0,0,25,0.409,64,tested_negative
|
||||
12,84,72,31,0,29.7,0.297,46,tested_positive
|
||||
0,139,62,17,210,22.1,0.207,21,tested_negative
|
||||
9,91,68,0,0,24.2,0.2,58,tested_negative
|
||||
2,91,62,0,0,27.3,0.525,22,tested_negative
|
||||
3,99,54,19,86,25.6,0.154,24,tested_negative
|
||||
3,163,70,18,105,31.6,0.268,28,tested_positive
|
||||
9,145,88,34,165,30.3,0.771,53,tested_positive
|
||||
7,125,86,0,0,37.6,0.304,51,tested_negative
|
||||
13,76,60,0,0,32.8,0.18,41,tested_negative
|
||||
6,129,90,7,326,19.6,0.582,60,tested_negative
|
||||
2,68,70,32,66,25,0.187,25,tested_negative
|
||||
3,124,80,33,130,33.2,0.305,26,tested_negative
|
||||
6,114,0,0,0,0,0.189,26,tested_negative
|
||||
9,130,70,0,0,34.2,0.652,45,tested_positive
|
||||
3,125,58,0,0,31.6,0.151,24,tested_negative
|
||||
3,87,60,18,0,21.8,0.444,21,tested_negative
|
||||
1,97,64,19,82,18.2,0.299,21,tested_negative
|
||||
3,116,74,15,105,26.3,0.107,24,tested_negative
|
||||
0,117,66,31,188,30.8,0.493,22,tested_negative
|
||||
0,111,65,0,0,24.6,0.66,31,tested_negative
|
||||
2,122,60,18,106,29.8,0.717,22,tested_negative
|
||||
0,107,76,0,0,45.3,0.686,24,tested_negative
|
||||
1,86,66,52,65,41.3,0.917,29,tested_negative
|
||||
6,91,0,0,0,29.8,0.501,31,tested_negative
|
||||
1,77,56,30,56,33.3,1.251,24,tested_negative
|
||||
4,132,0,0,0,32.9,0.302,23,tested_positive
|
||||
0,105,90,0,0,29.6,0.197,46,tested_negative
|
||||
0,57,60,0,0,21.7,0.735,67,tested_negative
|
||||
0,127,80,37,210,36.3,0.804,23,tested_negative
|
||||
3,129,92,49,155,36.4,0.968,32,tested_positive
|
||||
8,100,74,40,215,39.4,0.661,43,tested_positive
|
||||
3,128,72,25,190,32.4,0.549,27,tested_positive
|
||||
10,90,85,32,0,34.9,0.825,56,tested_positive
|
||||
4,84,90,23,56,39.5,0.159,25,tested_negative
|
||||
1,88,78,29,76,32,0.365,29,tested_negative
|
||||
8,186,90,35,225,34.5,0.423,37,tested_positive
|
||||
5,187,76,27,207,43.6,1.034,53,tested_positive
|
||||
4,131,68,21,166,33.1,0.16,28,tested_negative
|
||||
1,164,82,43,67,32.8,0.341,50,tested_negative
|
||||
4,189,110,31,0,28.5,0.68,37,tested_negative
|
||||
1,116,70,28,0,27.4,0.204,21,tested_negative
|
||||
3,84,68,30,106,31.9,0.591,25,tested_negative
|
||||
6,114,88,0,0,27.8,0.247,66,tested_negative
|
||||
1,88,62,24,44,29.9,0.422,23,tested_negative
|
||||
1,84,64,23,115,36.9,0.471,28,tested_negative
|
||||
7,124,70,33,215,25.5,0.161,37,tested_negative
|
||||
1,97,70,40,0,38.1,0.218,30,tested_negative
|
||||
8,110,76,0,0,27.8,0.237,58,tested_negative
|
||||
11,103,68,40,0,46.2,0.126,42,tested_negative
|
||||
11,85,74,0,0,30.1,0.3,35,tested_negative
|
||||
6,125,76,0,0,33.8,0.121,54,tested_positive
|
||||
0,198,66,32,274,41.3,0.502,28,tested_positive
|
||||
1,87,68,34,77,37.6,0.401,24,tested_negative
|
||||
6,99,60,19,54,26.9,0.497,32,tested_negative
|
||||
0,91,80,0,0,32.4,0.601,27,tested_negative
|
||||
2,95,54,14,88,26.1,0.748,22,tested_negative
|
||||
1,99,72,30,18,38.6,0.412,21,tested_negative
|
||||
6,92,62,32,126,32,0.085,46,tested_negative
|
||||
4,154,72,29,126,31.3,0.338,37,tested_negative
|
||||
0,121,66,30,165,34.3,0.203,33,tested_positive
|
||||
3,78,70,0,0,32.5,0.27,39,tested_negative
|
||||
2,130,96,0,0,22.6,0.268,21,tested_negative
|
||||
3,111,58,31,44,29.5,0.43,22,tested_negative
|
||||
2,98,60,17,120,34.7,0.198,22,tested_negative
|
||||
1,143,86,30,330,30.1,0.892,23,tested_negative
|
||||
1,119,44,47,63,35.5,0.28,25,tested_negative
|
||||
6,108,44,20,130,24,0.813,35,tested_negative
|
||||
2,118,80,0,0,42.9,0.693,21,tested_positive
|
||||
10,133,68,0,0,27,0.245,36,tested_negative
|
||||
2,197,70,99,0,34.7,0.575,62,tested_positive
|
||||
0,151,90,46,0,42.1,0.371,21,tested_positive
|
||||
6,109,60,27,0,25,0.206,27,tested_negative
|
||||
12,121,78,17,0,26.5,0.259,62,tested_negative
|
||||
8,100,76,0,0,38.7,0.19,42,tested_negative
|
||||
8,124,76,24,600,28.7,0.687,52,tested_positive
|
||||
1,93,56,11,0,22.5,0.417,22,tested_negative
|
||||
8,143,66,0,0,34.9,0.129,41,tested_positive
|
||||
6,103,66,0,0,24.3,0.249,29,tested_negative
|
||||
3,176,86,27,156,33.3,1.154,52,tested_positive
|
||||
0,73,0,0,0,21.1,0.342,25,tested_negative
|
||||
11,111,84,40,0,46.8,0.925,45,tested_positive
|
||||
2,112,78,50,140,39.4,0.175,24,tested_negative
|
||||
3,132,80,0,0,34.4,0.402,44,tested_positive
|
||||
2,82,52,22,115,28.5,1.699,25,tested_negative
|
||||
6,123,72,45,230,33.6,0.733,34,tested_negative
|
||||
0,188,82,14,185,32,0.682,22,tested_positive
|
||||
0,67,76,0,0,45.3,0.194,46,tested_negative
|
||||
1,89,24,19,25,27.8,0.559,21,tested_negative
|
||||
1,173,74,0,0,36.8,0.088,38,tested_positive
|
||||
1,109,38,18,120,23.1,0.407,26,tested_negative
|
||||
1,108,88,19,0,27.1,0.4,24,tested_negative
|
||||
6,96,0,0,0,23.7,0.19,28,tested_negative
|
||||
1,124,74,36,0,27.8,0.1,30,tested_negative
|
||||
7,150,78,29,126,35.2,0.692,54,tested_positive
|
||||
4,183,0,0,0,28.4,0.212,36,tested_positive
|
||||
1,124,60,32,0,35.8,0.514,21,tested_negative
|
||||
1,181,78,42,293,40,1.258,22,tested_positive
|
||||
1,92,62,25,41,19.5,0.482,25,tested_negative
|
||||
0,152,82,39,272,41.5,0.27,27,tested_negative
|
||||
1,111,62,13,182,24,0.138,23,tested_negative
|
||||
3,106,54,21,158,30.9,0.292,24,tested_negative
|
||||
3,174,58,22,194,32.9,0.593,36,tested_positive
|
||||
7,168,88,42,321,38.2,0.787,40,tested_positive
|
||||
6,105,80,28,0,32.5,0.878,26,tested_negative
|
||||
11,138,74,26,144,36.1,0.557,50,tested_positive
|
||||
3,106,72,0,0,25.8,0.207,27,tested_negative
|
||||
6,117,96,0,0,28.7,0.157,30,tested_negative
|
||||
2,68,62,13,15,20.1,0.257,23,tested_negative
|
||||
9,112,82,24,0,28.2,1.282,50,tested_positive
|
||||
0,119,0,0,0,32.4,0.141,24,tested_positive
|
||||
2,112,86,42,160,38.4,0.246,28,tested_negative
|
||||
2,92,76,20,0,24.2,1.698,28,tested_negative
|
||||
6,183,94,0,0,40.8,1.461,45,tested_negative
|
||||
0,94,70,27,115,43.5,0.347,21,tested_negative
|
||||
2,108,64,0,0,30.8,0.158,21,tested_negative
|
||||
4,90,88,47,54,37.7,0.362,29,tested_negative
|
||||
0,125,68,0,0,24.7,0.206,21,tested_negative
|
||||
0,132,78,0,0,32.4,0.393,21,tested_negative
|
||||
5,128,80,0,0,34.6,0.144,45,tested_negative
|
||||
4,94,65,22,0,24.7,0.148,21,tested_negative
|
||||
7,114,64,0,0,27.4,0.732,34,tested_positive
|
||||
0,102,78,40,90,34.5,0.238,24,tested_negative
|
||||
2,111,60,0,0,26.2,0.343,23,tested_negative
|
||||
1,128,82,17,183,27.5,0.115,22,tested_negative
|
||||
10,92,62,0,0,25.9,0.167,31,tested_negative
|
||||
13,104,72,0,0,31.2,0.465,38,tested_positive
|
||||
5,104,74,0,0,28.8,0.153,48,tested_negative
|
||||
2,94,76,18,66,31.6,0.649,23,tested_negative
|
||||
7,97,76,32,91,40.9,0.871,32,tested_positive
|
||||
1,100,74,12,46,19.5,0.149,28,tested_negative
|
||||
0,102,86,17,105,29.3,0.695,27,tested_negative
|
||||
4,128,70,0,0,34.3,0.303,24,tested_negative
|
||||
6,147,80,0,0,29.5,0.178,50,tested_positive
|
||||
4,90,0,0,0,28,0.61,31,tested_negative
|
||||
3,103,72,30,152,27.6,0.73,27,tested_negative
|
||||
2,157,74,35,440,39.4,0.134,30,tested_negative
|
||||
1,167,74,17,144,23.4,0.447,33,tested_positive
|
||||
0,179,50,36,159,37.8,0.455,22,tested_positive
|
||||
11,136,84,35,130,28.3,0.26,42,tested_positive
|
||||
0,107,60,25,0,26.4,0.133,23,tested_negative
|
||||
1,91,54,25,100,25.2,0.234,23,tested_negative
|
||||
1,117,60,23,106,33.8,0.466,27,tested_negative
|
||||
5,123,74,40,77,34.1,0.269,28,tested_negative
|
||||
2,120,54,0,0,26.8,0.455,27,tested_negative
|
||||
1,106,70,28,135,34.2,0.142,22,tested_negative
|
||||
2,155,52,27,540,38.7,0.24,25,tested_positive
|
||||
2,101,58,35,90,21.8,0.155,22,tested_negative
|
||||
1,120,80,48,200,38.9,1.162,41,tested_negative
|
||||
11,127,106,0,0,39,0.19,51,tested_negative
|
||||
3,80,82,31,70,34.2,1.292,27,tested_positive
|
||||
10,162,84,0,0,27.7,0.182,54,tested_negative
|
||||
1,199,76,43,0,42.9,1.394,22,tested_positive
|
||||
8,167,106,46,231,37.6,0.165,43,tested_positive
|
||||
9,145,80,46,130,37.9,0.637,40,tested_positive
|
||||
6,115,60,39,0,33.7,0.245,40,tested_positive
|
||||
1,112,80,45,132,34.8,0.217,24,tested_negative
|
||||
4,145,82,18,0,32.5,0.235,70,tested_positive
|
||||
10,111,70,27,0,27.5,0.141,40,tested_positive
|
||||
6,98,58,33,190,34,0.43,43,tested_negative
|
||||
9,154,78,30,100,30.9,0.164,45,tested_negative
|
||||
6,165,68,26,168,33.6,0.631,49,tested_negative
|
||||
1,99,58,10,0,25.4,0.551,21,tested_negative
|
||||
10,68,106,23,49,35.5,0.285,47,tested_negative
|
||||
3,123,100,35,240,57.3,0.88,22,tested_negative
|
||||
8,91,82,0,0,35.6,0.587,68,tested_negative
|
||||
6,195,70,0,0,30.9,0.328,31,tested_positive
|
||||
9,156,86,0,0,24.8,0.23,53,tested_positive
|
||||
0,93,60,0,0,35.3,0.263,25,tested_negative
|
||||
3,121,52,0,0,36,0.127,25,tested_positive
|
||||
2,101,58,17,265,24.2,0.614,23,tested_negative
|
||||
2,56,56,28,45,24.2,0.332,22,tested_negative
|
||||
0,162,76,36,0,49.6,0.364,26,tested_positive
|
||||
0,95,64,39,105,44.6,0.366,22,tested_negative
|
||||
4,125,80,0,0,32.3,0.536,27,tested_positive
|
||||
5,136,82,0,0,0,0.64,69,tested_negative
|
||||
2,129,74,26,205,33.2,0.591,25,tested_negative
|
||||
3,130,64,0,0,23.1,0.314,22,tested_negative
|
||||
1,107,50,19,0,28.3,0.181,29,tested_negative
|
||||
1,140,74,26,180,24.1,0.828,23,tested_negative
|
||||
1,144,82,46,180,46.1,0.335,46,tested_positive
|
||||
8,107,80,0,0,24.6,0.856,34,tested_negative
|
||||
13,158,114,0,0,42.3,0.257,44,tested_positive
|
||||
2,121,70,32,95,39.1,0.886,23,tested_negative
|
||||
7,129,68,49,125,38.5,0.439,43,tested_positive
|
||||
2,90,60,0,0,23.5,0.191,25,tested_negative
|
||||
7,142,90,24,480,30.4,0.128,43,tested_positive
|
||||
3,169,74,19,125,29.9,0.268,31,tested_positive
|
||||
0,99,0,0,0,25,0.253,22,tested_negative
|
||||
4,127,88,11,155,34.5,0.598,28,tested_negative
|
||||
4,118,70,0,0,44.5,0.904,26,tested_negative
|
||||
2,122,76,27,200,35.9,0.483,26,tested_negative
|
||||
6,125,78,31,0,27.6,0.565,49,tested_positive
|
||||
1,168,88,29,0,35,0.905,52,tested_positive
|
||||
2,129,0,0,0,38.5,0.304,41,tested_negative
|
||||
4,110,76,20,100,28.4,0.118,27,tested_negative
|
||||
6,80,80,36,0,39.8,0.177,28,tested_negative
|
||||
10,115,0,0,0,0,0.261,30,tested_positive
|
||||
2,127,46,21,335,34.4,0.176,22,tested_negative
|
||||
9,164,78,0,0,32.8,0.148,45,tested_positive
|
||||
2,93,64,32,160,38,0.674,23,tested_positive
|
||||
3,158,64,13,387,31.2,0.295,24,tested_negative
|
||||
5,126,78,27,22,29.6,0.439,40,tested_negative
|
||||
10,129,62,36,0,41.2,0.441,38,tested_positive
|
||||
0,134,58,20,291,26.4,0.352,21,tested_negative
|
||||
3,102,74,0,0,29.5,0.121,32,tested_negative
|
||||
7,187,50,33,392,33.9,0.826,34,tested_positive
|
||||
3,173,78,39,185,33.8,0.97,31,tested_positive
|
||||
10,94,72,18,0,23.1,0.595,56,tested_negative
|
||||
1,108,60,46,178,35.5,0.415,24,tested_negative
|
||||
5,97,76,27,0,35.6,0.378,52,tested_positive
|
||||
4,83,86,19,0,29.3,0.317,34,tested_negative
|
||||
1,114,66,36,200,38.1,0.289,21,tested_negative
|
||||
1,149,68,29,127,29.3,0.349,42,tested_positive
|
||||
5,117,86,30,105,39.1,0.251,42,tested_negative
|
||||
1,111,94,0,0,32.8,0.265,45,tested_negative
|
||||
4,112,78,40,0,39.4,0.236,38,tested_negative
|
||||
1,116,78,29,180,36.1,0.496,25,tested_negative
|
||||
0,141,84,26,0,32.4,0.433,22,tested_negative
|
||||
2,175,88,0,0,22.9,0.326,22,tested_negative
|
||||
2,92,52,0,0,30.1,0.141,22,tested_negative
|
||||
3,130,78,23,79,28.4,0.323,34,tested_positive
|
||||
8,120,86,0,0,28.4,0.259,22,tested_positive
|
||||
2,174,88,37,120,44.5,0.646,24,tested_positive
|
||||
2,106,56,27,165,29,0.426,22,tested_negative
|
||||
2,105,75,0,0,23.3,0.56,53,tested_negative
|
||||
4,95,60,32,0,35.4,0.284,28,tested_negative
|
||||
0,126,86,27,120,27.4,0.515,21,tested_negative
|
||||
8,65,72,23,0,32,0.6,42,tested_negative
|
||||
2,99,60,17,160,36.6,0.453,21,tested_negative
|
||||
1,102,74,0,0,39.5,0.293,42,tested_positive
|
||||
11,120,80,37,150,42.3,0.785,48,tested_positive
|
||||
3,102,44,20,94,30.8,0.4,26,tested_negative
|
||||
1,109,58,18,116,28.5,0.219,22,tested_negative
|
||||
9,140,94,0,0,32.7,0.734,45,tested_positive
|
||||
13,153,88,37,140,40.6,1.174,39,tested_negative
|
||||
12,100,84,33,105,30,0.488,46,tested_negative
|
||||
1,147,94,41,0,49.3,0.358,27,tested_positive
|
||||
1,81,74,41,57,46.3,1.096,32,tested_negative
|
||||
3,187,70,22,200,36.4,0.408,36,tested_positive
|
||||
6,162,62,0,0,24.3,0.178,50,tested_positive
|
||||
4,136,70,0,0,31.2,1.182,22,tested_positive
|
||||
1,121,78,39,74,39,0.261,28,tested_negative
|
||||
3,108,62,24,0,26,0.223,25,tested_negative
|
||||
0,181,88,44,510,43.3,0.222,26,tested_positive
|
||||
8,154,78,32,0,32.4,0.443,45,tested_positive
|
||||
1,128,88,39,110,36.5,1.057,37,tested_positive
|
||||
7,137,90,41,0,32,0.391,39,tested_negative
|
||||
0,123,72,0,0,36.3,0.258,52,tested_positive
|
||||
1,106,76,0,0,37.5,0.197,26,tested_negative
|
||||
6,190,92,0,0,35.5,0.278,66,tested_positive
|
||||
2,88,58,26,16,28.4,0.766,22,tested_negative
|
||||
9,170,74,31,0,44,0.403,43,tested_positive
|
||||
9,89,62,0,0,22.5,0.142,33,tested_negative
|
||||
10,101,76,48,180,32.9,0.171,63,tested_negative
|
||||
2,122,70,27,0,36.8,0.34,27,tested_negative
|
||||
5,121,72,23,112,26.2,0.245,30,tested_negative
|
||||
1,126,60,0,0,30.1,0.349,47,tested_positive
|
||||
1,93,70,31,0,30.4,0.315,23,tested_negative
|
332
tests/datasets/glass.arff
Executable file
332
tests/datasets/glass.arff
Executable file
@@ -0,0 +1,332 @@
|
||||
% 1. Title: Glass Identification Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: B. German
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% (b) Donor: Vina Spiehler, Ph.D., DABFT
|
||||
% Diagnostic Products Corporation
|
||||
% (213) 776-0180 (ext 3014)
|
||||
% (c) Date: September, 1987
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% -- Rule Induction in Forensic Science
|
||||
% -- Ian W. Evett and Ernest J. Spiehler
|
||||
% -- Central Research Establishment
|
||||
% Home Office Forensic Science Service
|
||||
% Aldermaston, Reading, Berkshire RG7 4PN
|
||||
% -- Unknown technical note number (sorry, not listed here)
|
||||
% -- General Results: nearest neighbor held its own with respect to the
|
||||
% rule-based system
|
||||
%
|
||||
% 4. Relevant Information:n
|
||||
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
|
||||
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
|
||||
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
|
||||
% Suite 206; Marina Del Ray, CA 90292 (213) 827-7890 and FAX: -3189.
|
||||
% In determining whether the glass was a type of "float" glass or not,
|
||||
% the following results were obtained (# incorrect answers):
|
||||
%
|
||||
% Type of Sample Beagle NN DA
|
||||
% Windows that were float processed (87) 10 12 21
|
||||
% Windows that were not: (76) 19 16 22
|
||||
%
|
||||
% The study of classification of types of glass was motivated by
|
||||
% criminological investigation. At the scene of the crime, the glass left
|
||||
% can be used as evidence...if it is correctly identified!
|
||||
%
|
||||
% 5. Number of Instances: 214
|
||||
%
|
||||
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
|
||||
% -- all attributes are continuously valued
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. Id number: 1 to 214
|
||||
% 2. RI: refractive index
|
||||
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
|
||||
% are attributes 4-10)
|
||||
% 4. Mg: Magnesium
|
||||
% 5. Al: Aluminum
|
||||
% 6. Si: Silicon
|
||||
% 7. K: Potassium
|
||||
% 8. Ca: Calcium
|
||||
% 9. Ba: Barium
|
||||
% 10. Fe: Iron
|
||||
% 11. Type of glass: (class attribute)
|
||||
% -- 1 building_windows_float_processed
|
||||
% -- 2 building_windows_non_float_processed
|
||||
% -- 3 vehicle_windows_float_processed
|
||||
% -- 4 vehicle_windows_non_float_processed (none in this database)
|
||||
% -- 5 containers
|
||||
% -- 6 tableware
|
||||
% -- 7 headlamps
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Attribute: Min Max Mean SD Correlation with class
|
||||
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
|
||||
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
|
||||
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
|
||||
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
|
||||
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
|
||||
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
|
||||
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
|
||||
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
|
||||
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
|
||||
%
|
||||
% 9. Class Distribution: (out of 214 total instances)
|
||||
% -- 163 Window glass (building windows and vehicle windows)
|
||||
% -- 87 float processed
|
||||
% -- 70 building windows
|
||||
% -- 17 vehicle windows
|
||||
% -- 76 non-float processed
|
||||
% -- 76 building windows
|
||||
% -- 0 vehicle windows
|
||||
% -- 51 Non-window glass
|
||||
% -- 13 containers
|
||||
% -- 9 tableware
|
||||
% -- 29 headlamps
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
%
|
||||
% Relabeled values in attribute 'Type'
|
||||
% From: '1' To: 'build wind float'
|
||||
% From: '2' To: 'build wind non-float'
|
||||
% From: '3' To: 'vehic wind float'
|
||||
% From: '4' To: 'vehic wind non-float'
|
||||
% From: '5' To: containers
|
||||
% From: '6' To: tableware
|
||||
% From: '7' To: headlamps
|
||||
%
|
||||
@relation Glass
|
||||
@attribute 'RI' real
|
||||
@attribute 'Na' real
|
||||
@attribute 'Mg' real
|
||||
@attribute 'Al' real
|
||||
@attribute 'Si' real
|
||||
@attribute 'K' real
|
||||
@attribute 'Ca' real
|
||||
@attribute 'Ba' real
|
||||
@attribute 'Fe' real
|
||||
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
|
||||
@data
|
||||
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
|
||||
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
|
||||
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
|
||||
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
|
||||
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
|
||||
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
|
||||
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
|
||||
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
|
||||
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
|
||||
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
|
||||
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
|
||||
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
|
||||
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
|
||||
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
|
||||
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
|
||||
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
|
||||
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
|
||||
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
|
||||
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
|
||||
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
|
||||
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
|
||||
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
|
||||
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
|
||||
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
|
||||
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
|
||||
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
|
||||
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
|
||||
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
|
||||
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
|
||||
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
|
||||
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
|
||||
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
|
||||
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
|
||||
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
|
||||
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
|
||||
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
|
||||
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
|
||||
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
|
||||
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
|
||||
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
|
||||
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
|
||||
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
|
||||
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
|
||||
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
|
||||
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
|
||||
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
|
||||
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
|
||||
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
|
||||
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
|
||||
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
|
||||
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
|
||||
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
|
||||
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
|
||||
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
|
||||
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
|
||||
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
|
||||
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
|
||||
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
|
||||
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
|
||||
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
|
||||
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
|
||||
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
|
||||
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
|
||||
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
|
||||
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
|
||||
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
|
||||
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
|
||||
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
|
||||
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
|
||||
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
|
||||
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
|
||||
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
|
||||
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
|
||||
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
|
||||
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
|
||||
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
|
||||
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
|
||||
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
|
||||
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
|
||||
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
|
||||
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
|
||||
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
|
||||
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
|
||||
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
|
||||
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
|
||||
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
|
||||
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
|
||||
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
|
||||
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
|
||||
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
|
||||
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
|
||||
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
|
||||
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
|
||||
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
|
||||
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
|
||||
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
|
||||
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
|
||||
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
|
||||
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
|
||||
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
|
||||
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
|
||||
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
|
||||
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
|
||||
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
|
||||
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
|
||||
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
|
||||
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
|
||||
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
|
||||
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
|
||||
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
|
||||
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
|
||||
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
|
||||
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
|
||||
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
|
||||
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
|
||||
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
|
||||
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
|
||||
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
|
||||
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
|
||||
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
|
||||
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
|
||||
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
|
||||
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
|
||||
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
|
||||
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
|
||||
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
|
||||
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
|
||||
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
|
||||
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
|
||||
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
|
||||
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
|
||||
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
|
||||
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
|
||||
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
|
||||
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
|
||||
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
|
||||
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
|
||||
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
|
||||
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
|
||||
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
|
||||
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
|
||||
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
|
||||
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
|
||||
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
|
||||
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
|
||||
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
|
||||
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
|
||||
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
|
||||
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
|
||||
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
|
||||
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
|
||||
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
|
||||
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
|
||||
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
|
||||
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
|
||||
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
|
||||
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
|
||||
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
|
||||
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
|
||||
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
|
||||
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
|
||||
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
|
||||
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
|
||||
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
|
||||
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
|
||||
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
|
||||
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
|
||||
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
|
||||
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
|
||||
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
|
||||
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
|
||||
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
|
||||
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
|
||||
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
|
||||
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
|
||||
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
|
||||
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
|
||||
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
|
||||
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
|
||||
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
|
||||
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
|
||||
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
|
||||
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
|
||||
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
|
||||
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
|
||||
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
|
||||
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
|
||||
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
|
||||
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
|
||||
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
|
||||
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
|
||||
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
|
||||
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
|
||||
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
|
||||
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
|
||||
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
|
||||
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
|
||||
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
|
||||
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
|
||||
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
|
||||
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
|
||||
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
|
||||
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
|
||||
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
|
||||
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
|
||||
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
|
||||
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
|
||||
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
|
||||
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
|
||||
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware
|
399
tests/datasets/liver-disorders.arff
Executable file
399
tests/datasets/liver-disorders.arff
Executable file
@@ -0,0 +1,399 @@
|
||||
% 1. Title: BUPA liver disorders
|
||||
%
|
||||
% 2. Source information:
|
||||
% -- Creators: BUPA Medical Research Ltd.
|
||||
% -- Donor: Richard S. Forsyth
|
||||
% 8 Grosvenor Avenue
|
||||
% Mapperley Park
|
||||
% Nottingham NG3 5DX
|
||||
% 0602-621676
|
||||
% -- Date: 5/15/1990
|
||||
%
|
||||
% 3. Past usage:
|
||||
% -- None known other than what is shown in the PC/BEAGLE User's Guide
|
||||
% (written by Richard S. Forsyth).
|
||||
%
|
||||
% 4. Relevant information:
|
||||
% -- The first 5 variables are all blood tests which are thought
|
||||
% to be sensitive to liver disorders that might arise from
|
||||
% excessive alcohol consumption. Each line in the bupa.data file
|
||||
% constitutes the record of a single male individual.
|
||||
% -- It appears that drinks>5 is some sort of a selector on this database.
|
||||
% See the PC/BEAGLE User's Guide for more information.
|
||||
%
|
||||
% 5. Number of instances: 345
|
||||
%
|
||||
% 6. Number of attributes: 7 overall
|
||||
%
|
||||
% 7. Attribute information:
|
||||
% 1. mcv mean corpuscular volume
|
||||
% 2. alkphos alkaline phosphotase
|
||||
% 3. sgpt alamine aminotransferase
|
||||
% 4. sgot aspartate aminotransferase
|
||||
% 5. gammagt gamma-glutamyl transpeptidase
|
||||
% 6. drinks number of half-pint equivalents of alcoholic beverages
|
||||
% drunk per day
|
||||
% 7. selector field used to split data into two sets
|
||||
%
|
||||
% 8. Missing values: none%
|
||||
% Information about the dataset
|
||||
% CLASSTYPE: nominal
|
||||
% CLASSINDEX: last
|
||||
%
|
||||
|
||||
@relation liver-disorders
|
||||
|
||||
@attribute mcv INTEGER
|
||||
@attribute alkphos INTEGER
|
||||
@attribute sgpt INTEGER
|
||||
@attribute sgot INTEGER
|
||||
@attribute gammagt INTEGER
|
||||
@attribute drinks REAL
|
||||
@attribute selector {1,2}
|
||||
|
||||
@data
|
||||
85,92,45,27,31,0.0,1
|
||||
85,64,59,32,23,0.0,2
|
||||
86,54,33,16,54,0.0,2
|
||||
91,78,34,24,36,0.0,2
|
||||
87,70,12,28,10,0.0,2
|
||||
98,55,13,17,17,0.0,2
|
||||
88,62,20,17,9,0.5,1
|
||||
88,67,21,11,11,0.5,1
|
||||
92,54,22,20,7,0.5,1
|
||||
90,60,25,19,5,0.5,1
|
||||
89,52,13,24,15,0.5,1
|
||||
82,62,17,17,15,0.5,1
|
||||
90,64,61,32,13,0.5,1
|
||||
86,77,25,19,18,0.5,1
|
||||
96,67,29,20,11,0.5,1
|
||||
91,78,20,31,18,0.5,1
|
||||
89,67,23,16,10,0.5,1
|
||||
89,79,17,17,16,0.5,1
|
||||
91,107,20,20,56,0.5,1
|
||||
94,116,11,33,11,0.5,1
|
||||
92,59,35,13,19,0.5,1
|
||||
93,23,35,20,20,0.5,1
|
||||
90,60,23,27,5,0.5,1
|
||||
96,68,18,19,19,0.5,1
|
||||
84,80,47,33,97,0.5,1
|
||||
92,70,24,13,26,0.5,1
|
||||
90,47,28,15,18,0.5,1
|
||||
88,66,20,21,10,0.5,1
|
||||
91,102,17,13,19,0.5,1
|
||||
87,41,31,19,16,0.5,1
|
||||
86,79,28,16,17,0.5,1
|
||||
91,57,31,23,42,0.5,1
|
||||
93,77,32,18,29,0.5,1
|
||||
88,96,28,21,40,0.5,1
|
||||
94,65,22,18,11,0.5,1
|
||||
91,72,155,68,82,0.5,2
|
||||
85,54,47,33,22,0.5,2
|
||||
79,39,14,19,9,0.5,2
|
||||
85,85,25,26,30,0.5,2
|
||||
89,63,24,20,38,0.5,2
|
||||
84,92,68,37,44,0.5,2
|
||||
89,68,26,39,42,0.5,2
|
||||
89,101,18,25,13,0.5,2
|
||||
86,84,18,14,16,0.5,2
|
||||
85,65,25,14,18,0.5,2
|
||||
88,61,19,21,13,0.5,2
|
||||
92,56,14,16,10,0.5,2
|
||||
95,50,29,25,50,0.5,2
|
||||
91,75,24,22,11,0.5,2
|
||||
83,40,29,25,38,0.5,2
|
||||
89,74,19,23,16,0.5,2
|
||||
85,64,24,22,11,0.5,2
|
||||
92,57,64,36,90,0.5,2
|
||||
94,48,11,23,43,0.5,2
|
||||
87,52,21,19,30,0.5,2
|
||||
85,65,23,29,15,0.5,2
|
||||
84,82,21,21,19,0.5,2
|
||||
88,49,20,22,19,0.5,2
|
||||
96,67,26,26,36,0.5,2
|
||||
90,63,24,24,24,0.5,2
|
||||
90,45,33,34,27,0.5,2
|
||||
90,72,14,15,18,0.5,2
|
||||
91,55,4,8,13,0.5,2
|
||||
91,52,15,22,11,0.5,2
|
||||
87,71,32,19,27,1.0,1
|
||||
89,77,26,20,19,1.0,1
|
||||
89,67,5,17,14,1.0,2
|
||||
85,51,26,24,23,1.0,2
|
||||
103,75,19,30,13,1.0,2
|
||||
90,63,16,21,14,1.0,2
|
||||
90,63,29,23,57,2.0,1
|
||||
90,67,35,19,35,2.0,1
|
||||
87,66,27,22,9,2.0,1
|
||||
90,73,34,21,22,2.0,1
|
||||
86,54,20,21,16,2.0,1
|
||||
90,80,19,14,42,2.0,1
|
||||
87,90,43,28,156,2.0,2
|
||||
96,72,28,19,30,2.0,2
|
||||
91,55,9,25,16,2.0,2
|
||||
95,78,27,25,30,2.0,2
|
||||
92,101,34,30,64,2.0,2
|
||||
89,51,41,22,48,2.0,2
|
||||
91,99,42,33,16,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
92,60,30,27,297,2.0,2
|
||||
94,58,21,18,26,2.0,2
|
||||
88,47,33,26,29,2.0,2
|
||||
92,65,17,25,9,2.0,2
|
||||
92,79,22,20,11,3.0,1
|
||||
84,83,20,25,7,3.0,1
|
||||
88,68,27,21,26,3.0,1
|
||||
86,48,20,20,6,3.0,1
|
||||
99,69,45,32,30,3.0,1
|
||||
88,66,23,12,15,3.0,1
|
||||
89,62,42,30,20,3.0,1
|
||||
90,51,23,17,27,3.0,1
|
||||
81,61,32,37,53,3.0,2
|
||||
89,89,23,18,104,3.0,2
|
||||
89,65,26,18,36,3.0,2
|
||||
92,75,26,26,24,3.0,2
|
||||
85,59,25,20,25,3.0,2
|
||||
92,61,18,13,81,3.0,2
|
||||
89,63,22,27,10,4.0,1
|
||||
90,84,18,23,13,4.0,1
|
||||
88,95,25,19,14,4.0,1
|
||||
89,35,27,29,17,4.0,1
|
||||
91,80,37,23,27,4.0,1
|
||||
91,109,33,15,18,4.0,1
|
||||
91,65,17,5,7,4.0,1
|
||||
88,107,29,20,50,4.0,2
|
||||
87,76,22,55,9,4.0,2
|
||||
87,86,28,23,21,4.0,2
|
||||
87,42,26,23,17,4.0,2
|
||||
88,80,24,25,17,4.0,2
|
||||
90,96,34,49,169,4.0,2
|
||||
86,67,11,15,8,4.0,2
|
||||
92,40,19,20,21,4.0,2
|
||||
85,60,17,21,14,4.0,2
|
||||
89,90,15,17,25,4.0,2
|
||||
91,57,15,16,16,4.0,2
|
||||
96,55,48,39,42,4.0,2
|
||||
79,101,17,27,23,4.0,2
|
||||
90,134,14,20,14,4.0,2
|
||||
89,76,14,21,24,4.0,2
|
||||
88,93,29,27,31,4.0,2
|
||||
90,67,10,16,16,4.0,2
|
||||
92,73,24,21,48,4.0,2
|
||||
91,55,28,28,82,4.0,2
|
||||
83,45,19,21,13,4.0,2
|
||||
90,74,19,14,22,4.0,2
|
||||
92,66,21,16,33,5.0,1
|
||||
93,63,26,18,18,5.0,1
|
||||
86,78,47,39,107,5.0,2
|
||||
97,44,113,45,150,5.0,2
|
||||
87,59,15,19,12,5.0,2
|
||||
86,44,21,11,15,5.0,2
|
||||
87,64,16,20,24,5.0,2
|
||||
92,57,21,23,22,5.0,2
|
||||
90,70,25,23,112,5.0,2
|
||||
99,59,17,19,11,5.0,2
|
||||
92,80,10,26,20,6.0,1
|
||||
95,60,26,22,28,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
92,62,37,21,36,6.0,1
|
||||
95,50,13,14,15,6.0,1
|
||||
90,76,37,19,50,6.0,1
|
||||
96,70,70,26,36,6.0,1
|
||||
95,62,64,42,76,6.0,1
|
||||
92,62,20,23,20,6.0,1
|
||||
91,63,25,26,15,6.0,1
|
||||
82,56,67,38,92,6.0,2
|
||||
92,82,27,24,37,6.0,2
|
||||
90,63,12,26,21,6.0,2
|
||||
88,37,9,15,16,6.0,2
|
||||
100,60,29,23,76,6.0,2
|
||||
98,43,35,23,69,6.0,2
|
||||
91,74,87,50,67,6.0,2
|
||||
92,87,57,25,44,6.0,2
|
||||
93,99,36,34,48,6.0,2
|
||||
90,72,17,19,19,6.0,2
|
||||
97,93,21,20,68,6.0,2
|
||||
93,50,18,25,17,6.0,2
|
||||
90,57,20,26,33,6.0,2
|
||||
92,76,31,28,41,6.0,2
|
||||
88,55,19,17,14,6.0,2
|
||||
89,63,24,29,29,6.0,2
|
||||
92,79,70,32,84,7.0,1
|
||||
92,93,58,35,120,7.0,1
|
||||
93,84,58,47,62,7.0,2
|
||||
97,71,29,22,52,8.0,1
|
||||
84,99,33,19,26,8.0,1
|
||||
96,44,42,23,73,8.0,1
|
||||
90,62,22,21,21,8.0,1
|
||||
92,94,18,17,6,8.0,1
|
||||
90,67,77,39,114,8.0,1
|
||||
97,71,29,22,52,8.0,1
|
||||
91,69,25,25,66,8.0,2
|
||||
93,59,17,20,14,8.0,2
|
||||
92,95,85,48,200,8.0,2
|
||||
90,50,26,22,53,8.0,2
|
||||
91,62,59,47,60,8.0,2
|
||||
92,93,22,28,123,9.0,1
|
||||
92,77,86,41,31,10.0,1
|
||||
86,66,22,24,26,10.0,2
|
||||
98,57,31,34,73,10.0,2
|
||||
95,80,50,64,55,10.0,2
|
||||
92,108,53,33,94,12.0,2
|
||||
97,92,22,28,49,12.0,2
|
||||
93,77,39,37,108,16.0,1
|
||||
94,83,81,34,201,20.0,1
|
||||
87,75,25,21,14,0.0,1
|
||||
88,56,23,18,12,0.0,1
|
||||
84,97,41,20,32,0.0,2
|
||||
94,91,27,20,15,0.5,1
|
||||
97,62,17,13,5,0.5,1
|
||||
92,85,25,20,12,0.5,1
|
||||
82,48,27,15,12,0.5,1
|
||||
88,74,31,25,15,0.5,1
|
||||
95,77,30,14,21,0.5,1
|
||||
88,94,26,18,8,0.5,1
|
||||
91,70,19,19,22,0.5,1
|
||||
83,54,27,15,12,0.5,1
|
||||
91,105,40,26,56,0.5,1
|
||||
86,79,37,28,14,0.5,1
|
||||
91,96,35,22,135,0.5,1
|
||||
89,82,23,14,35,0.5,1
|
||||
90,73,24,23,11,0.5,1
|
||||
90,87,19,25,19,0.5,1
|
||||
89,82,33,32,18,0.5,1
|
||||
85,79,17,8,9,0.5,1
|
||||
85,119,30,26,17,0.5,1
|
||||
78,69,24,18,31,0.5,1
|
||||
88,107,34,21,27,0.5,1
|
||||
89,115,17,27,7,0.5,1
|
||||
92,67,23,15,12,0.5,1
|
||||
89,101,27,34,14,0.5,1
|
||||
91,84,11,12,10,0.5,1
|
||||
94,101,41,20,53,0.5,2
|
||||
88,46,29,22,18,0.5,2
|
||||
88,122,35,29,42,0.5,2
|
||||
84,88,28,25,35,0.5,2
|
||||
90,79,18,15,24,0.5,2
|
||||
87,69,22,26,11,0.5,2
|
||||
65,63,19,20,14,0.5,2
|
||||
90,64,12,17,14,0.5,2
|
||||
85,58,18,24,16,0.5,2
|
||||
88,81,41,27,36,0.5,2
|
||||
86,78,52,29,62,0.5,2
|
||||
82,74,38,28,48,0.5,2
|
||||
86,58,36,27,59,0.5,2
|
||||
94,56,30,18,27,0.5,2
|
||||
87,57,30,30,22,0.5,2
|
||||
98,74,148,75,159,0.5,2
|
||||
94,75,20,25,38,0.5,2
|
||||
83,68,17,20,71,0.5,2
|
||||
93,56,25,21,33,0.5,2
|
||||
101,65,18,21,22,0.5,2
|
||||
92,65,25,20,31,0.5,2
|
||||
92,58,14,16,13,0.5,2
|
||||
86,58,16,23,23,0.5,2
|
||||
85,62,15,13,22,0.5,2
|
||||
86,57,13,20,13,0.5,2
|
||||
86,54,26,30,13,0.5,2
|
||||
81,41,33,27,34,1.0,1
|
||||
91,67,32,26,13,1.0,1
|
||||
91,80,21,19,14,1.0,1
|
||||
92,60,23,15,19,1.0,1
|
||||
91,60,32,14,8,1.0,1
|
||||
93,65,28,22,10,1.0,1
|
||||
90,63,45,24,85,1.0,2
|
||||
87,92,21,22,37,1.0,2
|
||||
83,78,31,19,115,1.0,2
|
||||
95,62,24,23,14,1.0,2
|
||||
93,59,41,30,48,1.0,2
|
||||
84,82,43,32,38,2.0,1
|
||||
87,71,33,20,22,2.0,1
|
||||
86,44,24,15,18,2.0,1
|
||||
86,66,28,24,21,2.0,1
|
||||
88,58,31,17,17,2.0,1
|
||||
90,61,28,29,31,2.0,1
|
||||
88,69,70,24,64,2.0,1
|
||||
93,87,18,17,26,2.0,1
|
||||
98,58,33,21,28,2.0,1
|
||||
91,44,18,18,23,2.0,2
|
||||
87,75,37,19,70,2.0,2
|
||||
94,91,30,26,25,2.0,2
|
||||
88,85,14,15,10,2.0,2
|
||||
89,109,26,25,27,2.0,2
|
||||
87,59,37,27,34,2.0,2
|
||||
93,58,20,23,18,2.0,2
|
||||
88,57,9,15,16,2.0,2
|
||||
94,65,38,27,17,3.0,1
|
||||
91,71,12,22,11,3.0,1
|
||||
90,55,20,20,16,3.0,1
|
||||
91,64,21,17,26,3.0,2
|
||||
88,47,35,26,33,3.0,2
|
||||
82,72,31,20,84,3.0,2
|
||||
85,58,83,49,51,3.0,2
|
||||
91,54,25,22,35,4.0,1
|
||||
98,50,27,25,53,4.0,2
|
||||
86,62,29,21,26,4.0,2
|
||||
89,48,32,22,14,4.0,2
|
||||
82,68,20,22,9,4.0,2
|
||||
83,70,17,19,23,4.0,2
|
||||
96,70,21,26,21,4.0,2
|
||||
94,117,77,56,52,4.0,2
|
||||
93,45,11,14,21,4.0,2
|
||||
93,49,27,21,29,4.0,2
|
||||
84,73,46,32,39,4.0,2
|
||||
91,63,17,17,46,4.0,2
|
||||
90,57,31,18,37,4.0,2
|
||||
87,45,19,13,16,4.0,2
|
||||
91,68,14,20,19,4.0,2
|
||||
86,55,29,35,108,4.0,2
|
||||
91,86,52,47,52,4.0,2
|
||||
88,46,15,33,55,4.0,2
|
||||
85,52,22,23,34,4.0,2
|
||||
89,72,33,27,55,4.0,2
|
||||
95,59,23,18,19,4.0,2
|
||||
94,43,154,82,121,4.0,2
|
||||
96,56,38,26,23,5.0,2
|
||||
90,52,10,17,12,5.0,2
|
||||
94,45,20,16,12,5.0,2
|
||||
99,42,14,21,49,5.0,2
|
||||
93,102,47,23,37,5.0,2
|
||||
94,71,25,26,31,5.0,2
|
||||
92,73,33,34,115,5.0,2
|
||||
87,54,41,29,23,6.0,1
|
||||
92,67,15,14,14,6.0,1
|
||||
98,101,31,26,32,6.0,1
|
||||
92,53,51,33,92,6.0,1
|
||||
97,94,43,43,82,6.0,1
|
||||
93,43,11,16,54,6.0,1
|
||||
93,68,24,18,19,6.0,1
|
||||
95,36,38,19,15,6.0,1
|
||||
99,86,58,42,203,6.0,1
|
||||
98,66,103,57,114,6.0,1
|
||||
92,80,10,26,20,6.0,1
|
||||
96,74,27,25,43,6.0,2
|
||||
95,93,21,27,47,6.0,2
|
||||
86,109,16,22,28,6.0,2
|
||||
91,46,30,24,39,7.0,2
|
||||
102,82,34,78,203,7.0,2
|
||||
85,50,12,18,14,7.0,2
|
||||
91,57,33,23,12,8.0,1
|
||||
91,52,76,32,24,8.0,1
|
||||
93,70,46,30,33,8.0,1
|
||||
87,55,36,19,25,8.0,1
|
||||
98,123,28,24,31,8.0,1
|
||||
82,55,18,23,44,8.0,2
|
||||
95,73,20,25,225,8.0,2
|
||||
97,80,17,20,53,8.0,2
|
||||
100,83,25,24,28,8.0,2
|
||||
88,91,56,35,126,9.0,2
|
||||
91,138,45,21,48,10.0,1
|
||||
92,41,37,22,37,10.0,1
|
||||
86,123,20,25,23,10.0,2
|
||||
91,93,35,34,37,10.0,2
|
||||
87,87,15,23,11,10.0,2
|
||||
87,56,52,43,55,10.0,2
|
||||
99,75,26,24,41,12.0,1
|
||||
96,69,53,43,203,12.0,2
|
||||
98,77,55,35,89,15.0,1
|
||||
91,68,27,26,14,16.0,1
|
||||
98,99,57,45,65,20.0,1
|
180
tests/datasets/test.arff
Executable file
180
tests/datasets/test.arff
Executable file
@@ -0,0 +1,180 @@
|
||||
% 1. Title: Test Feature extracted from Glass
|
||||
%
|
||||
|
||||
@RELATION test
|
||||
|
||||
@ATTRIBUTE Mg REAL
|
||||
@ATTRIBUTE Type {0,1,2,3,4,5,6}
|
||||
|
||||
@DATA
|
||||
3.5,0
|
||||
3.52,1
|
||||
1.74,2
|
||||
0.0,3
|
||||
2.85,3
|
||||
3.65,1
|
||||
2.84,0
|
||||
0.0,4
|
||||
3.9,3
|
||||
3.58,3
|
||||
3.25,3
|
||||
3.76,1
|
||||
3.45,1
|
||||
3.48,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
2.96,3
|
||||
3.65,0
|
||||
0.0,3
|
||||
3.74,3
|
||||
3.66,0
|
||||
1.61,5
|
||||
3.49,3
|
||||
3.52,3
|
||||
3.54,3
|
||||
3.53,1
|
||||
3.56,3
|
||||
3.34,1
|
||||
0.0,2
|
||||
3.61,3
|
||||
3.6,0
|
||||
3.46,0
|
||||
2.72,3
|
||||
3.51,3
|
||||
3.09,3
|
||||
3.48,0
|
||||
3.8,3
|
||||
3.58,3
|
||||
3.54,1
|
||||
3.42,0
|
||||
2.68,5
|
||||
3.49,0
|
||||
3.68,3
|
||||
3.6,0
|
||||
3.59,3
|
||||
0.0,4
|
||||
3.54,0
|
||||
2.2,4
|
||||
3.59,0
|
||||
3.66,1
|
||||
3.87,3
|
||||
3.45,0
|
||||
3.82,0
|
||||
3.72,0
|
||||
3.33,0
|
||||
3.78,1
|
||||
2.24,2
|
||||
3.83,3
|
||||
0.78,2
|
||||
3.9,3
|
||||
3.58,0
|
||||
3.57,3
|
||||
3.52,0
|
||||
3.47,0
|
||||
3.48,0
|
||||
3.66,0
|
||||
3.62,0
|
||||
3.39,0
|
||||
0.0,3
|
||||
3.55,0
|
||||
1.01,3
|
||||
0.0,4
|
||||
3.43,0
|
||||
3.58,0
|
||||
0.0,4
|
||||
3.74,0
|
||||
0.0,4
|
||||
3.44,3
|
||||
3.97,3
|
||||
3.6,0
|
||||
3.64,3
|
||||
1.83,4
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,4
|
||||
3.73,0
|
||||
3.58,1
|
||||
3.34,4
|
||||
2.09,3
|
||||
2.71,0
|
||||
3.18,3
|
||||
3.43,0
|
||||
3.15,3
|
||||
3.56,0
|
||||
0.0,4
|
||||
0.0,4
|
||||
4.49,0
|
||||
3.59,0
|
||||
3.56,3
|
||||
3.52,3
|
||||
0.0,4
|
||||
0.0,5
|
||||
3.61,0
|
||||
3.74,0
|
||||
3.62,3
|
||||
3.84,0
|
||||
3.67,0
|
||||
3.58,0
|
||||
0.0,5
|
||||
3.66,3
|
||||
3.68,3
|
||||
2.28,3
|
||||
3.67,3
|
||||
3.2,4
|
||||
3.81,0
|
||||
0.0,3
|
||||
3.39,1
|
||||
3.57,0
|
||||
1.85,5
|
||||
3.75,0
|
||||
3.76,3
|
||||
0.0,3
|
||||
3.86,0
|
||||
0.0,4
|
||||
0.0,5
|
||||
0.0,3
|
||||
3.5,0
|
||||
3.67,3
|
||||
3.55,3
|
||||
0.0,4
|
||||
3.61,3
|
||||
3.41,3
|
||||
3.7,0
|
||||
0.0,4
|
||||
3.58,0
|
||||
1.78,4
|
||||
3.85,0
|
||||
3.48,0
|
||||
1.71,5
|
||||
0.0,2
|
||||
3.5,3
|
||||
3.49,3
|
||||
3.36,1
|
||||
0.0,3
|
||||
3.54,0
|
||||
0.0,4
|
||||
2.87,0
|
||||
3.57,1
|
||||
3.48,0
|
||||
2.81,0
|
||||
0.0,4
|
||||
0.0,2
|
||||
3.62,3
|
||||
3.58,3
|
||||
3.45,3
|
||||
3.48,3
|
||||
3.54,3
|
||||
3.54,3
|
||||
3.37,0
|
||||
3.41,1
|
||||
3.48,0
|
||||
3.43,3
|
||||
3.5,0
|
||||
0.0,4
|
||||
3.54,0
|
||||
3.52,3
|
||||
3.52,3
|
||||
2.88,3
|
||||
2.19,2
|
222
tests/datasets/tests.txt
Normal file
222
tests/datasets/tests.txt
Normal file
@@ -0,0 +1,222 @@
|
||||
#
|
||||
# from, to, step, #bins, Q/U
|
||||
# discretized data
|
||||
# cut points
|
||||
#
|
||||
#
|
||||
# Range experiments
|
||||
#
|
||||
RANGE
|
||||
0, 100, 1, 4, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 24.75, 49.5, 74.25, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 4, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 12.25, 24.5, 36.75, 49.0
|
||||
RANGE
|
||||
0, 100, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 33.0, 66.0, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 16.33333, 32.66667, 49.0
|
||||
RANGE
|
||||
0, 10, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 3.0, 6.0, 9.0
|
||||
RANGE
|
||||
0, 100, 1, 4, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 24.75, 49.5, 74.25, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 4, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.0, 12.25, 24.5, 36.75, 49.0
|
||||
RANGE
|
||||
0, 100, 1, 3, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 33.0, 66.0, 99.0
|
||||
RANGE
|
||||
0, 50, 1, 3, U
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.0, 16.33333, 32.66667, 49.0
|
||||
RANGE
|
||||
0, 10, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 3.0, 6.0, 9.0
|
||||
RANGE
|
||||
1, 10, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||
1.0, 3.66667, 6.33333, 9.0
|
||||
RANGE
|
||||
1, 10, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2
|
||||
1.0, 3.66667, 6.33333, 9.0
|
||||
RANGE
|
||||
1, 11, 1, 3, Q
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.0, 7.0, 10.0
|
||||
RANGE
|
||||
1, 11, 1, 3, U
|
||||
0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.0, 7.0, 10.0
|
||||
RANGE
|
||||
1, 12, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.33333, 7.66667, 11.0
|
||||
RANGE
|
||||
1, 12, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.33333, 7.66667, 11.0
|
||||
RANGE
|
||||
1, 13, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
RANGE
|
||||
1, 13, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
RANGE
|
||||
1, 14, 1, 3, Q
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
RANGE
|
||||
1, 14, 1, 3, U
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
RANGE
|
||||
1, 15, 1, 3, Q
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
RANGE
|
||||
1, 15, 1, 3, U
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
#
|
||||
# Vector experiments
|
||||
#
|
||||
VECTOR
|
||||
Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||
1, 0, 0, 1, 0, 0, 1, 0, 0
|
||||
1.0, 1.66667, 3.0
|
||||
VECTOR
|
||||
U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
|
||||
2, 0, 0, 2, 0, 0, 2, 0, 0
|
||||
1.0, 1.66667, 2.33333, 3.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
1.0, 4.66667, 8.33333, 12.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.0, 9.0, 13.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.33333, 9.66667, 14.0
|
||||
VECTOR
|
||||
Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
|
||||
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
|
||||
2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
|
||||
1.0, 5.66667, 10.33333, 15.0
|
||||
VECTOR
|
||||
Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||
0, 1, 1, 1, 1, 1, 2, 2, 2, 2
|
||||
0.0, 1.0, 3.0, 4.0
|
||||
VECTOR
|
||||
U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
|
||||
0, 0, 0, 0, 1, 1, 2, 2, 2, 2
|
||||
0.0, 1.33333, 2.66667, 4.0
|
||||
#
|
||||
# Vector experiments with iris
|
||||
#
|
||||
VECTOR
|
||||
Q3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1
|
||||
4.3, 5.4, 6.3, 7.9
|
||||
VECTOR
|
||||
U3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 2, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1
|
||||
4.3, 5.5, 6.7, 7.9
|
||||
VECTOR
|
||||
Q4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2
|
||||
4.3, 5.1, 5.8, 6.4, 7.9
|
||||
VECTOR
|
||||
U4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
|
||||
4.3, 5.2, 6.1, 7.0, 7.9
|
||||
VECTOR
|
||||
Q3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1
|
||||
2.0, 2.9, 3.2, 4.4
|
||||
VECTOR
|
||||
U3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0, 1, 1, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
|
||||
2.0, 2.8, 3.6, 4.4
|
||||
VECTOR
|
||||
Q4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
3, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 0, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 0, 1, 1, 3, 0, 1, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 0, 1, 3, 0, 2, 1, 2, 2, 0, 1, 0, 3, 2, 0, 2, 0, 1, 2, 2, 3, 0, 0, 2, 1, 1, 0, 3, 2, 1, 2, 1, 2, 1, 3, 1, 1, 0, 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, 2, 0, 2, 3, 2
|
||||
2.0, 2.8, 3.0, 3.3, 4.4
|
||||
VECTOR
|
||||
U4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0]
|
||||
2, 1, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 3, 2, 3, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 3, 1, 3, 2, 2, 2, 2, 2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 3, 1, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1
|
||||
2.0, 2.6, 3.2, 3.8, 4.4
|
||||
VECTOR
|
||||
Q3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
1.0, 2.63333, 4.9, 6.9
|
||||
VECTOR
|
||||
U3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
1.0, 2.96667, 4.93333, 6.9
|
||||
VECTOR
|
||||
Q4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3
|
||||
1.0, 1.6, 4.35, 5.1, 6.9
|
||||
VECTOR
|
||||
U4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2
|
||||
1.0, 2.475, 3.95, 5.425, 6.9
|
||||
VECTOR
|
||||
Q3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.1, 0.86667, 1.6, 2.5
|
||||
VECTOR
|
||||
U3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
0.1, 0.9, 1.7, 2.5
|
||||
VECTOR
|
||||
Q4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
0.1, 0.3, 1.3, 1.8, 2.5
|
||||
VECTOR
|
||||
U4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8]
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2
|
||||
0.1, 0.7, 1.3, 1.9, 2.5
|
12
tests/test
12
tests/test
@@ -1,12 +0,0 @@
|
||||
# Configure, build and run the test suite, stopping at the first step that
# fails so later steps never run against a stale or missing build tree.

# Generate the build system into ./build.
if ! cmake -S . -B build -Wno-dev; then
    echo "Error in creating build commands."
    exit 1
fi

# Compile everything that was configured above.
if ! cmake --build build; then
    echo "Error in build command."
    exit 1
fi

# ctest is run from inside the build tree; abort instead of running it from
# the wrong directory if the build tree cannot be entered.
cd build || exit 1
ctest --output-on-failure
|
71
tests/tests_do.py
Normal file
71
tests/tests_do.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import json
|
||||
from sklearn.preprocessing import KBinsDiscretizer
|
||||
|
||||
# Load the experiments file. Comment lines (starting with "#") and blank lines
# are dropped so that the remaining lines form consecutive 4-line records:
#   1. experiment type ("RANGE"; anything else is parsed as a vector record)
#   2. experiment definition
#   3. stored discretized labels (comma separated)
#   4. stored cut points (comma separated)
with open("datasets/tests.txt") as f:
    data = [x.strip() for x in f if x.strip() and not x.startswith("#")]

errors = False
for i in range(0, len(data), 4):
    experiment_type = data[i]
    definition = data[i + 1]
    print("Experiment:", definition)
    if experiment_type == "RANGE":
        # Definition layout: "from, to, step, n_bins, strategy"
        from_, to_, step_, n_bins_, strategy_ = definition.split(",")
        X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
    else:
        # Definition layout: "<strategy letter><n_bins>[v1, v2, ...]".
        # Everything between the strategy letter and the opening bracket is
        # the number of bins, so multi-digit bin counts are parsed correctly.
        header, bracket, values = definition.partition("[")
        strategy_ = header[0]
        n_bins_ = header[1:]
        vector = bracket + values
        X = [[float(x)] for x in json.loads(vector)]

    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    disc = KBinsDiscretizer(
        n_bins=int(n_bins_),
        encode="ordinal",
        strategy=strategy,
    )
    expected_data = data[i + 2]
    cuts_data = data[i + 3]
    disc.fit(X)
    #
    # Normalize the cutpoints to remove numerical errors such as 33.0000000001
    # instead of 33. The edges are rounded in place so that transform() below
    # uses the normalized values.
    #
    edges = disc.bin_edges_[0]
    for j, edge in enumerate(edges):
        edges[j] = round(edge, 5)
    result = [int(x) for x in disc.transform(X).flatten()]
    expected = [int(x) for x in expected_data.split(",")]
    #
    # Check the Results
    #
    assert len(result) == len(expected)
    for j, (got, wanted) in enumerate(zip(result, expected)):
        if got != wanted:
            print("* Error at", j, "Expected=", wanted, "Result=", got)
            errors = True
    # Here "expected" is what the discretizer fitted and "computed" is what the
    # data file stores; the printed labels below follow that naming.
    expected_cuts = disc.bin_edges_[0]
    computed_cuts = [float(x) for x in cuts_data.split(",")]
    assert len(expected_cuts) == len(computed_cuts)
    for j, (fitted, stored) in enumerate(zip(expected_cuts, computed_cuts)):
        if round(fitted, 5) != stored:
            print(
                "* Error at",
                j,
                "Expected=",
                fitted,
                "Result=",
                stored,
            )
            errors = True
# Report every mismatch first, then fail once at the end.
if errors:
    raise Exception("There were errors!")
print("*** All tests run succesfully! ***")
|
209
tests/tests_generate.ipynb
Normal file
209
tests/tests_generate.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||
"from sklearn.datasets import load_iris"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiments_range = [\n",
|
||||
" [0, 100, 1, 4, \"Q\"],\n",
|
||||
" [0, 50, 1, 4, \"Q\"],\n",
|
||||
" [0, 100, 1, 3, \"Q\"],\n",
|
||||
" [0, 50, 1, 3, \"Q\"],\n",
|
||||
" [0, 10, 1, 3, \"Q\"],\n",
|
||||
" [0, 100, 1, 4, \"U\"],\n",
|
||||
" [0, 50, 1, 4, \"U\"],\n",
|
||||
" [0, 100, 1, 3, \"U\"],\n",
|
||||
" [0, 50, 1, 3, \"U\"],\n",
|
||||
"# \n",
|
||||
" [0, 10, 1, 3, \"U\"],\n",
|
||||
" [1, 10, 1, 3, \"Q\"],\n",
|
||||
" [1, 10, 1, 3, \"U\"],\n",
|
||||
" [1, 11, 1, 3, \"Q\"],\n",
|
||||
" [1, 11, 1, 3, \"U\"],\n",
|
||||
" [1, 12, 1, 3, \"Q\"],\n",
|
||||
" [1, 12, 1, 3, \"U\"],\n",
|
||||
" [1, 13, 1, 3, \"Q\"],\n",
|
||||
" [1, 13, 1, 3, \"U\"],\n",
|
||||
" [1, 14, 1, 3, \"Q\"],\n",
|
||||
" [1, 14, 1, 3, \"U\"],\n",
|
||||
" [1, 15, 1, 3, \"Q\"],\n",
|
||||
" [1, 15, 1, 3, \"U\"]\n",
|
||||
"]\n",
|
||||
"experiments_vectors = [\n",
|
||||
" (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n",
|
||||
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n",
|
||||
" (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n",
|
||||
" (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def write_lists(file, data, cuts):\n",
|
||||
" sep = \"\"\n",
|
||||
" for res in data:\n",
|
||||
" file.write(f\"{sep}{int(res):d}\")\n",
|
||||
" sep= \", \"\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
" sep = \"\"\n",
|
||||
" for res in cuts:\n",
|
||||
" file.write(sep + str(round(res,5)))\n",
|
||||
" sep = \", \"\n",
|
||||
" file.write(\"\\n\")\n",
|
||||
"\n",
|
||||
"def normalize_cuts(cuts):\n",
|
||||
" #\n",
|
||||
" # Normalize the cutpoints to remove numerical errors such as 33.0000000001\n",
|
||||
" # instead of 33\n",
|
||||
" #\n",
|
||||
" for k in range(cuts.shape[0]):\n",
|
||||
" for i in range(len(cuts[k])):\n",
|
||||
" cuts[k][i] = round(cuts[k][i], 5)\n",
|
||||
"\n",
|
||||
"with open(\"datasets/tests.txt\", \"w\") as file:\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# from, to, step, #bins, Q/U\\n\")\n",
|
||||
" file.write(\"# discretized data\\n\")\n",
|
||||
" file.write(\"# cut points\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" #\n",
|
||||
" # Range experiments\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# Range experiments\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" for experiment in experiments_range:\n",
|
||||
" file.write(\"RANGE\\n\")\n",
|
||||
" (from_, to_, step_, bins_, strategy) = experiment\n",
|
||||
" disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n",
|
||||
" data = [[x] for x in range(from_, to_, step_)]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||
" #\n",
|
||||
" # Vector experiments\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" file.write(\"# Vector experiments\\n\")\n",
|
||||
" file.write(\"#\\n\")\n",
|
||||
" for n_bins, experiment in experiments_vectors:\n",
|
||||
" for strategy in [\"Q\", \"U\"]:\n",
|
||||
" file.write(\"VECTOR\\n\")\n",
|
||||
" file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n",
|
||||
" disc = KBinsDiscretizer(\n",
|
||||
" n_bins=n_bins,\n",
|
||||
" encode=\"ordinal\",\n",
|
||||
" \n",
|
||||
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n",
|
||||
" )\n",
|
||||
" data = [[x] for x in experiment]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])\n",
|
||||
" #\n",
|
||||
" # Vector experiments iris\n",
|
||||
" #\n",
|
||||
" file.write(\"#\\n\");\n",
|
||||
" file.write(\"# Vector experiments with iris\\n\");\n",
|
||||
" file.write(\"#\\n\");\n",
|
||||
" X, y = load_iris(return_X_y=True)\n",
|
||||
" for i in range(X.shape[1]):\n",
|
||||
" for n_bins in [3, 4]:\n",
|
||||
" for strategy in [\"Q\", \"U\"]:\n",
|
||||
" file.write(\"VECTOR\\n\")\n",
|
||||
" experiment = X[:, i]\n",
|
||||
" file.write(f\"{strategy}{n_bins}{experiment.tolist()}\\n\")\n",
|
||||
" disc = KBinsDiscretizer(\n",
|
||||
" n_bins=n_bins,\n",
|
||||
" encode=\"ordinal\",\n",
|
||||
" strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\")\n",
|
||||
" data = [[x] for x in experiment]\n",
|
||||
" disc.fit(data)\n",
|
||||
" normalize_cuts(disc.bin_edges_)\n",
|
||||
" result = disc.transform(data)\n",
|
||||
" write_lists(file, result, disc.bin_edges_[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cut points: [array([ 0., 33., 66., 99.])]\n",
|
||||
"Mistaken transformed data disc.transform([[33]]) = [[0.]]\n",
|
||||
"Reason of the mistake the cutpoint has decimals (double): 33.00000000000001\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#\n",
|
||||
"# Proving the mistakes due to floating point precision\n",
|
||||
"#\n",
|
||||
"from sklearn.preprocessing import KBinsDiscretizer\n",
|
||||
"\n",
|
||||
"data = [[x] for x in range(100)]\n",
|
||||
"disc = KBinsDiscretizer(n_bins=3, encode=\"ordinal\", strategy=\"quantile\")\n",
|
||||
"disc.fit(data)\n",
|
||||
"print(\"Cut points: \", disc.bin_edges_)\n",
|
||||
"print(\"Mistaken transformed data disc.transform([[33]]) =\", disc.transform([[33]]))\n",
|
||||
"print(\"Reason of the mistake the cutpoint has decimals (double): \", disc.bin_edges_[0][1])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.1.undefined"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -1,16 +0,0 @@
|
||||
// Common type aliases shared by the mdlp discretizer sources.
#ifndef TYPES_H
#define TYPES_H
#include <cstddef>
#include <map>
#include <tuple>
#include <vector>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept because other sources may rely on unqualified std names
// after including this header; confirm before removing it.
using namespace std;
namespace mdlp {
    // Floating point type used for sample values and cut points.
    typedef float precision_t;
    // Sequence of sample values.
    typedef std::vector<precision_t> samples_t;
    // Sequence of integer labels.
    typedef std::vector<int> labels_t;
    // Sequence of positions (indices).
    typedef std::vector<std::size_t> indices_t;
    // Sequence of cut point values.
    typedef std::vector<precision_t> cutPoints_t;
    // Cache of precision_t values keyed by a pair of ints.
    typedef std::map<std::tuple<int, int>, precision_t> cacheEnt_t;
    // Cache of precision_t values keyed by a triple of ints.
    typedef std::map<std::tuple<int, int, int>, precision_t> cacheIg_t;
}
#endif
|
38
update_coverage.py
Normal file
38
update_coverage.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
readme_file = "README.md"

# The directory that contains coverage.info is the single required argument.
if len(sys.argv) < 2:
    sys.exit(f"Usage: {sys.argv[0]} <directory containing coverage.info>")

print("Updating coverage...")
# Generate badge line
# The command is passed as an argument list (no shell) so that a directory
# name containing spaces or shell metacharacters is handed to lcov verbatim.
output = subprocess.check_output(
    ["lcov", "--summary", sys.argv[1] + "/coverage.info"],
)
value = output.decode("utf-8").strip()
# Take the percentage from the first summary line mentioning "lines": the
# number sits between the first ":" and the following "%".
percentage = 0
for line in value.splitlines():
    if "lines" in line:
        percentage = float(line.split(":")[1].split("%")[0])
        break
print(f"Coverage: {percentage}%")
# Refuse to touch the README when coverage is below the 90% threshold.
if percentage < 90:
    print("⛔Coverage is less than 90%. I won't update the badge.")
    sys.exit(1)
percentage_label = str(percentage).replace(".", ",")
# NOTE(review): this literal has no placeholder and percentage_label is never
# used, which suggests the badge markup (presumably an img.shields.io image
# link built from percentage_label) is missing from this line -- confirm
# against the upstream file before relying on it.
coverage_line = f"[](html/index.html)"
# Update README.md
with open(readme_file, "r") as f:
    lines = f.readlines()
# Rewrite the file, replacing only the line that carries the coverage badge.
with open(readme_file, "w") as f:
    for line in lines:
        if "img.shields.io/badge/Coverage" in line:
            f.write(coverage_line + "\n")
        else:
            f.write(line)
print(f"✅Coverage updated with value: {percentage}")
|
Reference in New Issue
Block a user