diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..268bb77 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# ---> C++ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app +build/** +build_*/** +*.dSYM/** +cmake-build*/** +.idea +puml/** +.vscode/settings.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..5801187 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "lib/catch2"] + path = lib/catch2 + url = https://github.com/catchorg/Catch2.git +[submodule "lib/mdlp"] + path = lib/mdlp + url = https://github.com/rmontanana/mdlp diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..6faaf51 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Mac", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "macFrameworkPath": [ + "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks" + ], + "cStandard": "c17", + "cppStandard": "c++17", + "compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..1e30c2d --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,130 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "sample", + "program": "${workspaceFolder}/build_debug/sample/BayesNetSample", + "args": [ + "-d", + "iris", + "-m", + "TANLd", + "-s", + "271", + "-p", + "/Users/rmontanana/Code/discretizbench/datasets/", + ], + //"cwd": "${workspaceFolder}/build/sample/", + 
}, + { + "type": "lldb", + "request": "launch", + "name": "experimentPy", + "program": "${workspaceFolder}/build_debug/src/Platform/b_main", + "args": [ + "-m", + "STree", + "--stratified", + "-d", + "iris", + //"--discretize" + // "--hyperparameters", + // "{\"repeatSparent\": true, \"maxModels\": 12}" + ], + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "gridsearch", + "program": "${workspaceFolder}/build_debug/src/Platform/b_grid", + "args": [ + "-m", + "KDB", + "--discretize", + "--continue", + "glass", + "--only", + "--compute" + ], + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "experimentBayes", + "program": "${workspaceFolder}/build_debug/src/Platform/b_main", + "args": [ + "-m", + "TAN", + "--stratified", + "--discretize", + "-d", + "iris", + "--hyperparameters", + "{\"repeatSparent\": true, \"maxModels\": 12}" + ], + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "best", + "program": "${workspaceFolder}/build_debug/src/Platform/b_best", + "args": [ + "-m", + "BoostAODE", + "-s", + "accuracy", + "--build", + ], + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "manage", + "program": "${workspaceFolder}/build_debug/src/Platform/b_manage", + "args": [ + "-n", + "20" + ], + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "list", + "program": "${workspaceFolder}/build_debug/src/Platform/b_list", + "args": [], + //"cwd": "/Users/rmontanana/Code/discretizbench", + "cwd": "${workspaceFolder}/../discretizbench", + }, + { + "type": "lldb", + "request": "launch", + "name": "test", + "program": "${workspaceFolder}/build_debug/tests/unit_tests_folding", + "args": [ + "-c=\"Metrics Test\"", + // "-s", + ], + "cwd": "${workspaceFolder}/build_debug/tests", + }, + { + "name": "Build & debug 
active file", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build_debug/bayesnet", + "args": [], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "lldb", + "preLaunchTask": "CMake: build" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..45cc63d --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,60 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cmake", + "label": "CMake: build", + "command": "build", + "targets": [ + "all" + ], + "group": "build", + "problemMatcher": [], + "detail": "CMake template build task" + }, + { + "type": "cppbuild", + "label": "C/C++: clang build active file", + "command": "/usr/bin/clang", + "args": [ + "-fcolor-diagnostics", + "-fansi-escape-codes", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": "build", + "detail": "Task generated by Debugger." + }, + { + "type": "cppbuild", + "label": "C/C++: g++ build active file", + "command": "/usr/bin/g++", + "args": [ + "-fdiagnostics-color=always", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "detail": "Task generated by Debugger." 
+ } + ] +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d591480 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,53 @@ +cmake_minimum_required(VERSION 3.20) + +project(Folding + VERSION 1.0.0 + DESCRIPTION "Folding utility for BayesNet library" + HOMEPAGE_URL "https://github.com/rmontanana/folding" + LANGUAGES CXX +) + +if (CODE_COVERAGE AND NOT ENABLE_TESTING) + MESSAGE(FATAL_ERROR "Code coverage requires testing enabled") +endif (CODE_COVERAGE AND NOT ENABLE_TESTING) + +find_package(Torch REQUIRED) + +if (POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif () + +# Global CMake variables +# ---------------------- +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") +# Options +# ------- +option(ENABLE_TESTING "Unit testing build" OFF) + +# CMakes modules +# -------------- +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) +include(AddGitSubmodule) + +# Subdirectories +# -------------- +add_subdirectory(config) + +# Testing +# ------- + +if (ENABLE_TESTING) + MESSAGE("Testing enabled") + add_git_submodule("lib/catch2") + add_git_submodule("lib/Files") + add_git_submodule("lib/mdlp") + include(CTest) + add_subdirectory(tests) +endif (ENABLE_TESTING) + +add_library(folding INTERFACE folding.hpp) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ac35ab2 --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +SHELL := /bin/bash +.DEFAULT_GOAL := help +.PHONY: help build test clean + +f_debug = build_debug +test_targets = unit_tests_folding +n_procs = -j 16 + +define ClearTests + @for t in $(test_targets); do \ + if [ -f $(f_debug)/tests/$$t ]; then \ + echo ">>> Cleaning $$t..." ; \ + rm -f $(f_debug)/tests/$$t ; \ + fi ; \ + done + @nfiles="$$(find . 
-name "*.gcda" -print)" ; \ + if test "$${nfiles}" != "" ; then \ + find . -name "*.gcda" -print0 | xargs -0 rm 2>/dev/null ;\ + fi ; +endef + +clean: ## Clean the tests info + @echo ">>> Cleaning Debug Folding tests..."; + $(call ClearTests) + @echo ">>> Done"; + +build: ## Build a debug version of the project + @echo ">>> Building Debug Folding..."; + @if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi + @mkdir $(f_debug); + @cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON + @echo ">>> Done"; + +opt = "" +test: ## Run tests (opt="-s") to verbose output the tests + @echo ">>> Running Folding tests..."; + @$(MAKE) clean + @cmake --build $(f_debug) -t $(test_targets) $(n_procs) + @for t in $(test_targets); do \ + if [ -f $(f_debug)/tests/$$t ]; then \ + ( cd $(f_debug)/tests && \ + ./$$t $(opt) ) ; \ + fi ; \ + done + @echo ">>> Done"; + +help: ## Show help message + @IFS=$$'\n' ; \ + help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \ + printf "%s\n\n" "Usage: make [task]"; \ + printf "%-20s %s\n" "task" "help" ; \ + printf "%-20s %s\n" "------" "----" ; \ + for help_line in $${help_lines[@]}; do \ + IFS=$$':' ; \ + help_split=($$help_line) ; \ + help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \ + help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \ + printf '\033[36m'; \ + printf "%-20s %s" $$help_command ; \ + printf '\033[0m'; \ + printf "%s\n" $$help_info; \ + done diff --git a/README2.md b/README2.md new file mode 100644 index 0000000..a3a4f6a --- /dev/null +++ b/README2.md @@ -0,0 +1,91 @@ +# BayesNet + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +Bayesian Network Classifier with libtorch from scratch + +## 0. Setup + +Before compiling BayesNet. + +### Miniconda + +To be able to run Python Classifiers such as STree, ODTE, SVC, etc. 
you need to install Miniconda. To do so, download the installer from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and run it. It is recommended to install it in the home folder. + +On Linux, the libstdc++ library bundled with the Miniconda installation is sometimes picked up by mistake, which produces the following message when running the b_xxxx executables: + +```bash +libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx) +``` + +The solution is to erase the libstdc++ library from the Miniconda installation. + +### MPI + +On Linux, just install the openmpi and openmpi-devel packages. If cmake can't find the openmpi installation (as on Oracle Linux), set the following variable: + +```bash +export MPI_HOME="/usr/lib64/openmpi" +``` + +On macOS, install mpich with brew; if cmake doesn't find it, edit the mpicxx wrapper to remove ",-commons,use_dylibs" from final_ldflags: + +```bash +vi /opt/homebrew/bin/mpicxx +``` + +### boost library + +[Getting Started]() + +The best option is to install the packages that the Linux distribution has in its repository. If this is the case: + +```bash +sudo dnf install boost-devel +``` + +If this is not possible and the compressed package is installed, the following environment variable has to be set pointing to the folder where it was unzipped to: + +```bash +export BOOST_ROOT=/path/to/library/ +``` + +In some cases the library has to be built from source. To do so: + +```bash +cd /path/to/library +mkdir own +./bootstrap.sh --prefix=/path/to/library/own +./b2 install +export BOOST_ROOT=/path/to/library/own/ +``` + +Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be. + +### libxlsxwriter + +```bash +cd lib/libxlsxwriter +make +make install DESTDIR=/home/rmontanana/Code PREFIX= +``` + +The following environment variable has to be set: + +```bash + export LD_LIBRARY_PATH=/usr/local/lib + ``` + +### Release + +```bash +make release +``` + +### Debug & Tests + +```bash +make debug +``` + +## 1. 
Introduction diff --git a/cmake/modules/AddGitSubmodule.cmake b/cmake/modules/AddGitSubmodule.cmake new file mode 100644 index 0000000..7855fce --- /dev/null +++ b/cmake/modules/AddGitSubmodule.cmake @@ -0,0 +1,14 @@ + +function(add_git_submodule dir) + find_package(Git REQUIRED) + + # if(EXISTS) is only well-defined for full paths, so resolve dir against the project root (the directory git is run from below) + get_filename_component(submodule_dir "${dir}" ABSOLUTE BASE_DIR "${PROJECT_SOURCE_DIR}") + if(NOT EXISTS "${submodule_dir}/CMakeLists.txt") + message(STATUS "🚨 Adding git submodule => ${dir}") + execute_process(COMMAND ${GIT_EXECUTABLE} + submodule update --init --recursive -- ${dir} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + endif() + add_subdirectory(${dir}) +endfunction(add_git_submodule) diff --git a/cmake/modules/CodeCoverage.cmake b/cmake/modules/CodeCoverage.cmake new file mode 100644 index 0000000..d4a039f --- /dev/null +++ b/cmake/modules/CodeCoverage.cmake @@ -0,0 +1,742 @@ +# Copyright (c) 2012 - 2017, Lars Bilke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CHANGES: +# +# 2012-01-31, Lars Bilke +# - Enable Code Coverage +# +# 2013-09-17, Joakim Söderberg +# - Added support for Clang. +# - Some additional usage instructions. +# +# 2016-02-03, Lars Bilke +# - Refactored functions to use named parameters +# +# 2017-06-02, Lars Bilke +# - Merged with modified version from github.com/ufz/ogs +# +# 2019-05-06, Anatolii Kurotych +# - Remove unnecessary --coverage flag +# +# 2019-12-13, FeRD (Frank Dana) +# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor +# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments. +# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY +# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list +# - Set lcov basedir with -b argument +# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be +# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().) +# - Delete output dir, .info file on 'make clean' +# - Remove Python detection, since version mismatches will break gcovr +# - Minor cleanup (lowercase function names, update examples...) 
+# +# 2019-12-19, FeRD (Frank Dana) +# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets +# +# 2020-01-19, Bob Apthorpe +# - Added gfortran support +# +# 2020-02-17, FeRD (Frank Dana) +# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters +# in EXCLUDEs, and remove manual escaping from gcovr targets +# +# 2021-01-19, Robin Mueller +# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run +# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional +# flags to the gcovr command +# +# 2020-05-04, Mihchael Davis +# - Add -fprofile-abs-path to make gcno files contain absolute paths +# - Fix BASE_DIRECTORY not working when defined +# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines +# +# 2021-05-10, Martin Stump +# - Check if the generator is multi-config before warning about non-Debug builds +# +# 2022-02-22, Marko Wehle +# - Change gcovr output from -o for --xml and --html output respectively. +# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt". +# +# 2022-09-28, Sebastian Mueller +# - fix append_coverage_compiler_flags_to_target to correctly add flags +# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent) +# +# USAGE: +# +# 1. Copy this file into your cmake modules path. +# +# 2. Add the following line to your CMakeLists.txt (best inside an if-condition +# using a CMake option() to enable it just optionally): +# include(CodeCoverage) +# +# 3. Append necessary compiler flags for all supported source files: +# append_coverage_compiler_flags() +# Or for specific target: +# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME) +# +# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og +# +# 4. 
If you need to exclude additional directories from the report, specify them +# using full paths in the COVERAGE_EXCLUDES variable before calling +# setup_target_for_coverage_*(). +# Example: +# set(COVERAGE_EXCLUDES +# '${PROJECT_SOURCE_DIR}/src/dir1/*' +# '/path/to/my/src/dir2/*') +# Or, use the EXCLUDE argument to setup_target_for_coverage_*(). +# Example: +# setup_target_for_coverage_lcov( +# NAME coverage +# EXECUTABLE testrunner +# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*") +# +# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set +# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR) +# Example: +# set(COVERAGE_EXCLUDES "dir1/*") +# setup_target_for_coverage_gcovr_html( +# NAME coverage +# EXECUTABLE testrunner +# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src" +# EXCLUDE "dir2/*") +# +# 5. Use the functions described below to create a custom make target which +# runs your test executable and produces a code coverage report. +# +# 6. Build a Debug build: +# cmake -DCMAKE_BUILD_TYPE=Debug .. +# make +# make my_coverage_target +# + +include(CMakeParseArguments) + +option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) + +# Check prereqs +find_program( GCOV_PATH gcov ) +find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl) +find_program( FASTCOV_PATH NAMES fastcov fastcov.py ) +find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat ) +find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test) +find_program( CPPFILT_PATH NAMES c++filt ) + +if(NOT GCOV_PATH) + message(FATAL_ERROR "gcov not found! Aborting...") +endif() # NOT GCOV_PATH + +# Check supported compiler (Clang, GNU and Flang) +get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(LANG ${LANGUAGES}) + if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang") + if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3) + message(FATAL_ERROR "Clang version must be 3.0.0 or greater! 
Aborting...") + endif() + elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" + AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") + message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + endif() +endforeach() + +set(COVERAGE_COMPILER_FLAGS "-g --coverage" + CACHE INTERNAL "") +if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)") + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path) + if(HAVE_fprofile_abs_path) + set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") + endif() +endif() + +set(CMAKE_Fortran_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the Fortran compiler during coverage builds." + FORCE ) +set(CMAKE_CXX_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C++ compiler during coverage builds." + FORCE ) +set(CMAKE_C_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C compiler during coverage builds." + FORCE ) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used for linking binaries during coverage builds." + FORCE ) +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
+ FORCE ) +mark_as_advanced( + CMAKE_Fortran_FLAGS_COVERAGE + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_C_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) + +get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)) + message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading") +endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG) + +if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + link_libraries(gcov) +endif() + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_lcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# ) +function(setup_target_for_coverage_lcov) + + set(options NO_DEMANGLE SONARQUBE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT LCOV_PATH) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() # NOT LCOV_PATH + + if(NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! 
Aborting...") + endif() # NOT GENHTML_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(LCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND LCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES LCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Setting up commands which will be run to generate coverage data. + # Cleanup lcov + set(LCOV_CLEAN_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory . + -b ${BASEDIR} --zerocounters + ) + # Create baseline to make sure untouched files show up in the report + set(LCOV_BASELINE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b + ${BASEDIR} -o ${Coverage_NAME}.base + ) + # Run tests + set(LCOV_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Capturing lcov counters and generating report + set(LCOV_CAPTURE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . 
-b + ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture + ) + # add baseline counters + set(LCOV_BASELINE_COUNT_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base + -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total + ) + # filter collected data to final coverage report + set(LCOV_FILTER_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove + ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info + ) + # Generate HTML output + set(LCOV_GEN_HTML_CMD + ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o + ${Coverage_NAME} ${Coverage_NAME}.info + ) + if(${Coverage_SONARQUBE}) + # Generate SonarQube output + set(GCOVR_XML_CMD + ${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + set(GCOVR_XML_CMD_COMMAND + COMMAND ${GCOVR_XML_CMD} + ) + set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml) + set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.") + endif() + + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + message(STATUS "Command to clean up lcov: ") + string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}") + message(STATUS "${LCOV_CLEAN_CMD_SPACED}") + + message(STATUS "Command to create baseline: ") + string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}") + message(STATUS "${LCOV_BASELINE_CMD_SPACED}") + + message(STATUS "Command to run the tests: ") + string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}") + message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to capture counters and generate report: ") + string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}") + message(STATUS "${LCOV_CAPTURE_CMD_SPACED}") + + message(STATUS "Command to add baseline counters: ") + 
string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}") + message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}") + + message(STATUS "Command to filter collected data: ") + string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}") + message(STATUS "${LCOV_FILTER_CMD_SPACED}") + + message(STATUS "Command to generate lcov HTML output: ") + string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}") + message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}") + + if(${Coverage_SONARQUBE}) + message(STATUS "Command to generate SonarQube XML output: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + COMMAND ${LCOV_CLEAN_CMD} + COMMAND ${LCOV_BASELINE_CMD} + COMMAND ${LCOV_EXEC_TESTS_CMD} + COMMAND ${LCOV_CAPTURE_CMD} + COMMAND ${LCOV_BASELINE_COUNT_CMD} + COMMAND ${LCOV_FILTER_CMD} + COMMAND ${LCOV_GEN_HTML_CMD} + ${GCOVR_XML_CMD_COMMAND} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.base + ${Coverage_NAME}.capture + ${Coverage_NAME}.total + ${Coverage_NAME}.info + ${GCOVR_XML_CMD_BYPRODUCTS} + ${Coverage_NAME}/index.html + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report." + ) + + # Show where to find the lcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info." + ${GCOVR_XML_CMD_COMMENT} + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." 
+ ) + +endfunction() # setup_target_for_coverage_lcov + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_xml( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_xml) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! 
Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_XML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Running gcovr + set(GCOVR_XML_CMD + ${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to generate gcovr XML coverage data: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_XML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_XML_CMD} + + BYPRODUCTS ${Coverage_NAME}.xml + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce 
Cobertura code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml." + ) +endfunction() # setup_target_for_coverage_gcovr_xml + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_html( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_html) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! 
Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_HTML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Create folder + set(GCOVR_HTML_FOLDER_CMD + ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME} + ) + # Running gcovr + set(GCOVR_HTML_CMD + ${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to create a folder: ") + string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}") + message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}") + + message(STATUS "Command to generate gcovr HTML coverage data: ") + string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}") + 
message(STATUS "${GCOVR_HTML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_HTML_FOLDER_CMD} + COMMAND ${GCOVR_HTML_CMD} + + BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce HTML code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_gcovr_html + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_fastcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude. +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# SKIP_HTML # Don't create html report +# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths +# ) +function(setup_target_for_coverage_fastcov) + + set(options NO_DEMANGLE SKIP_HTML) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT FASTCOV_PATH) + message(FATAL_ERROR "fastcov not found! 
Aborting...") + endif() + + if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (Patterns, not paths, for fastcov) + set(FASTCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES}) + list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES FASTCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Set up commands which will be run to generate coverage data + set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}) + + set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --process-gcno + --output ${Coverage_NAME}.json + --exclude ${FASTCOV_EXCLUDES} + ) + + set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH} + -C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info + ) + + if(Coverage_SKIP_HTML) + set(FASTCOV_HTML_CMD ";") + else() + set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} + -o ${Coverage_NAME} ${Coverage_NAME}.info + ) + endif() + + set(FASTCOV_POST_CMD ";") + if(Coverage_POST_CMD) + set(FASTCOV_POST_CMD ${Coverage_POST_CMD}) + endif() + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):") + + message(" Running tests:") + string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}") + message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}") + + message(" Capturing fastcov counters and generating report:") + string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}") + message(" 
${FASTCOV_CAPTURE_CMD_SPACED}") + + message(" Converting fastcov .json to lcov .info:") + string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}") + message(" ${FASTCOV_CONVERT_CMD_SPACED}") + + if(NOT Coverage_SKIP_HTML) + message(" Generating HTML report: ") + string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}") + message(" ${FASTCOV_HTML_CMD_SPACED}") + endif() + if(Coverage_POST_CMD) + message(" Running post command: ") + string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}") + message(" ${FASTCOV_POST_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + + # Cleanup fastcov + COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --zerocounters + + COMMAND ${FASTCOV_EXEC_TESTS_CMD} + COMMAND ${FASTCOV_CAPTURE_CMD} + COMMAND ${FASTCOV_CONVERT_CMD} + COMMAND ${FASTCOV_HTML_CMD} + COMMAND ${FASTCOV_POST_CMD} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.info + ${Coverage_NAME}.json + ${Coverage_NAME}/index.html # report directory + + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report." 
# Optional clang-tidy integration, switched on with -DENABLE_CLANG_TIDY=ON.
#
# When clang-tidy is found, every C++ compilation is routed through it via
# CMAKE_CXX_CLANG_TIDY, and a convenience target `clang-tidy` is added that
# simply (re)builds the main project target so the checks are executed.
if(ENABLE_CLANG_TIDY)
    find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)

    if(NOT CLANG_TIDY_COMMAND)
        # The message names the option that is actually tested above.
        message(WARNING "🔴 ENABLE_CLANG_TIDY is ON but clang-tidy is not found!")
        set(CMAKE_CXX_CLANG_TIDY "" CACHE STRING "" FORCE)
    else()

        message(STATUS "🟢 ENABLE_CLANG_TIDY is ON")
        set(CLANGTIDY_EXTRA_ARGS
            "-extra-arg=-Wno-unknown-warning-option"
        )
        set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND};-p=${CMAKE_BINARY_DIR};${CLANGTIDY_EXTRA_ARGS}" CACHE STRING "" FORCE)

        # Building the project target is what triggers clang-tidy (through
        # CMAKE_CXX_CLANG_TIDY). The target must NOT build itself: a
        # `--target clang-tidy` command here would recurse without end.
        add_custom_target(clang-tidy
            COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${CMAKE_PROJECT_NAME}
            COMMENT "Running clang-tidy..."
        )
        set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
    endif()
endif(ENABLE_CLANG_TIDY)
StratifiedKFold : public Fold { + private: + std::vector y; + std::vector> stratified_indices; + void build(); + bool faulty = false; // Only true if the number of samples of any class is less than the number of folds. + public: + StratifiedKFold(int k, const std::vector& y, int seed = -1); + StratifiedKFold(int k, torch::Tensor& y, int seed = -1); + std::pair, std::vector> getFold(int nFold) override; + bool isFaulty() { return faulty; } + }; + Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) + { + std::random_device rd; + random_seed = std::default_random_engine(seed == -1 ? rd() : seed); + std::srand(seed == -1 ? time(0) : seed); + } + KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector(n)) + { + std::iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 + shuffle(indices.begin(), indices.end(), random_seed); + } + std::pair, std::vector> KFold::getFold(int nFold) + { + if (nFold >= k || nFold < 0) { + throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); + } + int nTest = n / k; + auto train = std::vector(); + auto test = std::vector(); + for (int i = 0; i < n; i++) { + if (i >= nTest * nFold && i < nTest * (nFold + 1)) { + test.push_back(indices[i]); + } else { + train.push_back(indices[i]); + } + } + return { train, test }; + } + StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed) + { + n = y.numel(); + this->y = std::vector(y.data_ptr(), y.data_ptr() + n); + build(); + } + StratifiedKFold::StratifiedKFold(int k, const std::vector& y, int seed) + : Fold(k, y.size(), seed) + { + this->y = y; + n = y.size(); + build(); + } + void StratifiedKFold::build() + { + stratified_indices = std::vector>(k); + int fold_size = n / k; + + // Compute class counts and indices + auto class_indices = std::map>(); + std::vector class_counts(*max_element(y.begin(), y.end()) + 1, 0); + for (auto i = 0; i < n; ++i) { + 
class_counts[y[i]]++; + class_indices[y[i]].push_back(i); + } + // Shuffle class indices + for (auto& [cls, indices] : class_indices) { + shuffle(indices.begin(), indices.end(), random_seed); + } + // Assign indices to folds + for (auto label = 0; label < class_counts.size(); ++label) { + auto num_samples_to_take = class_counts.at(label) / k; + if (num_samples_to_take == 0) { + std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label) + << ") is less than the number of folds (" << k << ")." << std::endl; + faulty = true; + continue; + } + auto remainder_samples_to_take = class_counts[label] % k; + for (auto fold = 0; fold < k; ++fold) { + auto it = next(class_indices[label].begin(), num_samples_to_take); + move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ## + class_indices[label].erase(class_indices[label].begin(), it); + } + auto chosen = std::vector(k, false); + while (remainder_samples_to_take > 0) { + int fold = (rand() % static_cast(k)); + if (chosen.at(fold)) { + continue; + } + chosen[fold] = true; + auto it = next(class_indices[label].begin(), 1); + stratified_indices[fold].push_back(*class_indices[label].begin()); + class_indices[label].erase(class_indices[label].begin(), it); + remainder_samples_to_take--; + } + } + } + std::pair, std::vector> StratifiedKFold::getFold(int nFold) + { + if (nFold >= k || nFold < 0) { + throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); + } + std::vector test_indices = stratified_indices[nFold]; + std::vector train_indices; + for (int i = 0; i < k; ++i) { + if (i == nFold) continue; + train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end()); + } + return { train_indices, test_indices }; + } +} \ No newline at end of file diff --git a/gcovr.cfg b/gcovr.cfg new file mode 100644 index 0000000..89e0877 --- /dev/null +++ b/gcovr.cfg @@ -0,0 +1,4 @@ 
+filter = src/ +exclude-directories = build_debug/lib/ +print-summary = yes +sort-percentage = yes diff --git a/lib/Files/ArffFiles.cc b/lib/Files/ArffFiles.cc new file mode 100644 index 0000000..99f29bd --- /dev/null +++ b/lib/Files/ArffFiles.cc @@ -0,0 +1,168 @@ +#include "ArffFiles.h" +#include +#include +#include +#include + +ArffFiles::ArffFiles() = default; + +std::vector ArffFiles::getLines() const +{ + return lines; +} + +unsigned long int ArffFiles::getSize() const +{ + return lines.size(); +} + +std::vector> ArffFiles::getAttributes() const +{ + return attributes; +} + +std::string ArffFiles::getClassName() const +{ + return className; +} + +std::string ArffFiles::getClassType() const +{ + return classType; +} + +std::vector>& ArffFiles::getX() +{ + return X; +} + +std::vector& ArffFiles::getY() +{ + return y; +} + +void ArffFiles::loadCommon(std::string fileName) +{ + std::ifstream file(fileName); + if (!file.is_open()) { + throw std::invalid_argument("Unable to open file"); + } + std::string line; + std::string keyword; + std::string attribute; + std::string type; + std::string type_w; + while (getline(file, line)) { + if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { + continue; + } + if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) { + std::stringstream ss(line); + ss >> keyword >> attribute; + type = ""; + while (ss >> type_w) + type += type_w + " "; + attributes.emplace_back(trim(attribute), trim(type)); + continue; + } + if (line[0] == '@') { + continue; + } + lines.push_back(line); + } + file.close(); + if (attributes.empty()) + throw std::invalid_argument("No attributes found"); +} + +void ArffFiles::load(const std::string& fileName, bool classLast) +{ + int labelIndex; + loadCommon(fileName); + if (classLast) { + className = std::get<0>(attributes.back()); + classType = std::get<1>(attributes.back()); + attributes.pop_back(); + labelIndex = static_cast(attributes.size()); + } 
else { + className = std::get<0>(attributes.front()); + classType = std::get<1>(attributes.front()); + attributes.erase(attributes.begin()); + labelIndex = 0; + } + generateDataset(labelIndex); +} +void ArffFiles::load(const std::string& fileName, const std::string& name) +{ + int labelIndex; + loadCommon(fileName); + bool found = false; + for (int i = 0; i < attributes.size(); ++i) { + if (attributes[i].first == name) { + className = std::get<0>(attributes[i]); + classType = std::get<1>(attributes[i]); + attributes.erase(attributes.begin() + i); + labelIndex = i; + found = true; + break; + } + } + if (!found) { + throw std::invalid_argument("Class name not found"); + } + generateDataset(labelIndex); +} + +void ArffFiles::generateDataset(int labelIndex) +{ + X = std::vector>(attributes.size(), std::vector(lines.size())); + auto yy = std::vector(lines.size(), ""); + auto removeLines = std::vector(); // Lines with missing values + for (size_t i = 0; i < lines.size(); i++) { + std::stringstream ss(lines[i]); + std::string value; + int pos = 0; + int xIndex = 0; + while (getline(ss, value, ',')) { + if (pos++ == labelIndex) { + yy[i] = value; + } else { + if (value == "?") { + X[xIndex++][i] = -1; + removeLines.push_back(i); + } else + X[xIndex++][i] = stof(value); + } + } + } + for (auto i : removeLines) { + yy.erase(yy.begin() + i); + for (auto& x : X) { + x.erase(x.begin() + i); + } + } + y = factorize(yy); +} + +std::string ArffFiles::trim(const std::string& source) +{ + std::string s(source); + s.erase(0, s.find_first_not_of(" '\n\r\t")); + s.erase(s.find_last_not_of(" '\n\r\t") + 1); + return s; +} + +std::vector ArffFiles::factorize(const std::vector& labels_t) +{ + std::vector yy; + yy.reserve(labels_t.size()); + std::map labelMap; + int i = 0; + for (const std::string& label : labels_t) { + if (labelMap.find(label) == labelMap.end()) { + labelMap[label] = i++; + } + yy.push_back(labelMap[label]); + } + return yy; +} \ No newline at end of file diff --git 
a/lib/Files/ArffFiles.h b/lib/Files/ArffFiles.h new file mode 100644 index 0000000..25e5a8c --- /dev/null +++ b/lib/Files/ArffFiles.h @@ -0,0 +1,32 @@ +#ifndef ARFFFILES_H +#define ARFFFILES_H + +#include +#include + +class ArffFiles { +private: + std::vector lines; + std::vector> attributes; + std::string className; + std::string classType; + std::vector> X; + std::vector y; + void generateDataset(int); + void loadCommon(std::string); +public: + ArffFiles(); + void load(const std::string&, bool = true); + void load(const std::string&, const std::string&); + std::vector getLines() const; + unsigned long int getSize() const; + std::string getClassName() const; + std::string getClassType() const; + static std::string trim(const std::string&); + std::vector>& getX(); + std::vector& getY(); + std::vector> getAttributes() const; + static std::vector factorize(const std::vector& labels_t); +}; + +#endif \ No newline at end of file diff --git a/lib/Files/CMakeLists.txt b/lib/Files/CMakeLists.txt new file mode 100644 index 0000000..fce5b8f --- /dev/null +++ b/lib/Files/CMakeLists.txt @@ -0,0 +1 @@ +add_library(ArffFiles ArffFiles.cc) \ No newline at end of file diff --git a/lib/catch2 b/lib/catch2 new file mode 160000 index 0000000..863c662 --- /dev/null +++ b/lib/catch2 @@ -0,0 +1 @@ +Subproject commit 863c662c0eff026300f4d729a7054e90d6d12cdd diff --git a/lib/mdlp b/lib/mdlp new file mode 160000 index 0000000..5708dc3 --- /dev/null +++ b/lib/mdlp @@ -0,0 +1 @@ +Subproject commit 5708dc3de944fc22d61a2dd071b63aa338e04db3 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..5a00026 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,12 @@ +if(ENABLE_TESTING) + include_directories( + ${Folding_SOURCE_DIR} + ${Folding_SOURCE_DIR}/lib/Files + ${Folding_SOURCE_DIR}/lib/mdlp + ${CMAKE_BINARY_DIR}/configured_files/include + ) + set(TEST_FOLDING "unit_tests_folding") + add_executable(${TEST_FOLDING} TestFolding.cc TestUtils.cc) + 
target_link_libraries(${TEST_FOLDING} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) + add_test(NAME ${TEST_FOLDING} COMMAND ${TEST_FOLDING}) +endif(ENABLE_TESTING) diff --git a/tests/TestFolding.cc b/tests/TestFolding.cc new file mode 100644 index 0000000..1424552 --- /dev/null +++ b/tests/TestFolding.cc @@ -0,0 +1,95 @@ +#include +#include +#include +#include "TestUtils.h" +#include "folding.hpp" + +TEST_CASE("KFold Test", "[Platform][KFold]") +{ + // Initialize a KFold object with k=5 and a seed of 19. + std::string file_name = GENERATE("iris", "diabetes"); + auto raw = RawDatasets(file_name, true); + int nFolds = 5; + folding::KFold kfold(nFolds, raw.nSamples, 19); + int number = raw.nSamples * (kfold.getNumberOfFolds() - 1) / kfold.getNumberOfFolds(); + + SECTION("Number of Folds") + { + REQUIRE(kfold.getNumberOfFolds() == nFolds); + } + SECTION("Fold Test") + { + // Test each fold's size and contents. + for (int i = 0; i < nFolds; ++i) { + auto [train_indices, test_indices] = kfold.getFold(i); + bool result = train_indices.size() == number || train_indices.size() == number + 1; + REQUIRE(result); + REQUIRE(train_indices.size() + test_indices.size() == raw.nSamples); + } + } +} + +map counts(std::vector y, std::vector indices) +{ + map result; + for (auto i = 0; i < indices.size(); ++i) { + result[y[indices[i]]]++; + } + return result; +} + +TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]") +{ + // Initialize a StratifiedKFold object with k=3, using the y std::vector, and a seed of 17. 
+ std::string file_name = GENERATE("iris", "diabetes"); + int nFolds = GENERATE(3, 5, 10); + auto raw = RawDatasets(file_name, true); + folding::StratifiedKFold stratified_kfoldt(nFolds, raw.yt, 17); + folding::StratifiedKFold stratified_kfoldv(nFolds, raw.yv, 17); + int number = raw.nSamples * (stratified_kfoldt.getNumberOfFolds() - 1) / stratified_kfoldt.getNumberOfFolds(); + + SECTION("Stratified Number of Folds") + { + REQUIRE(stratified_kfoldt.getNumberOfFolds() == nFolds); + } + SECTION("Stratified Fold Test") + { + // Test each fold's size and contents. + auto counts = map>(); + // Initialize the counts per Fold + for (int i = 0; i < nFolds; ++i) { + counts[i] = std::vector(raw.classNumStates, 0); + } + // Check fold and compute counts of each fold + for (int fold = 0; fold < nFolds; ++fold) { + auto [train_indicest, test_indicest] = stratified_kfoldt.getFold(fold); + auto [train_indicesv, test_indicesv] = stratified_kfoldv.getFold(fold); + REQUIRE(train_indicest == train_indicesv); + REQUIRE(test_indicest == test_indicesv); + // In the worst case scenario, the number of samples in the training set is number + raw.classNumStates + // because in that fold can come one remainder sample from each class. + REQUIRE(train_indicest.size() <= number + raw.classNumStates); + // If the number of samples in any class is less than the number of folds, then the fold is faulty. 
+ // and the number of samples in the training set + test set will be less than nSamples + if (!stratified_kfoldt.isFaulty()) { + REQUIRE(train_indicest.size() + test_indicest.size() == raw.nSamples); + } else { + REQUIRE(train_indicest.size() + test_indicest.size() <= raw.nSamples); + } + auto train_t = torch::tensor(train_indicest); + auto ytrain = raw.yt.index({ train_t }); + // Check that the class labels have been equally assign to each fold + for (const auto& idx : train_indicest) { + counts[fold][raw.yt[idx].item()]++; + } + } + // Test the fold counting of every class + for (int fold = 0; fold < nFolds; ++fold) { + for (int j = 1; j < nFolds - 1; ++j) { + for (int k = 0; k < raw.classNumStates; ++k) { + REQUIRE(abs(counts.at(fold).at(k) - counts.at(j).at(k)) <= 1); + } + } + } + } +} diff --git a/tests/TestUtils.cc b/tests/TestUtils.cc new file mode 100644 index 0000000..5a0910e --- /dev/null +++ b/tests/TestUtils.cc @@ -0,0 +1,105 @@ +#include "TestUtils.h" +#include "config.h" + +class Paths { +public: + static std::string datasets() + { + return { data_path.begin(), data_path.end() }; + } +}; + +pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features) +{ + std::vector Xd; + map maxes; + auto fimdlp = mdlp::CPPFImdlp(); + for (int i = 0; i < X.size(); i++) { + fimdlp.fit(X[i], y); + mdlp::labels_t& xd = fimdlp.transform(X[i]); + maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1; + Xd.push_back(xd); + } + return { Xd, maxes }; +} + +std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y) +{ + std::vector Xd; + auto fimdlp = mdlp::CPPFImdlp(); + for (int i = 0; i < X.size(); i++) { + fimdlp.fit(X[i], y); + mdlp::labels_t& xd = fimdlp.transform(X[i]); + Xd.push_back(xd); + } + return Xd; +} + +bool file_exists(const std::string& name) +{ + if (FILE* file = fopen(name.c_str(), "r")) { + fclose(file); + return true; + } else { + return false; + } +} + +tuple, std::string, map>> loadDataset(const std::string& name, bool 
class_last, bool discretize_dataset) +{ + auto handler = ArffFiles(); + handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); + // Get Dataset X, y + std::vector& X = handler.getX(); + mdlp::labels_t& y = handler.getY(); + // Get className & Features + auto className = handler.getClassName(); + std::vector features; + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); + torch::Tensor Xd; + auto states = map>(); + if (discretize_dataset) { + auto Xr = discretizeDataset(X, y); + Xd = torch::zeros({ static_cast(Xr.size()), static_cast(Xr[0].size()) }, torch::kInt32); + for (int i = 0; i < features.size(); ++i) { + states[features[i]] = std::vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1); + auto item = states.at(features[i]); + iota(begin(item), end(item), 0); + Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32)); + } + states[className] = std::vector(*max_element(y.begin(), y.end()) + 1); + iota(begin(states.at(className)), end(states.at(className)), 0); + } else { + Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); + for (int i = 0; i < features.size(); ++i) { + Xd.index_put_({ i, "..." 
}, torch::tensor(X[i])); + } + } + return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; +} + +tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name) +{ + auto handler = ArffFiles(); + handler.load(Paths::datasets() + static_cast(name) + ".arff"); + // Get Dataset X, y + std::vector& X = handler.getX(); + mdlp::labels_t& y = handler.getY(); + // Get className & Features + auto className = handler.getClassName(); + std::vector features; + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); + // Discretize Dataset + std::vector Xd; + map maxes; + tie(Xd, maxes) = discretize(X, y, features); + maxes[className] = *max_element(y.begin(), y.end()) + 1; + map> states; + for (auto feature : features) { + states[feature] = std::vector(maxes[feature]); + } + states[className] = std::vector(maxes[className]); + return { Xd, y, features, className, states }; +} diff --git a/tests/TestUtils.h b/tests/TestUtils.h new file mode 100644 index 0000000..72954c0 --- /dev/null +++ b/tests/TestUtils.h @@ -0,0 +1,43 @@ +#ifndef TEST_UTILS_H +#define TEST_UTILS_H +#include +#include +#include +#include +#include +#include "ArffFiles.h" +#include "CPPFImdlp.h" + +bool file_exists(const std::string& name); +std::pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features); +std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y); +std::tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name); +std::tuple, std::string, map>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset); + +class RawDatasets { +public: + RawDatasets(const std::string& file_name, bool discretize) + { + // Xt can be either discretized or not + tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize); + // Xv is always discretized + tie(Xv, yv, 
featuresv, classNamev, statesv) = loadFile(file_name); + auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1); + dataset = torch::cat({ Xt, yresized }, 0); + nSamples = dataset.size(1); + weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble); + weightsv = std::vector(nSamples, 1.0 / nSamples); + classNumStates = discretize ? statest.at(classNamet).size() : 0; + } + torch::Tensor Xt, yt, dataset, weights; + std::vector> Xv; + std::vector weightsv; + std::vector yv; + std::vector featurest, featuresv; + map> statest, statesv; + std::string classNamet, classNamev; + int nSamples, classNumStates; + double epsilon = 1e-5; +}; + +#endif //TEST_UTILS_H \ No newline at end of file diff --git a/tests/data/diabetes.arff b/tests/data/diabetes.arff new file mode 100755 index 0000000..a33cbd8 --- /dev/null +++ b/tests/data/diabetes.arff @@ -0,0 +1,863 @@ +% 1. Title: Pima Indians Diabetes Database +% +% 2. Sources: +% (a) Original owners: National Institute of Diabetes and Digestive and +% Kidney Diseases +% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu) +% Research Center, RMI Group Leader +% Applied Physics Laboratory +% The Johns Hopkins University +% Johns Hopkins Road +% Laurel, MD 20707 +% (301) 953-6231 +% (c) Date received: 9 May 1990 +% +% 3. Past Usage: +% 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \& +% Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast +% the onset of diabetes mellitus. In {\it Proceedings of the Symposium +% on Computer Applications and Medical Care} (pp. 261--265). IEEE +% Computer Society Press. +% +% The diagnostic, binary-valued variable investigated is whether the +% patient shows signs of diabetes according to World Health Organization +% criteria (i.e., if the 2 hour post-load plasma glucose was at least +% 200 mg/dl at any survey examination or if found during routine medical +% care). The population lives near Phoenix, Arizona, USA. 
+% +% Results: Their ADAP algorithm makes a real-valued prediction between +% 0 and 1. This was transformed into a binary decision using a cutoff of +% 0.448. Using 576 training instances, the sensitivity and specificity +% of their algorithm was 76% on the remaining 192 instances. +% +% 4. Relevant Information: +% Several constraints were placed on the selection of these instances from +% a larger database. In particular, all patients here are females at +% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning +% routine that generates and executes digital analogs of perceptron-like +% devices. It is a unique algorithm; see the paper for details. +% +% 5. Number of Instances: 768 +% +% 6. Number of Attributes: 8 plus class +% +% 7. For Each Attribute: (all numeric-valued) +% 1. Number of times pregnant +% 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test +% 3. Diastolic blood pressure (mm Hg) +% 4. Triceps skin fold thickness (mm) +% 5. 2-Hour serum insulin (mu U/ml) +% 6. Body mass index (weight in kg/(height in m)^2) +% 7. Diabetes pedigree function +% 8. Age (years) +% 9. Class variable (0 or 1) +% +% 8. Missing Attribute Values: None +% +% 9. Class Distribution: (class value 1 is interpreted as "tested positive for +% diabetes") +% +% Class Value Number of instances +% 0 500 +% 1 268 +% +% 10. Brief statistical analysis: +% +% Attribute number: Mean: Standard Deviation: +% 1. 3.8 3.4 +% 2. 120.9 32.0 +% 3. 69.1 19.4 +% 4. 20.5 16.0 +% 5. 79.8 115.2 +% 6. 32.0 7.9 +% 7. 0.5 0.3 +% 8. 
33.2 11.8 +% +% +% +% +% +% +% Relabeled values in attribute 'class' +% From: 0 To: tested_negative +% From: 1 To: tested_positive +% +@relation pima_diabetes +@attribute 'preg' real +@attribute 'plas' real +@attribute 'pres' real +@attribute 'skin' real +@attribute 'insu' real +@attribute 'mass' real +@attribute 'pedi' real +@attribute 'age' real +@attribute 'class' { tested_negative, tested_positive} +@data +6,148,72,35,0,33.6,0.627,50,tested_positive +1,85,66,29,0,26.6,0.351,31,tested_negative +8,183,64,0,0,23.3,0.672,32,tested_positive +1,89,66,23,94,28.1,0.167,21,tested_negative +0,137,40,35,168,43.1,2.288,33,tested_positive +5,116,74,0,0,25.6,0.201,30,tested_negative +3,78,50,32,88,31,0.248,26,tested_positive +10,115,0,0,0,35.3,0.134,29,tested_negative +2,197,70,45,543,30.5,0.158,53,tested_positive +8,125,96,0,0,0,0.232,54,tested_positive +4,110,92,0,0,37.6,0.191,30,tested_negative +10,168,74,0,0,38,0.537,34,tested_positive +10,139,80,0,0,27.1,1.441,57,tested_negative +1,189,60,23,846,30.1,0.398,59,tested_positive +5,166,72,19,175,25.8,0.587,51,tested_positive +7,100,0,0,0,30,0.484,32,tested_positive +0,118,84,47,230,45.8,0.551,31,tested_positive +7,107,74,0,0,29.6,0.254,31,tested_positive +1,103,30,38,83,43.3,0.183,33,tested_negative +1,115,70,30,96,34.6,0.529,32,tested_positive +3,126,88,41,235,39.3,0.704,27,tested_negative +8,99,84,0,0,35.4,0.388,50,tested_negative +7,196,90,0,0,39.8,0.451,41,tested_positive +9,119,80,35,0,29,0.263,29,tested_positive +11,143,94,33,146,36.6,0.254,51,tested_positive +10,125,70,26,115,31.1,0.205,41,tested_positive +7,147,76,0,0,39.4,0.257,43,tested_positive +1,97,66,15,140,23.2,0.487,22,tested_negative +13,145,82,19,110,22.2,0.245,57,tested_negative +5,117,92,0,0,34.1,0.337,38,tested_negative +5,109,75,26,0,36,0.546,60,tested_negative +3,158,76,36,245,31.6,0.851,28,tested_positive +3,88,58,11,54,24.8,0.267,22,tested_negative +6,92,92,0,0,19.9,0.188,28,tested_negative +10,122,78,31,0,27.6,0.512,45,tested_negative 
+4,103,60,33,192,24,0.966,33,tested_negative +11,138,76,0,0,33.2,0.42,35,tested_negative +9,102,76,37,0,32.9,0.665,46,tested_positive +2,90,68,42,0,38.2,0.503,27,tested_positive +4,111,72,47,207,37.1,1.39,56,tested_positive +3,180,64,25,70,34,0.271,26,tested_negative +7,133,84,0,0,40.2,0.696,37,tested_negative +7,106,92,18,0,22.7,0.235,48,tested_negative +9,171,110,24,240,45.4,0.721,54,tested_positive +7,159,64,0,0,27.4,0.294,40,tested_negative +0,180,66,39,0,42,1.893,25,tested_positive +1,146,56,0,0,29.7,0.564,29,tested_negative +2,71,70,27,0,28,0.586,22,tested_negative +7,103,66,32,0,39.1,0.344,31,tested_positive +7,105,0,0,0,0,0.305,24,tested_negative +1,103,80,11,82,19.4,0.491,22,tested_negative +1,101,50,15,36,24.2,0.526,26,tested_negative +5,88,66,21,23,24.4,0.342,30,tested_negative +8,176,90,34,300,33.7,0.467,58,tested_positive +7,150,66,42,342,34.7,0.718,42,tested_negative +1,73,50,10,0,23,0.248,21,tested_negative +7,187,68,39,304,37.7,0.254,41,tested_positive +0,100,88,60,110,46.8,0.962,31,tested_negative +0,146,82,0,0,40.5,1.781,44,tested_negative +0,105,64,41,142,41.5,0.173,22,tested_negative +2,84,0,0,0,0,0.304,21,tested_negative +8,133,72,0,0,32.9,0.27,39,tested_positive +5,44,62,0,0,25,0.587,36,tested_negative +2,141,58,34,128,25.4,0.699,24,tested_negative +7,114,66,0,0,32.8,0.258,42,tested_positive +5,99,74,27,0,29,0.203,32,tested_negative +0,109,88,30,0,32.5,0.855,38,tested_positive +2,109,92,0,0,42.7,0.845,54,tested_negative +1,95,66,13,38,19.6,0.334,25,tested_negative +4,146,85,27,100,28.9,0.189,27,tested_negative +2,100,66,20,90,32.9,0.867,28,tested_positive +5,139,64,35,140,28.6,0.411,26,tested_negative +13,126,90,0,0,43.4,0.583,42,tested_positive +4,129,86,20,270,35.1,0.231,23,tested_negative +1,79,75,30,0,32,0.396,22,tested_negative +1,0,48,20,0,24.7,0.14,22,tested_negative +7,62,78,0,0,32.6,0.391,41,tested_negative +5,95,72,33,0,37.7,0.37,27,tested_negative +0,131,0,0,0,43.2,0.27,26,tested_positive +2,112,66,22,0,25,0.307,24,tested_negative 
+3,113,44,13,0,22.4,0.14,22,tested_negative +2,74,0,0,0,0,0.102,22,tested_negative +7,83,78,26,71,29.3,0.767,36,tested_negative +0,101,65,28,0,24.6,0.237,22,tested_negative +5,137,108,0,0,48.8,0.227,37,tested_positive +2,110,74,29,125,32.4,0.698,27,tested_negative +13,106,72,54,0,36.6,0.178,45,tested_negative +2,100,68,25,71,38.5,0.324,26,tested_negative +15,136,70,32,110,37.1,0.153,43,tested_positive +1,107,68,19,0,26.5,0.165,24,tested_negative +1,80,55,0,0,19.1,0.258,21,tested_negative +4,123,80,15,176,32,0.443,34,tested_negative +7,81,78,40,48,46.7,0.261,42,tested_negative +4,134,72,0,0,23.8,0.277,60,tested_positive +2,142,82,18,64,24.7,0.761,21,tested_negative +6,144,72,27,228,33.9,0.255,40,tested_negative +2,92,62,28,0,31.6,0.13,24,tested_negative +1,71,48,18,76,20.4,0.323,22,tested_negative +6,93,50,30,64,28.7,0.356,23,tested_negative +1,122,90,51,220,49.7,0.325,31,tested_positive +1,163,72,0,0,39,1.222,33,tested_positive +1,151,60,0,0,26.1,0.179,22,tested_negative +0,125,96,0,0,22.5,0.262,21,tested_negative +1,81,72,18,40,26.6,0.283,24,tested_negative +2,85,65,0,0,39.6,0.93,27,tested_negative +1,126,56,29,152,28.7,0.801,21,tested_negative +1,96,122,0,0,22.4,0.207,27,tested_negative +4,144,58,28,140,29.5,0.287,37,tested_negative +3,83,58,31,18,34.3,0.336,25,tested_negative +0,95,85,25,36,37.4,0.247,24,tested_positive +3,171,72,33,135,33.3,0.199,24,tested_positive +8,155,62,26,495,34,0.543,46,tested_positive +1,89,76,34,37,31.2,0.192,23,tested_negative +4,76,62,0,0,34,0.391,25,tested_negative +7,160,54,32,175,30.5,0.588,39,tested_positive +4,146,92,0,0,31.2,0.539,61,tested_positive +5,124,74,0,0,34,0.22,38,tested_positive +5,78,48,0,0,33.7,0.654,25,tested_negative +4,97,60,23,0,28.2,0.443,22,tested_negative +4,99,76,15,51,23.2,0.223,21,tested_negative +0,162,76,56,100,53.2,0.759,25,tested_positive +6,111,64,39,0,34.2,0.26,24,tested_negative +2,107,74,30,100,33.6,0.404,23,tested_negative +5,132,80,0,0,26.8,0.186,69,tested_negative 
+0,113,76,0,0,33.3,0.278,23,tested_positive +1,88,30,42,99,55,0.496,26,tested_positive +3,120,70,30,135,42.9,0.452,30,tested_negative +1,118,58,36,94,33.3,0.261,23,tested_negative +1,117,88,24,145,34.5,0.403,40,tested_positive +0,105,84,0,0,27.9,0.741,62,tested_positive +4,173,70,14,168,29.7,0.361,33,tested_positive +9,122,56,0,0,33.3,1.114,33,tested_positive +3,170,64,37,225,34.5,0.356,30,tested_positive +8,84,74,31,0,38.3,0.457,39,tested_negative +2,96,68,13,49,21.1,0.647,26,tested_negative +2,125,60,20,140,33.8,0.088,31,tested_negative +0,100,70,26,50,30.8,0.597,21,tested_negative +0,93,60,25,92,28.7,0.532,22,tested_negative +0,129,80,0,0,31.2,0.703,29,tested_negative +5,105,72,29,325,36.9,0.159,28,tested_negative +3,128,78,0,0,21.1,0.268,55,tested_negative +5,106,82,30,0,39.5,0.286,38,tested_negative +2,108,52,26,63,32.5,0.318,22,tested_negative +10,108,66,0,0,32.4,0.272,42,tested_positive +4,154,62,31,284,32.8,0.237,23,tested_negative +0,102,75,23,0,0,0.572,21,tested_negative +9,57,80,37,0,32.8,0.096,41,tested_negative +2,106,64,35,119,30.5,1.4,34,tested_negative +5,147,78,0,0,33.7,0.218,65,tested_negative +2,90,70,17,0,27.3,0.085,22,tested_negative +1,136,74,50,204,37.4,0.399,24,tested_negative +4,114,65,0,0,21.9,0.432,37,tested_negative +9,156,86,28,155,34.3,1.189,42,tested_positive +1,153,82,42,485,40.6,0.687,23,tested_negative +8,188,78,0,0,47.9,0.137,43,tested_positive +7,152,88,44,0,50,0.337,36,tested_positive +2,99,52,15,94,24.6,0.637,21,tested_negative +1,109,56,21,135,25.2,0.833,23,tested_negative +2,88,74,19,53,29,0.229,22,tested_negative +17,163,72,41,114,40.9,0.817,47,tested_positive +4,151,90,38,0,29.7,0.294,36,tested_negative +7,102,74,40,105,37.2,0.204,45,tested_negative +0,114,80,34,285,44.2,0.167,27,tested_negative +2,100,64,23,0,29.7,0.368,21,tested_negative +0,131,88,0,0,31.6,0.743,32,tested_positive +6,104,74,18,156,29.9,0.722,41,tested_positive +3,148,66,25,0,32.5,0.256,22,tested_negative +4,120,68,0,0,29.6,0.709,34,tested_negative 
+4,110,66,0,0,31.9,0.471,29,tested_negative +3,111,90,12,78,28.4,0.495,29,tested_negative +6,102,82,0,0,30.8,0.18,36,tested_positive +6,134,70,23,130,35.4,0.542,29,tested_positive +2,87,0,23,0,28.9,0.773,25,tested_negative +1,79,60,42,48,43.5,0.678,23,tested_negative +2,75,64,24,55,29.7,0.37,33,tested_negative +8,179,72,42,130,32.7,0.719,36,tested_positive +6,85,78,0,0,31.2,0.382,42,tested_negative +0,129,110,46,130,67.1,0.319,26,tested_positive +5,143,78,0,0,45,0.19,47,tested_negative +5,130,82,0,0,39.1,0.956,37,tested_positive +6,87,80,0,0,23.2,0.084,32,tested_negative +0,119,64,18,92,34.9,0.725,23,tested_negative +1,0,74,20,23,27.7,0.299,21,tested_negative +5,73,60,0,0,26.8,0.268,27,tested_negative +4,141,74,0,0,27.6,0.244,40,tested_negative +7,194,68,28,0,35.9,0.745,41,tested_positive +8,181,68,36,495,30.1,0.615,60,tested_positive +1,128,98,41,58,32,1.321,33,tested_positive +8,109,76,39,114,27.9,0.64,31,tested_positive +5,139,80,35,160,31.6,0.361,25,tested_positive +3,111,62,0,0,22.6,0.142,21,tested_negative +9,123,70,44,94,33.1,0.374,40,tested_negative +7,159,66,0,0,30.4,0.383,36,tested_positive +11,135,0,0,0,52.3,0.578,40,tested_positive +8,85,55,20,0,24.4,0.136,42,tested_negative +5,158,84,41,210,39.4,0.395,29,tested_positive +1,105,58,0,0,24.3,0.187,21,tested_negative +3,107,62,13,48,22.9,0.678,23,tested_positive +4,109,64,44,99,34.8,0.905,26,tested_positive +4,148,60,27,318,30.9,0.15,29,tested_positive +0,113,80,16,0,31,0.874,21,tested_negative +1,138,82,0,0,40.1,0.236,28,tested_negative +0,108,68,20,0,27.3,0.787,32,tested_negative +2,99,70,16,44,20.4,0.235,27,tested_negative +6,103,72,32,190,37.7,0.324,55,tested_negative +5,111,72,28,0,23.9,0.407,27,tested_negative +8,196,76,29,280,37.5,0.605,57,tested_positive +5,162,104,0,0,37.7,0.151,52,tested_positive +1,96,64,27,87,33.2,0.289,21,tested_negative +7,184,84,33,0,35.5,0.355,41,tested_positive +2,81,60,22,0,27.7,0.29,25,tested_negative +0,147,85,54,0,42.8,0.375,24,tested_negative 
+7,179,95,31,0,34.2,0.164,60,tested_negative +0,140,65,26,130,42.6,0.431,24,tested_positive +9,112,82,32,175,34.2,0.26,36,tested_positive +12,151,70,40,271,41.8,0.742,38,tested_positive +5,109,62,41,129,35.8,0.514,25,tested_positive +6,125,68,30,120,30,0.464,32,tested_negative +5,85,74,22,0,29,1.224,32,tested_positive +5,112,66,0,0,37.8,0.261,41,tested_positive +0,177,60,29,478,34.6,1.072,21,tested_positive +2,158,90,0,0,31.6,0.805,66,tested_positive +7,119,0,0,0,25.2,0.209,37,tested_negative +7,142,60,33,190,28.8,0.687,61,tested_negative +1,100,66,15,56,23.6,0.666,26,tested_negative +1,87,78,27,32,34.6,0.101,22,tested_negative +0,101,76,0,0,35.7,0.198,26,tested_negative +3,162,52,38,0,37.2,0.652,24,tested_positive +4,197,70,39,744,36.7,2.329,31,tested_negative +0,117,80,31,53,45.2,0.089,24,tested_negative +4,142,86,0,0,44,0.645,22,tested_positive +6,134,80,37,370,46.2,0.238,46,tested_positive +1,79,80,25,37,25.4,0.583,22,tested_negative +4,122,68,0,0,35,0.394,29,tested_negative +3,74,68,28,45,29.7,0.293,23,tested_negative +4,171,72,0,0,43.6,0.479,26,tested_positive +7,181,84,21,192,35.9,0.586,51,tested_positive +0,179,90,27,0,44.1,0.686,23,tested_positive +9,164,84,21,0,30.8,0.831,32,tested_positive +0,104,76,0,0,18.4,0.582,27,tested_negative +1,91,64,24,0,29.2,0.192,21,tested_negative +4,91,70,32,88,33.1,0.446,22,tested_negative +3,139,54,0,0,25.6,0.402,22,tested_positive +6,119,50,22,176,27.1,1.318,33,tested_positive +2,146,76,35,194,38.2,0.329,29,tested_negative +9,184,85,15,0,30,1.213,49,tested_positive +10,122,68,0,0,31.2,0.258,41,tested_negative +0,165,90,33,680,52.3,0.427,23,tested_negative +9,124,70,33,402,35.4,0.282,34,tested_negative +1,111,86,19,0,30.1,0.143,23,tested_negative +9,106,52,0,0,31.2,0.38,42,tested_negative +2,129,84,0,0,28,0.284,27,tested_negative +2,90,80,14,55,24.4,0.249,24,tested_negative +0,86,68,32,0,35.8,0.238,25,tested_negative +12,92,62,7,258,27.6,0.926,44,tested_positive +1,113,64,35,0,33.6,0.543,21,tested_positive 
+3,111,56,39,0,30.1,0.557,30,tested_negative +2,114,68,22,0,28.7,0.092,25,tested_negative +1,193,50,16,375,25.9,0.655,24,tested_negative +11,155,76,28,150,33.3,1.353,51,tested_positive +3,191,68,15,130,30.9,0.299,34,tested_negative +3,141,0,0,0,30,0.761,27,tested_positive +4,95,70,32,0,32.1,0.612,24,tested_negative +3,142,80,15,0,32.4,0.2,63,tested_negative +4,123,62,0,0,32,0.226,35,tested_positive +5,96,74,18,67,33.6,0.997,43,tested_negative +0,138,0,0,0,36.3,0.933,25,tested_positive +2,128,64,42,0,40,1.101,24,tested_negative +0,102,52,0,0,25.1,0.078,21,tested_negative +2,146,0,0,0,27.5,0.24,28,tested_positive +10,101,86,37,0,45.6,1.136,38,tested_positive +2,108,62,32,56,25.2,0.128,21,tested_negative +3,122,78,0,0,23,0.254,40,tested_negative +1,71,78,50,45,33.2,0.422,21,tested_negative +13,106,70,0,0,34.2,0.251,52,tested_negative +2,100,70,52,57,40.5,0.677,25,tested_negative +7,106,60,24,0,26.5,0.296,29,tested_positive +0,104,64,23,116,27.8,0.454,23,tested_negative +5,114,74,0,0,24.9,0.744,57,tested_negative +2,108,62,10,278,25.3,0.881,22,tested_negative +0,146,70,0,0,37.9,0.334,28,tested_positive +10,129,76,28,122,35.9,0.28,39,tested_negative +7,133,88,15,155,32.4,0.262,37,tested_negative +7,161,86,0,0,30.4,0.165,47,tested_positive +2,108,80,0,0,27,0.259,52,tested_positive +7,136,74,26,135,26,0.647,51,tested_negative +5,155,84,44,545,38.7,0.619,34,tested_negative +1,119,86,39,220,45.6,0.808,29,tested_positive +4,96,56,17,49,20.8,0.34,26,tested_negative +5,108,72,43,75,36.1,0.263,33,tested_negative +0,78,88,29,40,36.9,0.434,21,tested_negative +0,107,62,30,74,36.6,0.757,25,tested_positive +2,128,78,37,182,43.3,1.224,31,tested_positive +1,128,48,45,194,40.5,0.613,24,tested_positive +0,161,50,0,0,21.9,0.254,65,tested_negative +6,151,62,31,120,35.5,0.692,28,tested_negative +2,146,70,38,360,28,0.337,29,tested_positive +0,126,84,29,215,30.7,0.52,24,tested_negative +14,100,78,25,184,36.6,0.412,46,tested_positive +8,112,72,0,0,23.6,0.84,58,tested_negative 
+0,167,0,0,0,32.3,0.839,30,tested_positive +2,144,58,33,135,31.6,0.422,25,tested_positive +5,77,82,41,42,35.8,0.156,35,tested_negative +5,115,98,0,0,52.9,0.209,28,tested_positive +3,150,76,0,0,21,0.207,37,tested_negative +2,120,76,37,105,39.7,0.215,29,tested_negative +10,161,68,23,132,25.5,0.326,47,tested_positive +0,137,68,14,148,24.8,0.143,21,tested_negative +0,128,68,19,180,30.5,1.391,25,tested_positive +2,124,68,28,205,32.9,0.875,30,tested_positive +6,80,66,30,0,26.2,0.313,41,tested_negative +0,106,70,37,148,39.4,0.605,22,tested_negative +2,155,74,17,96,26.6,0.433,27,tested_positive +3,113,50,10,85,29.5,0.626,25,tested_negative +7,109,80,31,0,35.9,1.127,43,tested_positive +2,112,68,22,94,34.1,0.315,26,tested_negative +3,99,80,11,64,19.3,0.284,30,tested_negative +3,182,74,0,0,30.5,0.345,29,tested_positive +3,115,66,39,140,38.1,0.15,28,tested_negative +6,194,78,0,0,23.5,0.129,59,tested_positive +4,129,60,12,231,27.5,0.527,31,tested_negative +3,112,74,30,0,31.6,0.197,25,tested_positive +0,124,70,20,0,27.4,0.254,36,tested_positive +13,152,90,33,29,26.8,0.731,43,tested_positive +2,112,75,32,0,35.7,0.148,21,tested_negative +1,157,72,21,168,25.6,0.123,24,tested_negative +1,122,64,32,156,35.1,0.692,30,tested_positive +10,179,70,0,0,35.1,0.2,37,tested_negative +2,102,86,36,120,45.5,0.127,23,tested_positive +6,105,70,32,68,30.8,0.122,37,tested_negative +8,118,72,19,0,23.1,1.476,46,tested_negative +2,87,58,16,52,32.7,0.166,25,tested_negative +1,180,0,0,0,43.3,0.282,41,tested_positive +12,106,80,0,0,23.6,0.137,44,tested_negative +1,95,60,18,58,23.9,0.26,22,tested_negative +0,165,76,43,255,47.9,0.259,26,tested_negative +0,117,0,0,0,33.8,0.932,44,tested_negative +5,115,76,0,0,31.2,0.343,44,tested_positive +9,152,78,34,171,34.2,0.893,33,tested_positive +7,178,84,0,0,39.9,0.331,41,tested_positive +1,130,70,13,105,25.9,0.472,22,tested_negative +1,95,74,21,73,25.9,0.673,36,tested_negative +1,0,68,35,0,32,0.389,22,tested_negative +5,122,86,0,0,34.7,0.29,33,tested_negative 
+8,95,72,0,0,36.8,0.485,57,tested_negative +8,126,88,36,108,38.5,0.349,49,tested_negative +1,139,46,19,83,28.7,0.654,22,tested_negative +3,116,0,0,0,23.5,0.187,23,tested_negative +3,99,62,19,74,21.8,0.279,26,tested_negative +5,0,80,32,0,41,0.346,37,tested_positive +4,92,80,0,0,42.2,0.237,29,tested_negative +4,137,84,0,0,31.2,0.252,30,tested_negative +3,61,82,28,0,34.4,0.243,46,tested_negative +1,90,62,12,43,27.2,0.58,24,tested_negative +3,90,78,0,0,42.7,0.559,21,tested_negative +9,165,88,0,0,30.4,0.302,49,tested_positive +1,125,50,40,167,33.3,0.962,28,tested_positive +13,129,0,30,0,39.9,0.569,44,tested_positive +12,88,74,40,54,35.3,0.378,48,tested_negative +1,196,76,36,249,36.5,0.875,29,tested_positive +5,189,64,33,325,31.2,0.583,29,tested_positive +5,158,70,0,0,29.8,0.207,63,tested_negative +5,103,108,37,0,39.2,0.305,65,tested_negative +4,146,78,0,0,38.5,0.52,67,tested_positive +4,147,74,25,293,34.9,0.385,30,tested_negative +5,99,54,28,83,34,0.499,30,tested_negative +6,124,72,0,0,27.6,0.368,29,tested_positive +0,101,64,17,0,21,0.252,21,tested_negative +3,81,86,16,66,27.5,0.306,22,tested_negative +1,133,102,28,140,32.8,0.234,45,tested_positive +3,173,82,48,465,38.4,2.137,25,tested_positive +0,118,64,23,89,0,1.731,21,tested_negative +0,84,64,22,66,35.8,0.545,21,tested_negative +2,105,58,40,94,34.9,0.225,25,tested_negative +2,122,52,43,158,36.2,0.816,28,tested_negative +12,140,82,43,325,39.2,0.528,58,tested_positive +0,98,82,15,84,25.2,0.299,22,tested_negative +1,87,60,37,75,37.2,0.509,22,tested_negative +4,156,75,0,0,48.3,0.238,32,tested_positive +0,93,100,39,72,43.4,1.021,35,tested_negative +1,107,72,30,82,30.8,0.821,24,tested_negative +0,105,68,22,0,20,0.236,22,tested_negative +1,109,60,8,182,25.4,0.947,21,tested_negative +1,90,62,18,59,25.1,1.268,25,tested_negative +1,125,70,24,110,24.3,0.221,25,tested_negative +1,119,54,13,50,22.3,0.205,24,tested_negative +5,116,74,29,0,32.3,0.66,35,tested_positive +8,105,100,36,0,43.3,0.239,45,tested_positive 
+5,144,82,26,285,32,0.452,58,tested_positive +3,100,68,23,81,31.6,0.949,28,tested_negative +1,100,66,29,196,32,0.444,42,tested_negative +5,166,76,0,0,45.7,0.34,27,tested_positive +1,131,64,14,415,23.7,0.389,21,tested_negative +4,116,72,12,87,22.1,0.463,37,tested_negative +4,158,78,0,0,32.9,0.803,31,tested_positive +2,127,58,24,275,27.7,1.6,25,tested_negative +3,96,56,34,115,24.7,0.944,39,tested_negative +0,131,66,40,0,34.3,0.196,22,tested_positive +3,82,70,0,0,21.1,0.389,25,tested_negative +3,193,70,31,0,34.9,0.241,25,tested_positive +4,95,64,0,0,32,0.161,31,tested_positive +6,137,61,0,0,24.2,0.151,55,tested_negative +5,136,84,41,88,35,0.286,35,tested_positive +9,72,78,25,0,31.6,0.28,38,tested_negative +5,168,64,0,0,32.9,0.135,41,tested_positive +2,123,48,32,165,42.1,0.52,26,tested_negative +4,115,72,0,0,28.9,0.376,46,tested_positive +0,101,62,0,0,21.9,0.336,25,tested_negative +8,197,74,0,0,25.9,1.191,39,tested_positive +1,172,68,49,579,42.4,0.702,28,tested_positive +6,102,90,39,0,35.7,0.674,28,tested_negative +1,112,72,30,176,34.4,0.528,25,tested_negative +1,143,84,23,310,42.4,1.076,22,tested_negative +1,143,74,22,61,26.2,0.256,21,tested_negative +0,138,60,35,167,34.6,0.534,21,tested_positive +3,173,84,33,474,35.7,0.258,22,tested_positive +1,97,68,21,0,27.2,1.095,22,tested_negative +4,144,82,32,0,38.5,0.554,37,tested_positive +1,83,68,0,0,18.2,0.624,27,tested_negative +3,129,64,29,115,26.4,0.219,28,tested_positive +1,119,88,41,170,45.3,0.507,26,tested_negative +2,94,68,18,76,26,0.561,21,tested_negative +0,102,64,46,78,40.6,0.496,21,tested_negative +2,115,64,22,0,30.8,0.421,21,tested_negative +8,151,78,32,210,42.9,0.516,36,tested_positive +4,184,78,39,277,37,0.264,31,tested_positive +0,94,0,0,0,0,0.256,25,tested_negative +1,181,64,30,180,34.1,0.328,38,tested_positive +0,135,94,46,145,40.6,0.284,26,tested_negative +1,95,82,25,180,35,0.233,43,tested_positive +2,99,0,0,0,22.2,0.108,23,tested_negative +3,89,74,16,85,30.4,0.551,38,tested_negative 
+1,80,74,11,60,30,0.527,22,tested_negative +2,139,75,0,0,25.6,0.167,29,tested_negative +1,90,68,8,0,24.5,1.138,36,tested_negative +0,141,0,0,0,42.4,0.205,29,tested_positive +12,140,85,33,0,37.4,0.244,41,tested_negative +5,147,75,0,0,29.9,0.434,28,tested_negative +1,97,70,15,0,18.2,0.147,21,tested_negative +6,107,88,0,0,36.8,0.727,31,tested_negative +0,189,104,25,0,34.3,0.435,41,tested_positive +2,83,66,23,50,32.2,0.497,22,tested_negative +4,117,64,27,120,33.2,0.23,24,tested_negative +8,108,70,0,0,30.5,0.955,33,tested_positive +4,117,62,12,0,29.7,0.38,30,tested_positive +0,180,78,63,14,59.4,2.42,25,tested_positive +1,100,72,12,70,25.3,0.658,28,tested_negative +0,95,80,45,92,36.5,0.33,26,tested_negative +0,104,64,37,64,33.6,0.51,22,tested_positive +0,120,74,18,63,30.5,0.285,26,tested_negative +1,82,64,13,95,21.2,0.415,23,tested_negative +2,134,70,0,0,28.9,0.542,23,tested_positive +0,91,68,32,210,39.9,0.381,25,tested_negative +2,119,0,0,0,19.6,0.832,72,tested_negative +2,100,54,28,105,37.8,0.498,24,tested_negative +14,175,62,30,0,33.6,0.212,38,tested_positive +1,135,54,0,0,26.7,0.687,62,tested_negative +5,86,68,28,71,30.2,0.364,24,tested_negative +10,148,84,48,237,37.6,1.001,51,tested_positive +9,134,74,33,60,25.9,0.46,81,tested_negative +9,120,72,22,56,20.8,0.733,48,tested_negative +1,71,62,0,0,21.8,0.416,26,tested_negative +8,74,70,40,49,35.3,0.705,39,tested_negative +5,88,78,30,0,27.6,0.258,37,tested_negative +10,115,98,0,0,24,1.022,34,tested_negative +0,124,56,13,105,21.8,0.452,21,tested_negative +0,74,52,10,36,27.8,0.269,22,tested_negative +0,97,64,36,100,36.8,0.6,25,tested_negative +8,120,0,0,0,30,0.183,38,tested_positive +6,154,78,41,140,46.1,0.571,27,tested_negative +1,144,82,40,0,41.3,0.607,28,tested_negative +0,137,70,38,0,33.2,0.17,22,tested_negative +0,119,66,27,0,38.8,0.259,22,tested_negative +7,136,90,0,0,29.9,0.21,50,tested_negative +4,114,64,0,0,28.9,0.126,24,tested_negative +0,137,84,27,0,27.3,0.231,59,tested_negative 
+2,105,80,45,191,33.7,0.711,29,tested_positive +7,114,76,17,110,23.8,0.466,31,tested_negative +8,126,74,38,75,25.9,0.162,39,tested_negative +4,132,86,31,0,28,0.419,63,tested_negative +3,158,70,30,328,35.5,0.344,35,tested_positive +0,123,88,37,0,35.2,0.197,29,tested_negative +4,85,58,22,49,27.8,0.306,28,tested_negative +0,84,82,31,125,38.2,0.233,23,tested_negative +0,145,0,0,0,44.2,0.63,31,tested_positive +0,135,68,42,250,42.3,0.365,24,tested_positive +1,139,62,41,480,40.7,0.536,21,tested_negative +0,173,78,32,265,46.5,1.159,58,tested_negative +4,99,72,17,0,25.6,0.294,28,tested_negative +8,194,80,0,0,26.1,0.551,67,tested_negative +2,83,65,28,66,36.8,0.629,24,tested_negative +2,89,90,30,0,33.5,0.292,42,tested_negative +4,99,68,38,0,32.8,0.145,33,tested_negative +4,125,70,18,122,28.9,1.144,45,tested_positive +3,80,0,0,0,0,0.174,22,tested_negative +6,166,74,0,0,26.6,0.304,66,tested_negative +5,110,68,0,0,26,0.292,30,tested_negative +2,81,72,15,76,30.1,0.547,25,tested_negative +7,195,70,33,145,25.1,0.163,55,tested_positive +6,154,74,32,193,29.3,0.839,39,tested_negative +2,117,90,19,71,25.2,0.313,21,tested_negative +3,84,72,32,0,37.2,0.267,28,tested_negative +6,0,68,41,0,39,0.727,41,tested_positive +7,94,64,25,79,33.3,0.738,41,tested_negative +3,96,78,39,0,37.3,0.238,40,tested_negative +10,75,82,0,0,33.3,0.263,38,tested_negative +0,180,90,26,90,36.5,0.314,35,tested_positive +1,130,60,23,170,28.6,0.692,21,tested_negative +2,84,50,23,76,30.4,0.968,21,tested_negative +8,120,78,0,0,25,0.409,64,tested_negative +12,84,72,31,0,29.7,0.297,46,tested_positive +0,139,62,17,210,22.1,0.207,21,tested_negative +9,91,68,0,0,24.2,0.2,58,tested_negative +2,91,62,0,0,27.3,0.525,22,tested_negative +3,99,54,19,86,25.6,0.154,24,tested_negative +3,163,70,18,105,31.6,0.268,28,tested_positive +9,145,88,34,165,30.3,0.771,53,tested_positive +7,125,86,0,0,37.6,0.304,51,tested_negative +13,76,60,0,0,32.8,0.18,41,tested_negative +6,129,90,7,326,19.6,0.582,60,tested_negative 
+2,68,70,32,66,25,0.187,25,tested_negative +3,124,80,33,130,33.2,0.305,26,tested_negative +6,114,0,0,0,0,0.189,26,tested_negative +9,130,70,0,0,34.2,0.652,45,tested_positive +3,125,58,0,0,31.6,0.151,24,tested_negative +3,87,60,18,0,21.8,0.444,21,tested_negative +1,97,64,19,82,18.2,0.299,21,tested_negative +3,116,74,15,105,26.3,0.107,24,tested_negative +0,117,66,31,188,30.8,0.493,22,tested_negative +0,111,65,0,0,24.6,0.66,31,tested_negative +2,122,60,18,106,29.8,0.717,22,tested_negative +0,107,76,0,0,45.3,0.686,24,tested_negative +1,86,66,52,65,41.3,0.917,29,tested_negative +6,91,0,0,0,29.8,0.501,31,tested_negative +1,77,56,30,56,33.3,1.251,24,tested_negative +4,132,0,0,0,32.9,0.302,23,tested_positive +0,105,90,0,0,29.6,0.197,46,tested_negative +0,57,60,0,0,21.7,0.735,67,tested_negative +0,127,80,37,210,36.3,0.804,23,tested_negative +3,129,92,49,155,36.4,0.968,32,tested_positive +8,100,74,40,215,39.4,0.661,43,tested_positive +3,128,72,25,190,32.4,0.549,27,tested_positive +10,90,85,32,0,34.9,0.825,56,tested_positive +4,84,90,23,56,39.5,0.159,25,tested_negative +1,88,78,29,76,32,0.365,29,tested_negative +8,186,90,35,225,34.5,0.423,37,tested_positive +5,187,76,27,207,43.6,1.034,53,tested_positive +4,131,68,21,166,33.1,0.16,28,tested_negative +1,164,82,43,67,32.8,0.341,50,tested_negative +4,189,110,31,0,28.5,0.68,37,tested_negative +1,116,70,28,0,27.4,0.204,21,tested_negative +3,84,68,30,106,31.9,0.591,25,tested_negative +6,114,88,0,0,27.8,0.247,66,tested_negative +1,88,62,24,44,29.9,0.422,23,tested_negative +1,84,64,23,115,36.9,0.471,28,tested_negative +7,124,70,33,215,25.5,0.161,37,tested_negative +1,97,70,40,0,38.1,0.218,30,tested_negative +8,110,76,0,0,27.8,0.237,58,tested_negative +11,103,68,40,0,46.2,0.126,42,tested_negative +11,85,74,0,0,30.1,0.3,35,tested_negative +6,125,76,0,0,33.8,0.121,54,tested_positive +0,198,66,32,274,41.3,0.502,28,tested_positive +1,87,68,34,77,37.6,0.401,24,tested_negative +6,99,60,19,54,26.9,0.497,32,tested_negative 
+0,91,80,0,0,32.4,0.601,27,tested_negative +2,95,54,14,88,26.1,0.748,22,tested_negative +1,99,72,30,18,38.6,0.412,21,tested_negative +6,92,62,32,126,32,0.085,46,tested_negative +4,154,72,29,126,31.3,0.338,37,tested_negative +0,121,66,30,165,34.3,0.203,33,tested_positive +3,78,70,0,0,32.5,0.27,39,tested_negative +2,130,96,0,0,22.6,0.268,21,tested_negative +3,111,58,31,44,29.5,0.43,22,tested_negative +2,98,60,17,120,34.7,0.198,22,tested_negative +1,143,86,30,330,30.1,0.892,23,tested_negative +1,119,44,47,63,35.5,0.28,25,tested_negative +6,108,44,20,130,24,0.813,35,tested_negative +2,118,80,0,0,42.9,0.693,21,tested_positive +10,133,68,0,0,27,0.245,36,tested_negative +2,197,70,99,0,34.7,0.575,62,tested_positive +0,151,90,46,0,42.1,0.371,21,tested_positive +6,109,60,27,0,25,0.206,27,tested_negative +12,121,78,17,0,26.5,0.259,62,tested_negative +8,100,76,0,0,38.7,0.19,42,tested_negative +8,124,76,24,600,28.7,0.687,52,tested_positive +1,93,56,11,0,22.5,0.417,22,tested_negative +8,143,66,0,0,34.9,0.129,41,tested_positive +6,103,66,0,0,24.3,0.249,29,tested_negative +3,176,86,27,156,33.3,1.154,52,tested_positive +0,73,0,0,0,21.1,0.342,25,tested_negative +11,111,84,40,0,46.8,0.925,45,tested_positive +2,112,78,50,140,39.4,0.175,24,tested_negative +3,132,80,0,0,34.4,0.402,44,tested_positive +2,82,52,22,115,28.5,1.699,25,tested_negative +6,123,72,45,230,33.6,0.733,34,tested_negative +0,188,82,14,185,32,0.682,22,tested_positive +0,67,76,0,0,45.3,0.194,46,tested_negative +1,89,24,19,25,27.8,0.559,21,tested_negative +1,173,74,0,0,36.8,0.088,38,tested_positive +1,109,38,18,120,23.1,0.407,26,tested_negative +1,108,88,19,0,27.1,0.4,24,tested_negative +6,96,0,0,0,23.7,0.19,28,tested_negative +1,124,74,36,0,27.8,0.1,30,tested_negative +7,150,78,29,126,35.2,0.692,54,tested_positive +4,183,0,0,0,28.4,0.212,36,tested_positive +1,124,60,32,0,35.8,0.514,21,tested_negative +1,181,78,42,293,40,1.258,22,tested_positive +1,92,62,25,41,19.5,0.482,25,tested_negative 
+0,152,82,39,272,41.5,0.27,27,tested_negative +1,111,62,13,182,24,0.138,23,tested_negative +3,106,54,21,158,30.9,0.292,24,tested_negative +3,174,58,22,194,32.9,0.593,36,tested_positive +7,168,88,42,321,38.2,0.787,40,tested_positive +6,105,80,28,0,32.5,0.878,26,tested_negative +11,138,74,26,144,36.1,0.557,50,tested_positive +3,106,72,0,0,25.8,0.207,27,tested_negative +6,117,96,0,0,28.7,0.157,30,tested_negative +2,68,62,13,15,20.1,0.257,23,tested_negative +9,112,82,24,0,28.2,1.282,50,tested_positive +0,119,0,0,0,32.4,0.141,24,tested_positive +2,112,86,42,160,38.4,0.246,28,tested_negative +2,92,76,20,0,24.2,1.698,28,tested_negative +6,183,94,0,0,40.8,1.461,45,tested_negative +0,94,70,27,115,43.5,0.347,21,tested_negative +2,108,64,0,0,30.8,0.158,21,tested_negative +4,90,88,47,54,37.7,0.362,29,tested_negative +0,125,68,0,0,24.7,0.206,21,tested_negative +0,132,78,0,0,32.4,0.393,21,tested_negative +5,128,80,0,0,34.6,0.144,45,tested_negative +4,94,65,22,0,24.7,0.148,21,tested_negative +7,114,64,0,0,27.4,0.732,34,tested_positive +0,102,78,40,90,34.5,0.238,24,tested_negative +2,111,60,0,0,26.2,0.343,23,tested_negative +1,128,82,17,183,27.5,0.115,22,tested_negative +10,92,62,0,0,25.9,0.167,31,tested_negative +13,104,72,0,0,31.2,0.465,38,tested_positive +5,104,74,0,0,28.8,0.153,48,tested_negative +2,94,76,18,66,31.6,0.649,23,tested_negative +7,97,76,32,91,40.9,0.871,32,tested_positive +1,100,74,12,46,19.5,0.149,28,tested_negative +0,102,86,17,105,29.3,0.695,27,tested_negative +4,128,70,0,0,34.3,0.303,24,tested_negative +6,147,80,0,0,29.5,0.178,50,tested_positive +4,90,0,0,0,28,0.61,31,tested_negative +3,103,72,30,152,27.6,0.73,27,tested_negative +2,157,74,35,440,39.4,0.134,30,tested_negative +1,167,74,17,144,23.4,0.447,33,tested_positive +0,179,50,36,159,37.8,0.455,22,tested_positive +11,136,84,35,130,28.3,0.26,42,tested_positive +0,107,60,25,0,26.4,0.133,23,tested_negative +1,91,54,25,100,25.2,0.234,23,tested_negative +1,117,60,23,106,33.8,0.466,27,tested_negative 
+5,123,74,40,77,34.1,0.269,28,tested_negative +2,120,54,0,0,26.8,0.455,27,tested_negative +1,106,70,28,135,34.2,0.142,22,tested_negative +2,155,52,27,540,38.7,0.24,25,tested_positive +2,101,58,35,90,21.8,0.155,22,tested_negative +1,120,80,48,200,38.9,1.162,41,tested_negative +11,127,106,0,0,39,0.19,51,tested_negative +3,80,82,31,70,34.2,1.292,27,tested_positive +10,162,84,0,0,27.7,0.182,54,tested_negative +1,199,76,43,0,42.9,1.394,22,tested_positive +8,167,106,46,231,37.6,0.165,43,tested_positive +9,145,80,46,130,37.9,0.637,40,tested_positive +6,115,60,39,0,33.7,0.245,40,tested_positive +1,112,80,45,132,34.8,0.217,24,tested_negative +4,145,82,18,0,32.5,0.235,70,tested_positive +10,111,70,27,0,27.5,0.141,40,tested_positive +6,98,58,33,190,34,0.43,43,tested_negative +9,154,78,30,100,30.9,0.164,45,tested_negative +6,165,68,26,168,33.6,0.631,49,tested_negative +1,99,58,10,0,25.4,0.551,21,tested_negative +10,68,106,23,49,35.5,0.285,47,tested_negative +3,123,100,35,240,57.3,0.88,22,tested_negative +8,91,82,0,0,35.6,0.587,68,tested_negative +6,195,70,0,0,30.9,0.328,31,tested_positive +9,156,86,0,0,24.8,0.23,53,tested_positive +0,93,60,0,0,35.3,0.263,25,tested_negative +3,121,52,0,0,36,0.127,25,tested_positive +2,101,58,17,265,24.2,0.614,23,tested_negative +2,56,56,28,45,24.2,0.332,22,tested_negative +0,162,76,36,0,49.6,0.364,26,tested_positive +0,95,64,39,105,44.6,0.366,22,tested_negative +4,125,80,0,0,32.3,0.536,27,tested_positive +5,136,82,0,0,0,0.64,69,tested_negative +2,129,74,26,205,33.2,0.591,25,tested_negative +3,130,64,0,0,23.1,0.314,22,tested_negative +1,107,50,19,0,28.3,0.181,29,tested_negative +1,140,74,26,180,24.1,0.828,23,tested_negative +1,144,82,46,180,46.1,0.335,46,tested_positive +8,107,80,0,0,24.6,0.856,34,tested_negative +13,158,114,0,0,42.3,0.257,44,tested_positive +2,121,70,32,95,39.1,0.886,23,tested_negative +7,129,68,49,125,38.5,0.439,43,tested_positive +2,90,60,0,0,23.5,0.191,25,tested_negative +7,142,90,24,480,30.4,0.128,43,tested_positive 
+3,169,74,19,125,29.9,0.268,31,tested_positive +0,99,0,0,0,25,0.253,22,tested_negative +4,127,88,11,155,34.5,0.598,28,tested_negative +4,118,70,0,0,44.5,0.904,26,tested_negative +2,122,76,27,200,35.9,0.483,26,tested_negative +6,125,78,31,0,27.6,0.565,49,tested_positive +1,168,88,29,0,35,0.905,52,tested_positive +2,129,0,0,0,38.5,0.304,41,tested_negative +4,110,76,20,100,28.4,0.118,27,tested_negative +6,80,80,36,0,39.8,0.177,28,tested_negative +10,115,0,0,0,0,0.261,30,tested_positive +2,127,46,21,335,34.4,0.176,22,tested_negative +9,164,78,0,0,32.8,0.148,45,tested_positive +2,93,64,32,160,38,0.674,23,tested_positive +3,158,64,13,387,31.2,0.295,24,tested_negative +5,126,78,27,22,29.6,0.439,40,tested_negative +10,129,62,36,0,41.2,0.441,38,tested_positive +0,134,58,20,291,26.4,0.352,21,tested_negative +3,102,74,0,0,29.5,0.121,32,tested_negative +7,187,50,33,392,33.9,0.826,34,tested_positive +3,173,78,39,185,33.8,0.97,31,tested_positive +10,94,72,18,0,23.1,0.595,56,tested_negative +1,108,60,46,178,35.5,0.415,24,tested_negative +5,97,76,27,0,35.6,0.378,52,tested_positive +4,83,86,19,0,29.3,0.317,34,tested_negative +1,114,66,36,200,38.1,0.289,21,tested_negative +1,149,68,29,127,29.3,0.349,42,tested_positive +5,117,86,30,105,39.1,0.251,42,tested_negative +1,111,94,0,0,32.8,0.265,45,tested_negative +4,112,78,40,0,39.4,0.236,38,tested_negative +1,116,78,29,180,36.1,0.496,25,tested_negative +0,141,84,26,0,32.4,0.433,22,tested_negative +2,175,88,0,0,22.9,0.326,22,tested_negative +2,92,52,0,0,30.1,0.141,22,tested_negative +3,130,78,23,79,28.4,0.323,34,tested_positive +8,120,86,0,0,28.4,0.259,22,tested_positive +2,174,88,37,120,44.5,0.646,24,tested_positive +2,106,56,27,165,29,0.426,22,tested_negative +2,105,75,0,0,23.3,0.56,53,tested_negative +4,95,60,32,0,35.4,0.284,28,tested_negative +0,126,86,27,120,27.4,0.515,21,tested_negative +8,65,72,23,0,32,0.6,42,tested_negative +2,99,60,17,160,36.6,0.453,21,tested_negative +1,102,74,0,0,39.5,0.293,42,tested_positive 
+11,120,80,37,150,42.3,0.785,48,tested_positive +3,102,44,20,94,30.8,0.4,26,tested_negative +1,109,58,18,116,28.5,0.219,22,tested_negative +9,140,94,0,0,32.7,0.734,45,tested_positive +13,153,88,37,140,40.6,1.174,39,tested_negative +12,100,84,33,105,30,0.488,46,tested_negative +1,147,94,41,0,49.3,0.358,27,tested_positive +1,81,74,41,57,46.3,1.096,32,tested_negative +3,187,70,22,200,36.4,0.408,36,tested_positive +6,162,62,0,0,24.3,0.178,50,tested_positive +4,136,70,0,0,31.2,1.182,22,tested_positive +1,121,78,39,74,39,0.261,28,tested_negative +3,108,62,24,0,26,0.223,25,tested_negative +0,181,88,44,510,43.3,0.222,26,tested_positive +8,154,78,32,0,32.4,0.443,45,tested_positive +1,128,88,39,110,36.5,1.057,37,tested_positive +7,137,90,41,0,32,0.391,39,tested_negative +0,123,72,0,0,36.3,0.258,52,tested_positive +1,106,76,0,0,37.5,0.197,26,tested_negative +6,190,92,0,0,35.5,0.278,66,tested_positive +2,88,58,26,16,28.4,0.766,22,tested_negative +9,170,74,31,0,44,0.403,43,tested_positive +9,89,62,0,0,22.5,0.142,33,tested_negative +10,101,76,48,180,32.9,0.171,63,tested_negative +2,122,70,27,0,36.8,0.34,27,tested_negative +5,121,72,23,112,26.2,0.245,30,tested_negative +1,126,60,0,0,30.1,0.349,47,tested_positive +1,93,70,31,0,30.4,0.315,23,tested_negative diff --git a/tests/data/iris.arff b/tests/data/iris.arff new file mode 100755 index 0000000..780480c --- /dev/null +++ b/tests/data/iris.arff @@ -0,0 +1,225 @@ +% 1. Title: Iris Plants Database +% +% 2. Sources: +% (a) Creator: R.A. Fisher +% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) +% (c) Date: July, 1988 +% +% 3. Past Usage: +% - Publications: too many to mention!!! Here are a few. +% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" +% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions +% to Mathematical Statistics" (John Wiley, NY, 1950). +% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. +% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. 
See page 218. +% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System +% Structure and Classification Rule for Recognition in Partially Exposed +% Environments". IEEE Transactions on Pattern Analysis and Machine +% Intelligence, Vol. PAMI-2, No. 1, 67-71. +% -- Results: +% -- very low misclassification rates (0% for the setosa class) +% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE +% Transactions on Information Theory, May 1972, 431-433. +% -- Results: +% -- very low misclassification rates again +% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II +% conceptual clustering system finds 3 classes in the data. +% +% 4. Relevant Information: +% --- This is perhaps the best known database to be found in the pattern +% recognition literature. Fisher's paper is a classic in the field +% and is referenced frequently to this day. (See Duda & Hart, for +% example.) The data set contains 3 classes of 50 instances each, +% where each class refers to a type of iris plant. One class is +% linearly separable from the other 2; the latter are NOT linearly +% separable from each other. +% --- Predicted attribute: class of iris plant. +% --- This is an exceedingly simple domain. +% +% 5. Number of Instances: 150 (50 in each of three classes) +% +% 6. Number of Attributes: 4 numeric, predictive attributes and the class +% +% 7. Attribute Information: +% 1. sepal length in cm +% 2. sepal width in cm +% 3. petal length in cm +% 4. petal width in cm +% 5. class: +% -- Iris Setosa +% -- Iris Versicolour +% -- Iris Virginica +% +% 8. Missing Attribute Values: None +% +% Summary Statistics: +% Min Max Mean SD Class Correlation +% sepal length: 4.3 7.9 5.84 0.83 0.7826 +% sepal width: 2.0 4.4 3.05 0.43 -0.4194 +% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) +% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) +% +% 9. Class Distribution: 33.3% for each of 3 classes. 
+ +@RELATION iris + +@ATTRIBUTE sepallength REAL +@ATTRIBUTE sepalwidth REAL +@ATTRIBUTE petallength REAL +@ATTRIBUTE petalwidth REAL +@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica} + +@DATA +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor 
+5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica 
+5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica +% +% +%