Initial Commit

This commit is contained in:
2024-01-09 17:45:06 +01:00
parent 73cf64d8c2
commit 455d9f3330
87 changed files with 41694 additions and 1 deletions

16
.clang-tidy Normal file
View File

@@ -0,0 +1,16 @@
---
# clang-tidy configuration for the project.
# All checks are disabled first ('-*'), then whole check families are enabled
# and three individual checks are switched off again.
Checks: '-*,
  clang-*,
  bugprone-*,
  cppcoreguidelines-*,
  modernize-*,
  performance-*,
  -cppcoreguidelines-pro-type-vararg,
  -modernize-use-trailing-return-type,
  -bugprone-exception-escape'
# HeaderFilterRegex is a regular expression, not a glob: 'src/*' would mean
# "src" followed by zero or more '/' characters. 'src/.*' selects headers
# located below a src/ directory.
HeaderFilterRegex: 'src/.*'
# NOTE(review): AnalyzeTemporaryDtors is deprecated in recent clang-tidy
# releases and may be rejected as an unknown key - confirm against the
# clang-tidy version in use before upgrading.
AnalyzeTemporaryDtors: false
WarningsAsErrors: ''
FormatStyle: file
...

31
.clang-uml Normal file
View File

@@ -0,0 +1,31 @@
compilation_database_dir: build
output_directory: puml
diagrams:
Platform:
type: class
glob:
- src/Platform/*.cc
- src/Command/*.cc
using_namespace: platform
include:
namespaces:
- bayesnet
- platform
plantuml:
after:
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
sequence:
type: sequence
glob:
- src/Command/b_main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std
- bayesnet
- platform
include:
paths:
- src/Command
- src/Platform
start_from:
- function: main(int,const char **)

40
.gitignore vendored Normal file
View File

@@ -0,0 +1,40 @@
# ---> C++
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
build/**
build_*/**
*.dSYM/**
cmake-build*/**
.idea
puml/**
.vscode/settings.json

15
.gitmodules vendored Normal file
View File

@@ -0,0 +1,15 @@
[submodule "lib/catch2"]
path = lib/catch2
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

18
.vscode/c_cpp_properties.json vendored Normal file
View File

@@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Mac",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"macFrameworkPath": [
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
],
"cStandard": "c17",
"cppStandard": "c++17",
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
}
],
"version": 4
}

130
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,130 @@
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "sample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [
"-d",
"iris",
"-m",
"TANLd",
"-s",
"271",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets/",
],
//"cwd": "${workspaceFolder}/build/sample/",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"STree",
"--stratified",
"-d",
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [
"-n",
"20"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [
"-c=\"Metrics Test\"",
// "-s",
],
"cwd": "${workspaceFolder}/build_debug/tests",
},
{
"name": "Build & debug active file",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_debug/bayesnet",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "lldb",
"preLaunchTask": "CMake: build"
}
]
}

60
.vscode/tasks.json vendored Normal file
View File

@@ -0,0 +1,60 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "cmake",
"label": "CMake: build",
"command": "build",
"targets": [
"all"
],
"group": "build",
"problemMatcher": [],
"detail": "CMake template build task"
},
{
"type": "cppbuild",
"label": "C/C++: clang build active file",
"command": "/usr/bin/clang",
"args": [
"-fcolor-diagnostics",
"-fansi-escape-codes",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": "build",
"detail": "Task generated by Debugger."
},
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
]
}

95
CMakeLists.txt Normal file
View File

@@ -0,0 +1,95 @@
# Top-level build description of the Platform project: locates the external
# packages, loads the helper modules from cmake/modules, adds the bundled
# submodules and descends into the project subdirectories.
cmake_minimum_required(VERSION 3.20)

project(
    Platform
    VERSION 1.0.0
    DESCRIPTION "Platform to run Experiments with classifiers."
    HOMEPAGE_URL "https://github.com/rmontanana/platform"
    LANGUAGES CXX
)

# A coverage build without the tests is rejected at configure time.
if(CODE_COVERAGE AND NOT ENABLE_TESTING)
    message(FATAL_ERROR "Code coverage requires testing enabled")
endif()

find_package(Torch REQUIRED)

# Select the NEW behaviour of policy CMP0135 when this CMake release knows it.
if(POLICY CMP0135)
    cmake_policy(SET CMP0135 NEW)
endif()

# Global CMake variables
# ----------------------
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Append the flags reported by the Torch package, followed by -pthread.
string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}" " -pthread")

# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)

# MPI
find_package(MPI REQUIRED)
message("MPI_CXX_LIBRARIES=${MPI_CXX_LIBRARIES}")
message("MPI_CXX_INCLUDE_DIRS=${MPI_CXX_INCLUDE_DIRS}")

# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
if(Boost_FOUND)
    message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
    include_directories(${Boost_INCLUDE_DIRS})
endif()

# Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")

# CMake modules
# -------------
# The project's own modules take precedence over any previously listed path.
list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules)
include(AddGitSubmodule)

if(CODE_COVERAGE)
    enable_testing()
    include(CodeCoverage)
    message("Code coverage enabled")
    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
    set(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif()

if(ENABLE_CLANG_TIDY)
    include(StaticAnalyzers) # clang-tidy
endif()

# External libraries - dependencies of BayesNet
# ---------------------------------------------
add_git_submodule("lib/PyClassifiers")
add_git_submodule("lib/argparse")
find_library(
    XLSXWRITER_LIB
    NAMES libxlsxwriter.dylib libxlsxwriter.so
    PATHS ${Platform_SOURCE_DIR}/lib/libxlsxwriter/lib
)
message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")

# Subdirectories
# --------------
add_subdirectory(config)
add_subdirectory(src/Platform)
add_subdirectory(sample)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/Platform/*.cc)

# Testing
# -------
if(ENABLE_TESTING)
    message("Testing enabled")
    # Catch2 is only added when no other part of the build already provides it.
    if(NOT TARGET Catch2::Catch2)
        add_git_submodule("lib/catch2")
    endif()
    include(CTest)
    add_subdirectory(tests)
endif()

136
Makefile Normal file
View File

@@ -0,0 +1,136 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage setup help build test clean debug release submodules buildr buildd install dependency testp testb clang-uml
f_release = build_release
f_debug = build_debug
app_targets = b_best b_list b_main b_manage b_grid
test_targets = unit_tests_bayesnet unit_tests_platform
n_procs = -j 16
# ClearTests: remove the unit-test binaries listed in test_targets from the
# debug build folder and delete every gcov counter file (*.gcda) found below
# the current directory.
# The previous version wrote the shell command substitution and the shell
# variable with a single dollar sign; make expanded both to empty strings
# before the shell ran, so the guard was always false and the counter files
# were never removed. Deleting straight from find needs no guard at all.
define ClearTests
	@for t in $(test_targets); do \
		if [ -f $(f_debug)/tests/$$t ]; then \
			echo ">>> Cleaning $$t..." ; \
			rm -f $(f_debug)/tests/$$t ; \
		fi ; \
	done
	@find . -name "*.gcda" -type f -exec rm -f {} + 2>/dev/null
endef
submodules: ## Update submodules
@git submodule update --init --recursive
@git submodule update --remote --merge
@git submodule foreach git pull origin master
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
brew install gcovr; \
brew install lcov; \
fi
@if [ "$(shell uname)" = "Linux" ]; then \
pip install gcovr; \
fi
# Destination folder of the install target; override it on the command line,
# e.g. "make install dest=/some/dir".
dest ?= ${HOME}/bin
# install: build the release targets and copy every application binary to
# $(dest). The nested build goes through $(MAKE) so that command line flags
# and the jobserver are inherited, and the destination folder is created
# first so that cp never writes the binaries onto a plain file called $(dest).
install: ## Copy binary files to bin folder
	@echo "Destination folder: $(dest)"
	$(MAKE) buildr
	@echo "*******************************************"
	@echo ">>> Copying files to $(dest)"
	@echo "*******************************************"
	@mkdir -p $(dest)
	@for item in $(app_targets); do \
		echo ">>> Copying $$item" ; \
		cp $(f_release)/src/Platform/$$item $(dest) ; \
	done
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
@echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) BayesNetSample $(n_procs)
buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) BayesNetSample $(n_procs)
clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests...";
$(call ClearTests)
@echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
# Extra arguments handed to the test executables (see the target description).
opt = ""
# test: rebuild and run every executable listed in test_targets.
# The whole for loop runs in one shell, so a plain "cd" would still be in
# effect on the next iteration and the relative existence check would fail
# for every target after the first one. Each test therefore runs in its own
# subshell, which leaves the working directory of the loop untouched.
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
	@echo ">>> Running BayesNet & Platform tests...";
	@$(MAKE) clean
	@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
	@for t in $(test_targets); do \
		if [ -f $(f_debug)/tests/$$t ]; then \
			( cd $(f_debug)/tests && ./$$t $(opt) ) ; \
		fi ; \
	done
	@echo ">>> Done";
opt = ""
testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section
@echo ">>> Running Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_platform $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ;
@echo ">>> Done";
opt = ""
testb: ## Run BayesNet tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ;
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo ">>> Building tests with coverage...";
@$(MAKE) test
@cd $(f_debug) ; \
gcovr --config ../gcovr.cfg tests ;
@echo ">>> Done";
# help: print a table of the documented targets.
# The recipe collects every line of the files in MAKEFILE_LIST that carries
# the double-hash marker, drops the lines that mention fgrep (the recipe's own
# pipeline), strips a trailing backslash and turns the marker into ":".
# Each remaining line is then split on ":"; field 0 is used as the target name
# and field 2 as its description. The target name is printed in cyan
# (escape sequence 36) and the colour is reset before the description.
# Keep comments in this file free of the double-hash marker, otherwise they
# are listed as targets too.
help: ## Show help message
@IFS=$$'\n' ; \
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
printf "%s\n\n" "Usage: make [task]"; \
printf "%-20s %s\n" "task" "help" ; \
printf "%-20s %s\n" "------" "----" ; \
for help_line in $${help_lines[@]}; do \
IFS=$$':' ; \
help_split=($$help_line) ; \
help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
printf '\033[36m'; \
printf "%-20s %s" $$help_command ; \
printf '\033[0m'; \
printf "%s\n" $$help_info; \
done

View File

@@ -1,3 +1,89 @@
# Platform
Platform to run Bayesian Networks and Machine Learning Classifiers experiments.
Platform to run Bayesian Networks and Machine Learning Classifiers experiments.
# Platform
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Platform to run Bayesian Networks and Machine Learning Classifiers experiments.
## 0. Setup
Before compiling BayesNet.
### Miniconda
To run Python classifiers such as STree, ODTE, SVC, etc., Miniconda must be installed. Download the installer from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and run it. Installing it in the home folder is recommended.
On Linux, the libstdc++ library is sometimes mistakenly picked up from the Miniconda installation, which produces the following message when running the b_xxxx executables:
```bash
libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx)
```
The solution is to erase the libstdc++ library from the miniconda installation:
### MPI
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash
export MPI_HOME="/usr/lib64/openmpi"
```
In Mac OS X, install mpich with brew and if cmake doesn't find it, edit mpicxx wrapper to remove the ",-commons,use_dylibs" from final_ldflags
```bash
vi /opt/homebrew/bin/mpicxx
```
### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
The best option is to install the packages that the Linux distribution provides in its repository. If they are available:
```bash
sudo dnf install boost-devel
```
If this is not possible and the compressed package is used instead, the following environment variable has to be set, pointing to the folder where the package was unzipped:
```bash
export BOOST_ROOT=/path/to/library/
```
In some cases, it is needed to build the library, to do so:
```bash
cd /path/to/library
mkdir own
./bootstrap.sh --prefix=/path/to/library/own
./b2 install
export BOOST_ROOT=/path/to/library/own/
```
Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be.
### libxlsxwriter
```bash
cd lib/libxlsxwriter
make
make install DESTDIR=/home/rmontanana/Code PREFIX=
```
### Release
```bash
make release
```
### Debug & Tests
```bash
make debug
```
## 1. Introduction

View File

@@ -0,0 +1,12 @@
# add_git_submodule(<dir>)
#
# Adds the git submodule located in <dir> to the build.
# When <dir> does not contain a CMakeLists.txt yet, the submodule is first
# initialised with "git submodule update --init --recursive -- <dir>", run
# from PROJECT_SOURCE_DIR; the configuration stops if that command fails.
# <dir> is then handed to add_subdirectory().
#
# dir - path of the submodule; a relative path is interpreted like
#       add_subdirectory() does, i.e. relative to the current source directory.
function(add_git_submodule dir)
    find_package(Git REQUIRED)

    # if(EXISTS ...) is only well-defined for full paths, so the check is done
    # on the absolute location add_subdirectory() is going to use.
    get_filename_component(_submodule_dir "${dir}" ABSOLUTE)

    if(NOT EXISTS "${_submodule_dir}/CMakeLists.txt")
        message(STATUS "🚨 Adding git submodule => ${dir}")
        execute_process(
            COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
            RESULT_VARIABLE _submodule_result
        )
        # Report a failed checkout here instead of letting add_subdirectory()
        # fail later with a less helpful message.
        if(NOT "${_submodule_result}" STREQUAL "0")
            message(FATAL_ERROR "git submodule update failed for ${dir}: ${_submodule_result}")
        endif()
    endif()

    add_subdirectory(${dir})
endfunction(add_git_submodule)

View File

@@ -0,0 +1,742 @@
# Copyright (c) 2012 - 2017, Lars Bilke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# CHANGES:
#
# 2012-01-31, Lars Bilke
# - Enable Code Coverage
#
# 2013-09-17, Joakim Söderberg
# - Added support for Clang.
# - Some additional usage instructions.
#
# 2016-02-03, Lars Bilke
# - Refactored functions to use named parameters
#
# 2017-06-02, Lars Bilke
# - Merged with modified version from github.com/ufz/ogs
#
# 2019-05-06, Anatolii Kurotych
# - Remove unnecessary --coverage flag
#
# 2019-12-13, FeRD (Frank Dana)
# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor
# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments.
# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY
# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list
# - Set lcov basedir with -b argument
# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be
# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().)
# - Delete output dir, .info file on 'make clean'
# - Remove Python detection, since version mismatches will break gcovr
# - Minor cleanup (lowercase function names, update examples...)
#
# 2019-12-19, FeRD (Frank Dana)
# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets
#
# 2020-01-19, Bob Apthorpe
# - Added gfortran support
#
# 2020-02-17, FeRD (Frank Dana)
# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters
# in EXCLUDEs, and remove manual escaping from gcovr targets
#
# 2021-01-19, Robin Mueller
# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run
# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional
# flags to the gcovr command
#
# 2020-05-04, Mihchael Davis
# - Add -fprofile-abs-path to make gcno files contain absolute paths
# - Fix BASE_DIRECTORY not working when defined
# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines
#
# 2021-05-10, Martin Stump
# - Check if the generator is multi-config before warning about non-Debug builds
#
# 2022-02-22, Marko Wehle
# - Change gcovr output from -o <filename> for --xml <filename> and --html <filename> output respectively.
# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt".
#
# 2022-09-28, Sebastian Mueller
# - fix append_coverage_compiler_flags_to_target to correctly add flags
# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent)
#
# USAGE:
#
# 1. Copy this file into your cmake modules path.
#
# 2. Add the following line to your CMakeLists.txt (best inside an if-condition
# using a CMake option() to enable it just optionally):
# include(CodeCoverage)
#
# 3. Append necessary compiler flags for all supported source files:
# append_coverage_compiler_flags()
# Or for specific target:
# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME)
#
# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og
#
# 4. If you need to exclude additional directories from the report, specify them
# using full paths in the COVERAGE_EXCLUDES variable before calling
# setup_target_for_coverage_*().
# Example:
# set(COVERAGE_EXCLUDES
# '${PROJECT_SOURCE_DIR}/src/dir1/*'
# '/path/to/my/src/dir2/*')
# Or, use the EXCLUDE argument to setup_target_for_coverage_*().
# Example:
# setup_target_for_coverage_lcov(
# NAME coverage
# EXECUTABLE testrunner
# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*")
#
# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set
# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR)
# Example:
# set(COVERAGE_EXCLUDES "dir1/*")
# setup_target_for_coverage_gcovr_html(
# NAME coverage
# EXECUTABLE testrunner
# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src"
# EXCLUDE "dir2/*")
#
# 5. Use the functions described below to create a custom make target which
# runs your test executable and produces a code coverage report.
#
# 6. Build a Debug build:
# cmake -DCMAKE_BUILD_TYPE=Debug ..
# make
# make my_coverage_target
#
include(CMakeParseArguments)
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
# Check prereqs
find_program( GCOV_PATH gcov )
find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl)
find_program( FASTCOV_PATH NAMES fastcov fastcov.py )
find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat )
find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test)
find_program( CPPFILT_PATH NAMES c++filt )
if(NOT GCOV_PATH)
message(FATAL_ERROR "gcov not found! Aborting...")
endif() # NOT GCOV_PATH
# Check supported compiler (Clang, GNU and Flang)
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach(LANG ${LANGUAGES})
if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3)
message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...")
endif()
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
endif()
endforeach()
set(COVERAGE_COMPILER_FLAGS "-g --coverage"
CACHE INTERNAL "")
if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)")
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path)
if(HAVE_fprofile_abs_path)
set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path")
endif()
endif()
set(CMAKE_Fortran_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the Fortran compiler during coverage builds."
FORCE )
set(CMAKE_CXX_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the C++ compiler during coverage builds."
FORCE )
set(CMAKE_C_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the C compiler during coverage builds."
FORCE )
set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
""
CACHE STRING "Flags used for linking binaries during coverage builds."
FORCE )
set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
""
CACHE STRING "Flags used by the shared libraries linker during coverage builds."
FORCE )
mark_as_advanced(
CMAKE_Fortran_FLAGS_COVERAGE
CMAKE_CXX_FLAGS_COVERAGE
CMAKE_C_FLAGS_COVERAGE
CMAKE_EXE_LINKER_FLAGS_COVERAGE
CMAKE_SHARED_LINKER_FLAGS_COVERAGE )
get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG))
message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading")
endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
link_libraries(gcov)
endif()
# Defines a target for running tests and collecting code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_lcov(
# NAME testrunner_coverage # New target name
# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES testrunner # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# NO_DEMANGLE # Don't demangle C++ symbols
# # even if c++filt is found
# )
function(setup_target_for_coverage_lcov)
# Creates the custom target <NAME> which, in this order: zeroes the lcov
# counters, captures a baseline, runs EXECUTABLE (with EXECUTABLE_ARGS),
# captures the counters, merges them with the baseline, removes the excluded
# patterns and generates an HTML report with genhtml.
# Files produced in PROJECT_BINARY_DIR: <NAME>.base, <NAME>.capture,
# <NAME>.total, <NAME>.info and <NAME>/index.html (plus <NAME>_sonarqube.xml
# when the SONARQUBE option is given).
#
# Arguments (available below as Coverage_<ARGUMENT>):
#   flags        : NO_DEMANGLE, SONARQUBE
#   single value : BASE_DIRECTORY, NAME
#   lists        : EXCLUDE, EXECUTABLE, EXECUTABLE_ARGS, DEPENDENCIES,
#                  LCOV_ARGS, GENHTML_ARGS
set(options NO_DEMANGLE SONARQUBE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS)
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
# lcov and genhtml are mandatory for this target; configuration stops without them.
if(NOT LCOV_PATH)
message(FATAL_ERROR "lcov not found! Aborting...")
endif() # NOT LCOV_PATH
if(NOT GENHTML_PATH)
message(FATAL_ERROR "genhtml not found! Aborting...")
endif() # NOT GENHTML_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
# Sources of exclude patterns: the EXCLUDE argument plus the variables
# COVERAGE_EXCLUDES and COVERAGE_LCOV_EXCLUDES of the calling scope.
set(LCOV_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND LCOV_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES LCOV_EXCLUDES)
# Conditional arguments
# genhtml only gets --demangle-cpp when c++filt was found and NO_DEMANGLE was not passed.
if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
set(GENHTML_EXTRA_ARGS "--demangle-cpp")
endif()
# Setting up commands which will be run to generate coverage data.
# Cleanup lcov
set(LCOV_CLEAN_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory .
-b ${BASEDIR} --zerocounters
)
# Create baseline to make sure untouched files show up in the report
set(LCOV_BASELINE_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b
${BASEDIR} -o ${Coverage_NAME}.base
)
# Run tests
set(LCOV_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Capturing lcov counters and generating report
set(LCOV_CAPTURE_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . -b
${BASEDIR} --capture --output-file ${Coverage_NAME}.capture
)
# add baseline counters
set(LCOV_BASELINE_COUNT_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base
-a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total
)
# filter collected data to final coverage report
set(LCOV_FILTER_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove
${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info
)
# Generate HTML output
set(LCOV_GEN_HTML_CMD
${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o
${Coverage_NAME} ${Coverage_NAME}.info
)
if(${Coverage_SONARQUBE})
# Generate SonarQube output
# NOTE(review): GCOVR_PATH is not checked in this function and
# GCOVR_EXCLUDE_ARGS is not set here; both are read from the enclosing
# scope - confirm they are defined whenever SONARQUBE is requested.
set(GCOVR_XML_CMD
${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
# The three variables below stay unset without SONARQUBE, so they expand to
# nothing in the add_custom_target()/add_custom_command() calls further down.
set(GCOVR_XML_CMD_COMMAND
COMMAND ${GCOVR_XML_CMD}
)
set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml)
set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.")
endif()
# With CODE_COVERAGE_VERBOSE every command list is printed at configure time,
# with the list separators replaced by spaces for readability.
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to clean up lcov: ")
string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}")
message(STATUS "${LCOV_CLEAN_CMD_SPACED}")
message(STATUS "Command to create baseline: ")
string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}")
message(STATUS "${LCOV_BASELINE_CMD_SPACED}")
message(STATUS "Command to run the tests: ")
string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}")
message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to capture counters and generate report: ")
string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}")
message(STATUS "${LCOV_CAPTURE_CMD_SPACED}")
message(STATUS "Command to add baseline counters: ")
string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}")
message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}")
message(STATUS "Command to filter collected data: ")
string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}")
message(STATUS "${LCOV_FILTER_CMD_SPACED}")
message(STATUS "Command to generate lcov HTML output: ")
string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}")
message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}")
if(${Coverage_SONARQUBE})
message(STATUS "Command to generate SonarQube XML output: ")
string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
message(STATUS "${GCOVR_XML_CMD_SPACED}")
endif()
endif()
# Setup target
# The commands run in the listed order from PROJECT_BINARY_DIR.
add_custom_target(${Coverage_NAME}
COMMAND ${LCOV_CLEAN_CMD}
COMMAND ${LCOV_BASELINE_CMD}
COMMAND ${LCOV_EXEC_TESTS_CMD}
COMMAND ${LCOV_CAPTURE_CMD}
COMMAND ${LCOV_BASELINE_COUNT_CMD}
COMMAND ${LCOV_FILTER_CMD}
COMMAND ${LCOV_GEN_HTML_CMD}
${GCOVR_XML_CMD_COMMAND}
# Set output files as GENERATED (will be removed on 'make clean')
BYPRODUCTS
${Coverage_NAME}.base
${Coverage_NAME}.capture
${Coverage_NAME}.total
${Coverage_NAME}.info
${GCOVR_XML_CMD_BYPRODUCTS}
${Coverage_NAME}/index.html
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report."
)
# Show where to find the lcov info report
# NOTE(review): the COMMAND is just ";" - presumably a no-op whose only
# purpose is to get the COMMENT printed after the target has run.
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info."
${GCOVR_XML_CMD_COMMENT}
)
# Show info where to find the report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
)
endfunction() # setup_target_for_coverage_lcov
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_gcovr_xml(
# NAME ctest_coverage # New target name
# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES executable_target # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# )
# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
# gcovr command.
# Creates a custom target named <NAME> that runs the given executable and then
# invokes gcovr to write a Cobertura XML report to <NAME>.xml.
# Keyword arguments: NAME, BASE_DIRECTORY (one value each); EXCLUDE, EXECUTABLE,
# EXECUTABLE_ARGS, DEPENDENCIES (multi-value). Fails the configure step when
# GCOVR_PATH is not set.
function(setup_target_for_coverage_gcovr_xml)
# No boolean option is read by this function; NONE only fills the options slot.
set(options NONE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
# Parsed values become Coverage_<KEYWORD> variables.
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT GCOVR_PATH)
message(FATAL_ERROR "gcovr not found! Aborting...")
endif() # NOT GCOVR_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
# Excludes come from the EXCLUDE argument plus the COVERAGE_EXCLUDES and
# COVERAGE_GCOVR_EXCLUDES variables of the calling scope.
set(GCOVR_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
# Combine excludes to several -e arguments
set(GCOVR_EXCLUDE_ARGS "")
foreach(EXCLUDE ${GCOVR_EXCLUDES})
list(APPEND GCOVR_EXCLUDE_ARGS "-e")
list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
endforeach()
# Set up commands which will be run to generate coverage data
# Run tests
set(GCOVR_XML_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Running gcovr
set(GCOVR_XML_CMD
${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
# With CODE_COVERAGE_VERBOSE set, print both commands at configure time
# (list separators replaced by spaces for readability).
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to run tests: ")
string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}")
message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to generate gcovr XML coverage data: ")
string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
message(STATUS "${GCOVR_XML_CMD_SPACED}")
endif()
# The target runs the tests first, then gcovr, both from PROJECT_BINARY_DIR.
add_custom_target(${Coverage_NAME}
COMMAND ${GCOVR_XML_EXEC_TESTS_CMD}
COMMAND ${GCOVR_XML_CMD}
BYPRODUCTS ${Coverage_NAME}.xml
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Running gcovr to produce Cobertura code coverage report."
)
# Show info where to find the report
# NOTE(review): "COMMAND ;" presumably acts as an empty command so that only
# the COMMENT text is shown after the target has run - confirm.
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml."
)
endfunction() # setup_target_for_coverage_gcovr_xml
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_gcovr_html(
# NAME ctest_coverage # New target name
# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES executable_target # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# )
# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
# gcovr command.
# Creates a custom target named <NAME> that runs the given executable, creates
# the directory PROJECT_BINARY_DIR/<NAME> and invokes gcovr to write a detailed
# HTML report to <NAME>/index.html.
# Keyword arguments: NAME, BASE_DIRECTORY (one value each); EXCLUDE, EXECUTABLE,
# EXECUTABLE_ARGS, DEPENDENCIES (multi-value). Fails the configure step when
# GCOVR_PATH is not set.
function(setup_target_for_coverage_gcovr_html)
# No boolean option is read by this function; NONE only fills the options slot.
set(options NONE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
# Parsed values become Coverage_<KEYWORD> variables.
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT GCOVR_PATH)
message(FATAL_ERROR "gcovr not found! Aborting...")
endif() # NOT GCOVR_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
# Excludes come from the EXCLUDE argument plus the COVERAGE_EXCLUDES and
# COVERAGE_GCOVR_EXCLUDES variables of the calling scope.
set(GCOVR_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
# Combine excludes to several -e arguments
set(GCOVR_EXCLUDE_ARGS "")
foreach(EXCLUDE ${GCOVR_EXCLUDES})
list(APPEND GCOVR_EXCLUDE_ARGS "-e")
list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
endforeach()
# Set up commands which will be run to generate coverage data
# Run tests
set(GCOVR_HTML_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Create folder
set(GCOVR_HTML_FOLDER_CMD
${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME}
)
# Running gcovr
set(GCOVR_HTML_CMD
${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
# With CODE_COVERAGE_VERBOSE set, print the three commands at configure time
# (list separators replaced by spaces for readability).
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to run tests: ")
string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}")
message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to create a folder: ")
string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}")
message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}")
message(STATUS "Command to generate gcovr HTML coverage data: ")
string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}")
message(STATUS "${GCOVR_HTML_CMD_SPACED}")
endif()
# The target runs the tests, creates the report folder, then runs gcovr,
# all from PROJECT_BINARY_DIR.
add_custom_target(${Coverage_NAME}
COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD}
COMMAND ${GCOVR_HTML_FOLDER_CMD}
COMMAND ${GCOVR_HTML_CMD}
BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Running gcovr to produce HTML code coverage report."
)
# Show info where to find the report
# NOTE(review): "COMMAND ;" presumably acts as an empty command so that only
# the COMMENT text is shown after the target has run - confirm.
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
)
endfunction() # setup_target_for_coverage_gcovr_html
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_fastcov(
# NAME testrunner_coverage # New target name
# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES testrunner # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude.
# NO_DEMANGLE # Don't demangle C++ symbols
# # even if c++filt is found
# SKIP_HTML # Don't create html report
# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths
# )
# Creates a custom target named <NAME> that zeroes the coverage counters, runs
# the given executable, captures coverage with fastcov into <NAME>.json,
# converts it to <NAME>.info and, unless SKIP_HTML is given, renders an HTML
# report into the <NAME> directory with genhtml. An optional POST_CMD runs last.
# Options: NO_DEMANGLE, SKIP_HTML. One-value: BASE_DIRECTORY, NAME.
# Multi-value: EXCLUDE, EXECUTABLE, EXECUTABLE_ARGS, DEPENDENCIES, FASTCOV_ARGS,
# GENHTML_ARGS, POST_CMD.
function(setup_target_for_coverage_fastcov)
set(options NO_DEMANGLE SKIP_HTML)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD)
# Parsed values become Coverage_<KEYWORD> variables.
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT FASTCOV_PATH)
message(FATAL_ERROR "fastcov not found! Aborting...")
endif()
# genhtml is only required when an HTML report is requested.
if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH)
message(FATAL_ERROR "genhtml not found! Aborting...")
endif()
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (Patterns, not paths, for fastcov)
set(FASTCOV_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES})
list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES FASTCOV_EXCLUDES)
# Conditional arguments
# Demangling is requested from genhtml only when c++filt was found and
# NO_DEMANGLE was not passed.
if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
set(GENHTML_EXTRA_ARGS "--demangle-cpp")
endif()
# Set up commands which will be run to generate coverage data
set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS})
# NOTE(review): when FASTCOV_EXCLUDES is empty, --exclude is emitted with no
# value; confirm that fastcov accepts this.
set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
--search-directory ${BASEDIR}
--process-gcno
--output ${Coverage_NAME}.json
--exclude ${FASTCOV_EXCLUDES}
)
set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH}
-C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info
)
# NOTE(review): ";" presumably stands for an empty command when a step is
# disabled - confirm how add_custom_target treats it.
if(Coverage_SKIP_HTML)
set(FASTCOV_HTML_CMD ";")
else()
set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS}
-o ${Coverage_NAME} ${Coverage_NAME}.info
)
endif()
set(FASTCOV_POST_CMD ";")
if(Coverage_POST_CMD)
set(FASTCOV_POST_CMD ${Coverage_POST_CMD})
endif()
# With CODE_COVERAGE_VERBOSE set, print every command at configure time
# (list separators replaced by spaces for readability).
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):")
message(" Running tests:")
string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}")
message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}")
message(" Capturing fastcov counters and generating report:")
string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}")
message(" ${FASTCOV_CAPTURE_CMD_SPACED}")
message(" Converting fastcov .json to lcov .info:")
string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}")
message(" ${FASTCOV_CONVERT_CMD_SPACED}")
if(NOT Coverage_SKIP_HTML)
message(" Generating HTML report: ")
string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}")
message(" ${FASTCOV_HTML_CMD_SPACED}")
endif()
if(Coverage_POST_CMD)
message(" Running post command: ")
string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}")
message(" ${FASTCOV_POST_CMD_SPACED}")
endif()
endif()
# Setup target
add_custom_target(${Coverage_NAME}
# Cleanup fastcov
COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
--search-directory ${BASEDIR}
--zerocounters
COMMAND ${FASTCOV_EXEC_TESTS_CMD}
COMMAND ${FASTCOV_CAPTURE_CMD}
COMMAND ${FASTCOV_CONVERT_CMD}
COMMAND ${FASTCOV_HTML_CMD}
COMMAND ${FASTCOV_POST_CMD}
# Set output files as GENERATED (will be removed on 'make clean')
BYPRODUCTS
${Coverage_NAME}.info
${Coverage_NAME}.json
${Coverage_NAME}/index.html # report directory
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report."
)
# Build the message echoed after the target has run; the HTML hint is only
# appended when a report was generated.
set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.")
if(NOT Coverage_SKIP_HTML)
string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.")
endif()
# Show where to find the fastcov info report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG}
)
endfunction() # setup_target_for_coverage_fastcov
# Appends COVERAGE_COMPILER_FLAGS to the C, C++ and Fortran compiler flags of
# the calling scope and reports the flags that were added.
function(append_coverage_compiler_flags)
    foreach(_coverage_lang C CXX Fortran)
        # PARENT_SCOPE makes the change visible to the caller of this function.
        set(CMAKE_${_coverage_lang}_FLAGS
            "${CMAKE_${_coverage_lang}_FLAGS} ${COVERAGE_COMPILER_FLAGS}"
            PARENT_SCOPE)
    endforeach()
    message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}")
endfunction() # append_coverage_compiler_flags
# Setup coverage for a specific target.
#
# Adds COVERAGE_COMPILER_FLAGS as PRIVATE compile options of target <name> and,
# when any enabled compiler (C, C++ or Fortran) is GNU, links the target
# against gcov.
#
#   name - existing target that should be instrumented
function(append_coverage_compiler_flags_to_target name)
    # COVERAGE_COMPILER_FLAGS is a single space-separated string; split it into
    # a list so every flag is passed as its own option.
    separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}")
    target_compile_options(${name} PRIVATE ${_flag_list})
    # The C++ compiler is checked as well: in a project that only enables CXX,
    # CMAKE_C_COMPILER_ID is empty and gcov would otherwise never be linked.
    if(CMAKE_C_COMPILER_ID STREQUAL "GNU"
       OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
       OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
        target_link_libraries(${name} PRIVATE gcov)
    endif()
endfunction()

View File

@@ -0,0 +1,22 @@
# Optional clang-tidy integration, switched on with -DENABLE_CLANG_TIDY=ON.
# When clang-tidy is found, CMAKE_CXX_CLANG_TIDY is set so that the tool runs
# alongside every C++ compilation, and a convenience target "clang-tidy" is
# added that builds the main project target.
if(ENABLE_CLANG_TIDY)
    find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
    if(NOT CLANG_TIDY_COMMAND)
        message(WARNING "🔴 ENABLE_CLANG_TIDY is ON but clang-tidy is not found!")
        # Make sure a stale cached value does not keep invoking a missing tool.
        set(CMAKE_CXX_CLANG_TIDY "" CACHE STRING "" FORCE)
    else()
        message(STATUS "🟢 ENABLE_CLANG_TIDY is ON")
        set(CLANGTIDY_EXTRA_ARGS
            "-extra-arg=-Wno-unknown-warning-option"
        )
        set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND};-p=${CMAKE_BINARY_DIR};${CLANGTIDY_EXTRA_ARGS}" CACHE STRING "" FORCE)
        # Building the project target is enough to run clang-tidy because of
        # CMAKE_CXX_CLANG_TIDY. The target must not build itself: a
        # "--target clang-tidy" command here would recurse without end.
        add_custom_target(clang-tidy
            COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${CMAKE_PROJECT_NAME}
            COMMENT "Running clang-tidy..."
        )
        set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
    endif()
endif(ENABLE_CLANG_TIDY)

4
config/CMakeLists.txt Normal file
View File

@@ -0,0 +1,4 @@
# Generate config.h from its template; quotes in substituted values are escaped.
set(_config_template "${CMAKE_CURRENT_SOURCE_DIR}/config.h.in")
set(_config_header "${CMAKE_BINARY_DIR}/configured_files/include/config.h")
configure_file("${_config_template}" "${_config_header}" ESCAPE_QUOTES)

14
config/config.h.in Normal file
View File

@@ -0,0 +1,14 @@
// Template processed by CMake's configure_file(): every @VAR@ placeholder is
// replaced by the value of the CMake variable VAR. A placeholder must not
// contain spaces, otherwise it is left in the generated header untouched.
#pragma once
#include <string>
#include <string_view>
// Numeric components of the project version.
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
// Project metadata as compile-time strings.
static constexpr std::string_view project_name = "@PROJECT_NAME@";
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view git_sha = "@GIT_SHA@";
// Folder containing the data files shipped with the tests (ends with '/').
static constexpr std::string_view data_path = "@Platform_SOURCE_DIR@/tests/data/";

BIN
diagrams/BayesNet.pdf Executable file

Binary file not shown.

4
gcovr.cfg Normal file
View File

@@ -0,0 +1,4 @@
filter = src/
exclude-directories = build/lib/
print-summary = yes
sort-percentage = yes

31
gitmodules Normal file
View File

@@ -0,0 +1,31 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
master = master
update = merge
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
master = master
update = merge
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
main = main
update = merge
[submodule "lib/PyClassifiers"]
path = lib/PyClassifiers
url = https://github.com/rmontanana/PyClassifiers
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/Folding

168
lib/Files/ArffFiles.cc Normal file
View File

@@ -0,0 +1,168 @@
#include "ArffFiles.h"
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
// Default construction: all containers start empty.
ArffFiles::ArffFiles() = default;
// Returns a copy of the raw data lines kept by loadCommon().
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
// Returns the number of raw data lines kept by loadCommon().
unsigned long int ArffFiles::getSize() const
{
return lines.size();
}
// Returns a copy of the (name, type) pairs of the attributes; the class
// attribute is removed from this list by load().
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
// Returns the name of the attribute selected as class by load().
std::string ArffFiles::getClassName() const
{
return className;
}
// Returns the declared type of the attribute selected as class by load().
std::string ArffFiles::getClassType() const
{
return classType;
}
// Returns a mutable reference to the feature matrix built by
// generateDataset(): one inner vector per attribute, one entry per sample.
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
// Returns a mutable reference to the integer-encoded class labels built by
// generateDataset().
std::vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::loadCommon(std::string fileName)
{
std::ifstream file(fileName);
if (!file.is_open()) {
throw std::invalid_argument("Unable to open file");
}
std::string line;
std::string keyword;
std::string attribute;
std::string type;
std::string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
std::stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
type += type_w + " ";
attributes.emplace_back(trim(attribute), trim(type));
continue;
}
if (line[0] == '@') {
continue;
}
lines.push_back(line);
}
file.close();
if (attributes.empty())
throw std::invalid_argument("No attributes found");
}
// Loads an ARFF file taking either the last (classLast == true) or the first
// declared attribute as the class. The class attribute is removed from the
// attribute list before the dataset is generated.
void ArffFiles::load(const std::string& fileName, bool classLast)
{
    loadCommon(fileName);
    // loadCommon() throws when no attribute was found, so the list is not empty.
    const auto classPos = classLast ? attributes.end() - 1 : attributes.begin();
    const int labelIndex = classLast ? static_cast<int>(attributes.size()) - 1 : 0;
    className = classPos->first;
    classType = classPos->second;
    attributes.erase(classPos);
    generateDataset(labelIndex);
}
// Loads an ARFF file taking the attribute called `name` as the class. The
// class attribute is removed from the attribute list before the dataset is
// generated. Throws std::invalid_argument when no attribute has that name.
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
    loadCommon(fileName);
    auto classPos = attributes.begin();
    while (classPos != attributes.end() && classPos->first != name) {
        ++classPos;
    }
    if (classPos == attributes.end()) {
        throw std::invalid_argument("Class name not found");
    }
    // Column of the class inside every data line.
    const int labelIndex = static_cast<int>(classPos - attributes.begin());
    className = classPos->first;
    classType = classPos->second;
    attributes.erase(classPos);
    generateDataset(labelIndex);
}
// Builds X (one vector per attribute, one entry per sample) and y (integer
// encoded labels) from the raw data lines.
//
// labelIndex: zero-based column of the class value inside every data line.
// Samples containing a missing value ("?") are dropped from X and y.
// Throws std::invalid_argument when a line holds more values than there are
// attributes; std::stof may throw for values that are not numeric.
void ArffFiles::generateDataset(int labelIndex)
{
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    // Samples with missing values: each index appears once, in ascending order.
    auto removeLines = std::vector<int>();
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        size_t xIndex = 0;
        bool missing = false;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
                continue;
            }
            if (xIndex >= X.size()) {
                // Writing past the declared attributes would corrupt memory.
                throw std::invalid_argument("Data line has more values than declared attributes");
            }
            if (value == "?") {
                X[xIndex][i] = -1;
                missing = true;
            } else {
                X[xIndex][i] = stof(value);
            }
            ++xIndex;
        }
        if (missing) {
            removeLines.push_back(static_cast<int>(i));
        }
    }
    // Erase from the back so that the indices still to be processed are not
    // shifted by earlier removals.
    for (auto it = removeLines.rbegin(); it != removeLines.rend(); ++it) {
        yy.erase(yy.begin() + *it);
        for (auto& x : X) {
            x.erase(x.begin() + *it);
        }
    }
    y = factorize(yy);
}
// Returns `source` without leading and trailing spaces, single quotes,
// newlines, carriage returns and tabs. A string made only of those characters
// yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    const char* const strippable = " '\n\r\t";
    const auto first = source.find_first_not_of(strippable);
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = source.find_last_not_of(strippable);
    return source.substr(first, last - first + 1);
}
// Encodes labels as consecutive integers starting at 0, numbered in order of
// first appearance. Returns one code per input label, in input order.
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::map<std::string, int> codes;
    std::vector<int> encoded;
    encoded.reserve(labels_t.size());
    for (const auto& label : labels_t) {
        // The next free code equals the number of distinct labels seen so far;
        // insert() leaves an already known label untouched.
        const auto entry = codes.insert(std::make_pair(label, static_cast<int>(codes.size())));
        encoded.push_back(entry.first->second);
    }
    return encoded;
}

32
lib/Files/ArffFiles.h Normal file
View File

@@ -0,0 +1,32 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
// Reader for ARFF data files. After load() the features are available through
// getX(), the integer-encoded class labels through getY() and the attribute
// declarations (without the class attribute) through getAttributes().
class ArffFiles {
private:
// Raw data lines of the file (comments and '@' header lines excluded).
std::vector<std::string> lines;
// (name, type) of every attribute; the class attribute is removed by load().
std::vector<std::pair<std::string, std::string>> attributes;
// Name and declared type of the attribute used as class.
std::string className;
std::string classType;
// Feature values: one inner vector per attribute, one entry per sample.
std::vector<std::vector<float>> X;
// Class labels encoded as integers by factorize().
std::vector<int> y;
// Fills X and y from `lines`; the argument is the column of the class value.
void generateDataset(int);
// Reads the file named by the argument into `lines` and `attributes`.
void loadCommon(std::string);
public:
ArffFiles();
// Loads a file; the flag selects the last (true) or first (false) attribute
// as the class.
void load(const std::string&, bool = true);
// Loads a file; the second argument is the name of the class attribute.
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
unsigned long int getSize() const;
std::string getClassName() const;
std::string getClassType() const;
// Strips leading/trailing spaces, single quotes, newlines, carriage returns
// and tabs.
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
// Maps every distinct label to an integer in order of first appearance.
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

1
lib/Files/CMakeLists.txt Normal file
View File

@@ -0,0 +1 @@
# Library target with the ARFF file reader.
add_library(ArffFiles ArffFiles.cc)

1
lib/argparse Submodule

Submodule lib/argparse added at 69dabd88a8

1
lib/libxlsxwriter Submodule

Submodule lib/libxlsxwriter added at 6a2364c42c

11
sample/CMakeLists.txt Normal file
View File

@@ -0,0 +1,11 @@
# Header search paths used by the sample program (project sources, Python
# headers and the bundled libraries under lib/).
include_directories(
${Platform_SOURCE_DIR}/src/Platform
${Platform_SOURCE_DIR}/src/PyClassifiers
${Python3_INCLUDE_DIRS}
${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/mdlp
${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/json/include
)
# The sample executable compiles Models.cc from src/Platform directly in
# addition to linking the libraries below.
add_executable(PlatformSample sample.cc ${Platform_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(PlatformSample Platform ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)

236
sample/sample.cc Normal file
View File

@@ -0,0 +1,236 @@
#include <iostream>
#include <torch/torch.h>
#include <string>
#include <map>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "ArffFiles.h"
#include "BayesMetrics.h"
#include "CPPFImdlp.h"
#include "folding.hpp"
#include "Models.h"
#include "modelRegister.h"
#include <fstream>
#include "config.h"
// Default data folder: a std::string copy of data_path (declared in config.h).
const std::string PATH = { data_path.begin(), data_path.end() };
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
{
std::vector<mdlp::labels_t>Xd;
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
// Returns true when the file `name` can be opened for reading.
bool file_exists(const std::string& name)
{
    FILE* handle = fopen(name.c_str(), "r");
    if (handle == nullptr) {
        return false;
    }
    fclose(handle);
    return true;
}
pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(std::vector<int> indices, std::vector<std::vector<int>> X, std::vector<int> y)
{
std::vector<std::vector<int>> Xr; // nxm
std::vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(std::vector<int>());
}
for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) {
Xr[col].push_back(X[col][index]);
}
yr.push_back(y[index]);
}
return { Xr, yr };
}
// Sample driver: parses the command line, loads and discretizes an ARFF
// dataset, fits the selected model on the whole dataset (printing its
// structure, score and a .dot graph) and finally runs a k-fold cross
// validation reporting train/test scores.
int main(int argc, char** argv)
{
// Known datasets; the flag is passed to ArffFiles::load() as classLast.
map<std::string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = std::vector<std::string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<std::string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
// -d: only names present in `datasets` are accepted.
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(std::string{ PATH }
);
// -m: only names returned by platform::Models::instance()->getNames() are accepted.
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->tostring());
}
);
// NOTE(review): --discretize is declared but its value is never read in this function.
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
// -f: must be an integer greater than 1.
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
std::string model_name, file_name, path, complete_file_name;
int nFolds, seed;
// Any parsing or validation error prints the message and the usage text,
// then terminates with exit code 1.
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
path = program.get<std::string>("path");
model_name = program.get<std::string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
/*
* Begin Processing
*/
auto handler = ArffFiles();
handler.load(complete_file_name, class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<std::string, std::string>& item) { return item.first; });
// Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
// states: for every feature and for the class, a vector whose length is the
// number of distinct values (all elements value-initialised).
map<std::string, std::vector<int>> states;
for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
// Fit the selected model on the complete discretized dataset.
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
std::cout << line << std::endl;
}
std::cout << "--- Topological Order ---" << std::endl;
auto order = clf->topological_order();
for (auto name : order) {
std::cout << name << ", ";
}
std::cout << "end." << std::endl;
auto score = clf->score(Xd, y);
std::cout << "Score: " << score << std::endl;
// Write the model graph to <model>_<dataset>.dot in the current directory.
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl;
// Tensor copy of the dataset: one row per feature, one column per sample.
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
// NOTE(review): `fold` is allocated with new and no matching delete is
// visible in this function.
platform::Fold* fold;
if (stratified)
fold = new folding::StratifiedKFold(nFolds, y, seed);
else
fold = new folding::KFold(nFolds, y.size(), seed);
// Cross validation: refit on every training split and score train and test.
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) {
// Samples are columns of Xt, hence index_select along dimension 1.
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
// NOTE(review): the result of predict() is not used afterwards.
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
std::cout << "Score Train: " << score_train << std::endl;
std::cout << "Score Test : " << score_test << std::endl;
std::cout << "-------------------------------------------------------------------------------" << std::endl;
}
// Report the scores averaged over all folds.
std::cout << "**********************************************************************************" << std::endl;
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
}

343
src/Platform/BestResults.cc Normal file
View File

@@ -0,0 +1,343 @@
#include <filesystem>
#include <set>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
#include "BestResultsExcel.h"
#include "CLocale.h"
namespace fs = std::filesystem;
// Function ftime_to_string; code taken from
// https://stackoverflow.com/a/58237530/1389271
// Formats a time point of any clock as "YYYY-MM-DD HH:MM" (UTC). The time
// point is first translated to std::chrono::system_clock by measuring its
// distance from "now" on its own clock.
template <typename TP>
std::string ftime_to_string(TP tp)
{
    using std::chrono::system_clock;
    const auto distanceFromNow = tp - TP::clock::now();
    const auto onSystemClock = std::chrono::time_point_cast<system_clock::duration>(distanceFromNow + system_clock::now());
    const auto seconds = system_clock::to_time_t(onSystemClock);
    std::stringstream formatted;
    formatted << std::put_time(std::gmtime(&seconds), "%Y-%m-%d %H:%M");
    return formatted.str();
}
namespace platform {
std::string BestResults::build()
{
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
json bests;
for (const auto& file : files) {
auto result = Result(path, file);
auto data = result.load();
for (auto const& item : data.at("results")) {
bool update = false;
// Check if results file contains only one dataset
auto datasetName = item.at("dataset").get<std::string>();
if (bests.contains(datasetName)) {
if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) {
update = true;
}
} else {
update = true;
}
if (update) {
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
}
}
}
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl;
}
std::ofstream file(bestFileName);
file << bests;
file.close();
return bestFileName;
}
// Returns the name (without folder) of the best-results file for the current
// score and model: "best_results_<score>_<model>.json".
std::string BestResults::bestResultFile()
{
return "best_results_" + score + "_" + model + ".json";
}
// Extracts {model, score} from a result file name of the form
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
// i.e. the score is the text between the first and second underscore and the
// model the text between the second and third underscore.
std::pair<std::string, std::string> getModelScore(std::string name)
{
    const auto firstSep = name.find("_");
    const auto secondSep = name.find("_", firstSep + 1);
    const auto thirdSep = name.find("_", secondSep + 1);
    const std::string scoreName = name.substr(firstSep + 1, secondSep - firstSep - 1);
    const std::string modelName = name.substr(secondSep + 1, thirdSep - secondSep - 1);
    return { modelName, scoreName };
}
// Lists the result files found in `path`: names that contain ".json", start
// with "results_" and whose score matches `score` and whose model matches
// `model` (or any model when `model` is "any"). Returns file names without
// the folder.
std::vector<std::string> BestResults::loadResultFiles()
{
    std::vector<std::string> selected;
    for (const auto& entry : std::filesystem::directory_iterator(path)) {
        const auto fileName = entry.path().filename().string();
        const bool isResultFile = fileName.find(".json") != std::string::npos && fileName.find("results_") == 0;
        if (!isResultFile) {
            continue;
        }
        // getModelScore() returns {model, score}.
        const auto modelAndScore = getModelScore(fileName);
        const std::string& fileModel = modelAndScore.first;
        const std::string& fileScore = modelAndScore.second;
        if (score == fileScore && (model == fileModel || model == "any")) {
            selected.push_back(fileName);
        }
    }
    return selected;
}
// Parses the JSON document stored in `fileName`.
// Throws std::invalid_argument when the file cannot be opened.
json BestResults::loadFile(const std::string& fileName)
{
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(resultData);
}
std::vector<std::string> BestResults::getModels()
{
std::set<std::string> models;
std::vector<std::string> result;
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
std::string fileModel, fileScore;
for (const auto& file : files) {
// extract the model from the file name
tie(fileModel, fileScore) = getModelScore(file);
// add the model to the std::vector of models
models.insert(fileModel);
}
result = std::vector<std::string>(models.begin(), models.end());
return result;
}
// Returns the keys of `table` in iteration order.
std::vector<std::string> BestResults::getDatasets(json table)
{
    std::vector<std::string> names;
    for (const auto& entry : table.items()) {
        names.emplace_back(entry.key());
    }
    return names;
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
std::cout << "Building best results for model: " << model << std::endl;
this->model = model;
build();
}
model = "any";
}
// Prints to the console the content of the best results file of the current
// score/model: one line per dataset with score, source file and
// hyperparameters, followed by the sum of the scores.
// Terminates the program with exit code 1 when the file does not exist.
void BestResults::listFile()
{
    std::string bestFileName = path + bestResultFile();
    if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
        fclose(fileTest);
    } else {
        std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
        exit(1);
    }
    // Installs the project's number formatting on std::cout / the global locale
    auto temp = ConfigLocale();
    auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName));
    auto data = loadFile(bestFileName);
    // Column widths. They are accumulated in a loop so that an empty file
    // (no datasets) is handled instead of dereferencing max_element's end().
    int maxDatasetName = 0;
    int maxFileName = 0;
    int maxHyper = 15;
    for (auto const& item : data.items()) {
        maxDatasetName = std::max(maxDatasetName, (int)item.key().size());
        maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size());
        maxFileName = std::max(maxFileName, (int)item.value().at(2).get<std::string>().size());
    }
    std::stringstream oss;
    oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl;
    std::cout << oss.str();
    // 8 = 7 characters of the color escape sequence + the end of line
    std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
    std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl;
    auto i = 0;
    bool odd = true;
    double total = 0;
    for (auto const& item : data.items()) {
        // Alternate the row color to ease reading
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        double value = item.value().at(0).get<double>();
        std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
        std::cout << std::setw(maxDatasetName) << std::left << item.key() << " ";
        std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " ";
        std::cout << std::setw(maxFileName) << item.value().at(2).get<std::string>() << " ";
        std::cout << item.value().at(1) << " ";
        std::cout << std::endl;
        total += value;
        odd = !odd;
    }
    std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl;
    std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl;
}
// Builds a JSON object with one entry per model (the content of that model's
// best results file) plus a "dateTable" entry holding the formatted write
// time of the OLDEST of those files.
// Side effect: the member `model` is left set to the last model processed.
// Terminates the program with exit code 1 when a best results file is missing.
json BestResults::buildTableResults(std::vector<std::string> models)
{
    json table;
    auto oldestWrite = std::filesystem::file_time_type::max();
    for (const auto& current : models) {
        // the best results file name is built from the member `model`
        // (presumably inside bestResultFile(); its body is not in view)
        model = current;
        const std::string bestFileName = path + bestResultFile();
        FILE* probe = fopen(bestFileName.c_str(), "r");
        if (probe == nullptr) {
            std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
            exit(1);
        }
        fclose(probe);
        const auto written = std::filesystem::last_write_time(bestFileName);
        if (written < oldestWrite) {
            oldestWrite = written;
        }
        table[current] = loadFile(bestFileName);
    }
    table["dateTable"] = ftime_to_string(oldestWrite);
    return table;
}
void BestResults::printTableResults(std::vector<std::string> models, json table)
{
std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
for (const auto& model : models) {
std::cout << std::setw(maxModelName) << std::left << model << " ";
}
std::cout << std::endl;
std::cout << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
auto i = 0;
bool odd = true;
std::map<std::string, double> totals;
int nDatasets = table.begin().value().size();
for (const auto& model : models) {
totals[model] = 0.0;
}
auto datasets = getDatasets(table.begin().value());
for (auto const& dataset : datasets) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << std::setw(maxDatasetName) << std::left << dataset << " ";
double maxValue = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
double value = table[model].at(dataset).at(0).get<double>();
if (value > maxValue) {
maxValue = value;
}
}
// Print the row with red colors on max values
for (const auto& model : models) {
std::string efectiveColor = color;
double value = table[model].at(dataset).at(0).get<double>();
if (value == maxValue) {
efectiveColor = Colors::RED();
}
totals[model] += value;
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
}
std::cout << std::endl;
odd = !odd;
}
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals...................";
double max = 0.0;
for (const auto& total : totals) {
if (total.second > max) {
max = total.second;
}
}
for (const auto& model : models) {
std::string efectiveColor = Colors::GREEN();
if (totals[model] == max) {
efectiveColor = Colors::RED();
}
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
}
std::cout << std::endl;
}
void BestResults::reportSingle(bool excel)
{
listFile();
if (excel) {
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName());
}
}
void BestResults::reportAll(bool excel)
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxModelName = std::max(12, maxModelName);
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = std::max(25, maxDatasetName);
// Print the table of results
printTableResults(models, table);
// Compute the Friedman test
std::map<std::string, std::map<std::string, float>> ranksModels;
if (friedman) {
Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
ranksModels = stats.getRanks();
}
if (excel) {
BestResultsExcel excel(score, datasets);
excel.reportAll(models, table, ranksModels, friedman, significance);
if (friedman) {
int idx = -1;
double min = 2000;
// Find out the control model
auto totals = std::vector<double>(models.size(), 0.0);
for (const auto& dataset : datasets) {
for (int i = 0; i < models.size(); ++i) {
totals[i] += ranksModels[dataset][models[i]];
}
}
for (int i = 0; i < models.size(); ++i) {
if (totals[i] < min) {
min = totals[i];
idx = i;
}
}
model = models.at(idx);
excel.reportSingle(model, path + bestResultFile());
}
messageExcelFile(excel.getFileName());
}
}
// Tells the user, in yellow, the name of the Excel file that was generated.
void BestResults::messageExcelFile(const std::string& fileName)
{
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
}
}

View File

@@ -0,0 +1,36 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
// Builds and reports the best results obtained by each model on each dataset
// for a given score, from the result files stored in `path`.
class BestResults {
public:
// path: directory holding the result files; score: name of the score to
// filter by; model: model to filter by ("any" accepts every model);
// friedman: whether reportAll() runs the Friedman / Holm tests;
// significance: significance level handed to those tests.
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05)
: path(path), score(score), model(model), friedman(friedman), significance(significance)
{
}
std::string build();
// Console listing of the current model's best results (+ Excel if requested)
void reportSingle(bool excel);
// Console table with every model (+ statistical tests and Excel if requested)
void reportAll(bool excel);
// Runs build() for every model that has result files
void buildAll();
private:
// Distinct model names found in the selected result files
std::vector<std::string> getModels();
// Keys of the given JSON object
std::vector<std::string> getDatasets(json table);
// Names of the result files matching score and model
std::vector<std::string> loadResultFiles();
void messageExcelFile(const std::string& fileName);
// One entry per model plus a "dateTable" entry
json buildTableResults(std::vector<std::string> models);
void printTableResults(std::vector<std::string> models, json table);
std::string bestResultFile();
// Parses a JSON file; throws std::invalid_argument if it can't be opened
json loadFile(const std::string& fileName);
void listFile();
std::string path;
std::string score;
std::string model;
bool friedman;
double significance;
// Column widths used by printTableResults(), computed in reportAll()
int maxModelName = 0;
int maxDatasetName = 0;
};
}
#endif //BESTRESULTS_H

View File

@@ -0,0 +1,300 @@
#include <sstream>
#include "BestResultsExcel.h"
#include "Paths.h"
#include <map>
#include <nlohmann/json.hpp>
#include "Statistics.h"
#include "ReportExcel.h"
namespace platform {
// Parses the JSON file `fileName` and returns its content.
// Throws std::invalid_argument when the file cannot be opened.
json loadResultData(const std::string& fileName)
{
    std::ifstream input(fileName);
    if (!input.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(input);
}
// Converts a zero-based column index into its spreadsheet column letters:
// 0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", 701 -> "ZZ", 702 -> "AAA".
// A negative index returns an empty string.
//
// Column letters are a bijective base-26 numbering (there is no "zero"
// digit), so after taking each letter the remaining value has to be
// decremented; a plain base-26 conversion yields "BA" for column 26.
std::string getColumnName(int colNum)
{
    std::string columnName;
    if (colNum < 0) {
        return columnName;
    }
    int remaining = colNum;
    do {
        columnName.insert(columnName.begin(), static_cast<char>('A' + remaining % 26));
        remaining = remaining / 26 - 1;
    } while (remaining >= 0);
    return columnName;
}
// Creates the workbook (file `fileName` inside Paths::excel()), sets its
// properties and formats, and widens the dataset column to fit the longest
// dataset name.
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
    workbook = workbook_new((Paths::excel() + fileName).c_str());
    setProperties("Best Results");
    // datasetNameSize starts at its minimum width; a loop (instead of
    // dereferencing max_element) keeps an empty dataset list well defined.
    for (const auto& dataset : datasets) {
        datasetNameSize = std::max(datasetNameSize, static_cast<int>(dataset.size()));
    }
    createFormats();
}
// Stores the data of the report and writes the "Best Results" sheet (and,
// through build(), the "Ranks" and "Friedman" sheets when `friedman` is true).
void BestResultsExcel::reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance)
{
    this->table = table;
    this->models = models;
    ranksModels = ranks;
    this->friedman = friedman;
    this->significance = significance;
    worksheet = workbook_add_worksheet(workbook, "Best Results");
    // modelNameSize starts at its minimum width; a loop (instead of
    // dereferencing max_element) keeps an empty model list well defined.
    for (const auto& name : models) {
        modelNameSize = std::max(modelNameSize, static_cast<int>(name.size()));
    }
    formatColumns();
    build();
}
void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName)
{
worksheet = workbook_add_worksheet(workbook, "Report");
if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << "File " << fileName << " doesn't exist." << std::endl;
exit(1);
}
json data = loadResultData(fileName);
std::string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
writeString(row, 2, "Score", "bodyHeader");
writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader");
auto i = 0;
std::string hyperparameters;
int hypSize = 22;
std::map<std::string, std::string> files; // map of files imported and their tabs
for (auto const& item : data.items()) {
row++;
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
writeDouble(row, 2, item.value().at(0).get<double>(), "result");
auto fileName = item.value().at(2).get<std::string>();
std::string hyperlink = "";
try {
hyperlink = files.at(fileName);
}
catch (const std::out_of_range& oor) {
auto tabName = "table_" + std::to_string(i);
auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
json data = loadResultData(Paths::results() + fileName);
auto report = ReportExcel(data, false, workbook, worksheetNew);
report.show();
hyperlink = "#table_" + std::to_string(i);
files[fileName] = hyperlink;
}
hyperlink += "!H" + std::to_string(i + 6);
std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")";
worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text"));
hyperparameters = item.value().at(1).dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, 4, hyperparameters, "text");
}
row++;
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
std::stringstream oss;
auto colName = getColumnName(2);
oss << "=sum(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
// Set format
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
// Closes the workbook created in the constructor.
BestResultsExcel::~BestResultsExcel()
{
workbook_close(workbook);
}
void BestResultsExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
// Highlights, in every data row of the current worksheet, the cells of the
// model columns whose value equals `formula` (e.g. "max" or "min") applied
// to the whole row.
void BestResultsExcel::addConditionalFormat(std::string formula)
{
    // Add conditional format for max/min values in scores/ranks sheets
    lxw_format* custom_format = workbook_add_format(workbook);
    format_set_bg_color(custom_format, 0xFFC7CE);
    format_set_font_color(custom_format, 0x9C0006);
    // Formula relative to the first data cell (C5), e.g. =C5=max($C5:$F5)
    std::string col = getColumnName(models.size() + 1);
    std::stringstream oss;
    oss << "=C5=" << formula << "($C5:$" << col << "5)";
    auto formulaValue = oss.str();
    // The options object is only read during the call below, so a
    // zero-initialized automatic object is used; the former calloc'ed object
    // was never freed.
    lxw_conditional_format conditional_format = {};
    conditional_format.type = LXW_CONDITIONAL_TYPE_FORMULA;
    conditional_format.value_string = formulaValue.c_str();
    conditional_format.format = custom_format;
    worksheet_conditional_format_range(worksheet, 4, 2, datasets.size() + 3, models.size() + 1, &conditional_format);
}
void BestResultsExcel::build()
{
// Create Sheet with scores
header(false);
body(false);
// Add conditional format for max values
addConditionalFormat("max");
footer(false);
if (friedman) {
// Create Sheet with ranks
worksheet = workbook_add_worksheet(workbook, "Ranks");
formatColumns();
header(true);
body(true);
addConditionalFormat("min");
footer(true);
// Create Sheet with Friedman Test
doFriedman();
}
}
// Returns the name of the workbook file prefixed with Paths::excel(),
// the same string the constructor passes to workbook_new().
std::string BestResultsExcel::getFileName()
{
return Paths::excel() + fileName;
}
// Writes the title (row 0) and the column headers (row 3) of a scores sheet
// (`ranks` false) or of a ranks sheet (`ranks` true). Leaves `row` at 3.
void BestResultsExcel::header(bool ranks)
{
    row = 0;
    const std::string prefix = ranks ? "Ranks for score " : "Best results for ";
    const std::string message = prefix + score;
    worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]);
    // Body header
    row = 3;
    writeString(row, 0, "", "bodyHeader");
    writeString(row, 1, "Dataset", "bodyHeader");
    // Model names start at column 2
    int column = 2;
    for (const auto& model : models) {
        writeString(row, column++, model.c_str(), "bodyHeader");
    }
}
void BestResultsExcel::body(bool ranks)
{
row = 4;
int i = 0;
json origin = table.begin().value();
for (auto const& item : origin.items()) {
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
int col = 1;
for (const auto& model : models) {
double value = ranks ? ranksModels[item.key()][model] : table[model].at(item.key()).at(0).get<double>();
writeDouble(row, ++col, value, "result");
}
++row;
}
}
void BestResultsExcel::footer(bool ranks)
{
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
int col = 1;
for (const auto& model : models) {
std::stringstream oss;
auto colName = getColumnName(col + 1);
oss << "=SUM(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
if (ranks) {
row++;
writeString(row, 1, "Average ranks", "bodyHeader");
int col = 1;
for (const auto& model : models) {
auto colName = getColumnName(col + 1);
std::stringstream oss;
oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size();
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
}
}
void BestResultsExcel::doFriedman()
{
worksheet = workbook_add_worksheet(workbook, "Friedman");
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), "Friedman Test", styles["headerFirst"]);
row = 2;
Statistics stats(models, datasets, table, significance, false);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
auto friedmanResult = stats.getFriedmanResult();
auto holmResult = stats.getHolmResult();
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]);
row += 2;
writeString(row, 1, "Friedman Q", "bodyHeader");
writeDouble(row, 2, friedmanResult.statistic, "bodyHeader");
row++;
writeString(row, 1, "Critical χ2 value", "bodyHeader");
writeDouble(row, 2, friedmanResult.criticalValue, "bodyHeader");
row++;
writeString(row, 1, "p-value", "bodyHeader");
writeDouble(row, 2, friedmanResult.pvalue, "bodyHeader");
writeString(row, 3, friedmanResult.reject ? "<" : ">", "bodyHeader");
writeDouble(row, 4, significance, "bodyHeader");
writeString(row, 5, friedmanResult.reject ? "Reject H0" : "Accept H0", "bodyHeader");
row += 3;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Holm Test", styles["headerFirst"]);
row += 2;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]);
row += 2;
std::string controlModel = "Control Model: " + holmResult.model;
worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]);
row++;
writeString(row, 1, "Model", "bodyHeader");
writeString(row, 2, "p-value", "bodyHeader");
writeString(row, 3, "Rank", "bodyHeader");
writeString(row, 4, "Win", "bodyHeader");
writeString(row, 5, "Tie", "bodyHeader");
writeString(row, 6, "Loss", "bodyHeader");
writeString(row, 7, "Reject H0", "bodyHeader");
row++;
bool first = true;
for (const auto& item : holmResult.holmLines) {
writeString(row, 1, item.model, "text");
if (first) {
// Control model info
first = false;
writeString(row, 2, "", "text");
writeDouble(row, 3, item.rank, "result");
writeString(row, 4, "", "text");
writeString(row, 5, "", "text");
writeString(row, 6, "", "text");
writeString(row, 7, "", "textCentered");
} else {
// Rest of the models info
writeDouble(row, 2, item.pvalue, "result");
writeDouble(row, 3, item.rank, "result");
writeInt(row, 4, item.wtl.win, "ints");
writeInt(row, 5, item.wtl.tie, "ints");
writeInt(row, 6, item.wtl.loss, "ints");
writeString(row, 7, item.reject ? "Yes" : "No", "textCentered");
}
row++;
}
}
}

View File

@@ -0,0 +1,39 @@
#ifndef BESTRESULTS_EXCEL_H
#define BESTRESULTS_EXCEL_H
#include "ExcelFile.h"
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
// Writes the best results reports to an Excel workbook (BestResults.xlsx).
// The workbook is created in the constructor and closed in the destructor.
class BestResultsExcel : ExcelFile {
public:
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
~BestResultsExcel();
// Writes the "Best Results" sheet and, when `friedman` is true, the "Ranks"
// and "Friedman" sheets
void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
// Writes the "Report" sheet with the best results of a single model
void reportSingle(const std::string& model, const std::string& fileName);
// Name of the workbook file prefixed with Paths::excel()
std::string getFileName();
private:
void build();
// header/body/footer write a scores sheet (ranks == false) or a ranks
// sheet (ranks == true)
void header(bool ranks);
void body(bool ranks);
void footer(bool ranks);
void formatColumns();
void doFriedman();
// formula: spreadsheet function ("max"/"min") that selects the cells to highlight
void addConditionalFormat(std::string formula);
const std::string fileName = "BestResults.xlsx";
std::string score;
std::vector<std::string> models;
std::vector<std::string> datasets;
json table;
// Indexed as ranksModels[dataset][model]
std::map<std::string, std::map<std::string, float>> ranksModels;
bool friedman;
double significance;
int modelNameSize = 12; // Min size of the column
int datasetNameSize = 25; // Min size of the column
};
}
#endif //BESTRESULTS_EXCEL_H

28
src/Platform/BestScore.h Normal file
View File

@@ -0,0 +1,28 @@
#ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string>
#include <map>
#include <utility>
#include "DotEnv.h"
namespace platform {
// Reference (best known) score for the experiment configured in the
// environment file.
class BestScore {
public:
    // Returns { description, value } of the reference score registered for
    // the pair (experiment, metric), where experiment is the "experiment"
    // entry read through DotEnv. Returns { "", 0.0 } when there is no
    // reference for that pair.
    static std::pair<std::string, double> getScore(const std::string& metric)
    {
        static const std::map<std::pair<std::string, std::string>, std::pair<std::string, double>> data = {
            {{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
            {{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
        };
        auto env = platform::DotEnv();
        std::string experiment = env.get("experiment");
        // find() instead of operator[]: operator[] never throws, it inserts a
        // default entry for every unknown key into the static table.
        const auto found = data.find({ experiment, metric });
        if (found == data.end()) {
            return { "", 0.0 };
        }
        return found->second;
    }
};
}
#endif

22
src/Platform/CLocale.h Normal file
View File

@@ -0,0 +1,22 @@
#ifndef LOCALE_H
#define LOCALE_H
#include <locale>
#include <iostream>
#include <string>
namespace platform {
// Numeric punctuation facet: ',' as decimal point and '.' as thousands
// separator, with digits grouped in threes (1234567.5 -> 1.234.567,5).
// The members are marked `override` so that the compiler verifies they
// really replace the std::numpunct<char> virtual functions.
struct separation : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
// Constructing an object of this class installs the `separation` numeric
// punctuation both as the global locale and on std::cout.
class ConfigLocale {
public:
    explicit ConfigLocale()
    {
        // Current std::cout locale with its numpunct facet replaced
        const std::locale configured(std::cout.getloc(), new separation);
        std::locale::global(configured);
        std::cout.imbue(configured);
    }
};
}
#endif

View File

@@ -0,0 +1,28 @@
# Header search paths shared by every executable defined below
include_directories(
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/src/BayesNet
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/folding
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/mdlp
${Platform_SOURCE_DIR}/lib/PyClassifiers/lib/BayesNet/lib/json/include
${Platform_SOURCE_DIR}/lib/PyClassifiers/src/PyClassifiers
${Platform_SOURCE_DIR}/src/Platform
${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/mdlp
${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/json/include
${Platform_SOURCE_DIR}/lib/libxlsxwriter/include
${Python3_INCLUDE_DIRS}
${MPI_CXX_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
)
# Command line tools; each one lists the sources it is compiled from
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Datasets.cc Dataset.cc Models.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_main b_main.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
# Libraries linked into each tool
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_grid PyClassifiers ${MPI_CXX_LIBRARIES})
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_main PyClassifiers BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)

15
src/Platform/Colors.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef COLORS_H
#define COLORS_H
// ANSI escape sequences used to color the console output.
// Each function returns the sequence that selects the color of its name;
// RESET() returns the sequence that restores the default attributes.
class Colors {
public:
static std::string MAGENTA() { return "\033[1;35m"; }
static std::string BLUE() { return "\033[1;34m"; }
static std::string CYAN() { return "\033[1;36m"; }
static std::string GREEN() { return "\033[1;32m"; }
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string IBLUE() { return "\033[0;94m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -0,0 +1,87 @@
#include "CommandParser.h"
#include <iostream>
#include <sstream>
#include <algorithm>
#include "Colors.h"
#include "Utils.h"
namespace platform {
// Prints `message` in red on the standard output.
void CommandParser::messageError(const std::string& message)
{
std::cout << Colors::RED() << message << Colors::RESET() << std::endl;
}
// Prompts the user (prompt written with the escape sequence `color`) until a
// valid command is typed and returns { command, index }.
//
// options: tuples (name, key, needsValue). A line starting with `key`
//          selects that option; when needsValue is true the rest of the line
//          must be an index in [0, maxIndex], otherwise it must be empty.
// defaultCommand: command selected when the line contains only digits; the
//          digits are the index.
// The members `command` and `index` are only updated when the whole line is
// valid, so a rejected value never leaks into the result.
// Throws std::runtime_error if the standard input is closed before a valid
// command is read (otherwise the prompt would be repeated forever).
std::pair<char, int> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex)
{
    // Converts text into an index in [0, maxIndex]; reports the problem and
    // returns false when it is not possible.
    const auto parseIndex = [&](const std::string& text, int& value) {
        try {
            value = std::stoi(text);
        }
        catch (const std::invalid_argument&) {
            messageError("Invalid value: " + text);
            return false;
        }
        catch (const std::out_of_range&) {
            // More digits than an int can hold
            messageError("Index out of range");
            return false;
        }
        if (value > maxIndex || value < 0) {
            messageError("Index out of range");
            return false;
        }
        return true;
    };
    // isdigit has undefined behavior for negative char values
    const auto isDigit = [](unsigned char c) { return ::isdigit(c) != 0; };
    bool finished = false;
    while (!finished) {
        std::stringstream oss;
        std::string line;
        oss << color << "Choose option (";
        bool first = true;
        for (auto& option : options) {
            if (first) {
                first = false;
            } else {
                oss << ", ";
            }
            oss << std::get<char>(option) << "=" << std::get<std::string>(option);
        }
        oss << "): ";
        std::cout << oss.str();
        const bool gotLine = static_cast<bool>(std::getline(std::cin, line));
        std::cout << Colors::RESET();
        if (!gotLine) {
            throw std::runtime_error("Input closed while waiting for a command");
        }
        line = trim(line);
        if (line.size() == 0)
            continue;
        if (std::all_of(line.begin(), line.end(), isDigit)) {
            int value = 0;
            if (!parseIndex(line, value)) {
                continue;
            }
            command = defaultCommand;
            index = value;
            finished = true;
            break;
        }
        bool found = false;
        for (auto& option : options) {
            if (line[0] != std::get<char>(option)) {
                continue;
            }
            found = true;
            // it's a match: the rest of the line is the value
            line.erase(line.begin());
            line = trim(line);
            if (std::get<bool>(option)) {
                // The option requires a value
                if (line.size() == 0) {
                    messageError("Option " + std::get<std::string>(option) + " requires a value");
                    break;
                }
                int value = 0;
                if (!parseIndex(line, value)) {
                    break;
                }
                index = value;
            } else {
                if (line.size() > 0) {
                    messageError("option " + std::get<std::string>(option) + " doesn't accept values");
                    break;
                }
            }
            command = std::get<char>(option);
            finished = true;
            break;
        }
        if (!found) {
            messageError("I don't know " + line);
        }
    }
    return { command, index };
}
} /* namespace platform */

View File

@@ -0,0 +1,20 @@
#ifndef COMMAND_PARSER_H
#define COMMAND_PARSER_H
#include <string>
#include <vector>
#include <tuple>
namespace platform {
// Interactive command reader: shows the available options and parses the
// line typed by the user into a command character and an optional index.
class CommandParser {
public:
    CommandParser() = default;
    // Prompts until a valid command is typed; returns { command, index }
    std::pair<char, int> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex);
    // Last command / index parsed ('\0' / 0 before the first parse)
    char getCommand() const { return command; }
    int getIndex() const { return index; }
private:
    void messageError(const std::string& message);
    // Initialized so that the getters, and the index returned by parse() for
    // commands that take no value, never read an indeterminate value.
    char command{ '\0' };
    int index{ 0 };
};
} /* namespace platform */
#endif /* COMMAND_PARSER_H */

215
src/Platform/Dataset.cc Normal file
View File

@@ -0,0 +1,215 @@
#include "Dataset.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
// Copy constructor: member-wise copy of the configuration, the loaded data
// and the tensors of `dataset`. The initializers are listed in the order in
// which the members are declared in the class.
Dataset::Dataset(const Dataset& dataset)
    : path(dataset.path),
    name(dataset.name),
    fileType(dataset.fileType),
    className(dataset.className),
    n_samples(dataset.n_samples),
    n_features(dataset.n_features),
    features(dataset.features),
    states(dataset.states),
    loaded(dataset.loaded),
    discretize(dataset.discretize),
    X(dataset.X),
    y(dataset.y),
    Xv(dataset.Xv),
    Xd(dataset.Xd),
    yv(dataset.yv)
{
}
// Returns the name of the dataset (available even if it is not loaded).
std::string Dataset::getName() const
{
return name;
}
// Returns the name of the class variable as currently stored; the load_*
// functions may replace it while loading the data.
std::string Dataset::getClassName() const
{
return className;
}
// Returns the names of the features.
// Throws std::invalid_argument if the dataset has not been loaded.
std::vector<std::string> Dataset::getFeatures() const
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return features;
}
// Returns the number of features.
// Throws std::invalid_argument if the dataset has not been loaded.
int Dataset::getNFeatures() const
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return n_features;
}
// Returns the number of samples.
// Throws std::invalid_argument if the dataset has not been loaded.
int Dataset::getNSamples() const
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return n_samples;
}
// Returns the map variable name -> list of its states.
// Throws std::invalid_argument if the dataset has not been loaded.
std::map<std::string, std::vector<int>> Dataset::getStates() const
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return states;
}
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
std::string line;
getline(file, line);
std::vector<std::string> tokens = split(line, ',');
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") {
className = tokens.back();
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
void Dataset::computeStates()
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
// Loads <path>/<name>.arff through ArffFiles and takes from it the samples,
// the labels, the class name and the names of the attributes (appended to
// `features`).
void Dataset::load_arff()
{
    auto arff = ArffFiles();
    arff.load(path + "/" + name + ".arff", className);
    // Dataset X, y and class name
    Xv = arff.getX();
    yv = arff.getY();
    className = arff.getClassName();
    // Feature names: first member of every attribute
    auto attributes = arff.getAttributes();
    for (const auto& attribute : attributes) {
        features.push_back(attribute.first);
    }
}
// Splits `line` into tokens separated by runs of blanks (space, tab or new
// line). Blanks at the end of the line are ignored; a line that starts with a
// blank yields an empty first token.
std::vector<std::string> tokenize(std::string line)
{
    static const char* const blanks = " \t\n";
    std::vector<std::string> tokens;
    std::size_t start = 0;
    std::size_t stop = line.find_first_of(blanks, start);
    while (stop != std::string::npos) {
        tokens.push_back(line.substr(start, stop - start));
        // Skip the whole run of blanks that follows the token
        start = line.find_first_not_of(blanks, stop + 1);
        if (start == std::string::npos) {
            // Nothing but blanks left
            return tokens;
        }
        stop = line.find_first_of(blanks, start);
    }
    if (start < line.size()) {
        tokens.push_back(line.substr(start));
    }
    return tokens;
}
void Dataset::load_rdata()
{
ifstream file(path + "/" + name + "_R.dat");
if (file.is_open()) {
std::string line;
getline(file, line);
line = ArffFiles::trim(line);
std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") {
className = ArffFiles::trim(tokens.back());
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = tokenize(line);
// We have to skip the first token, which is the instance number.
for (auto i = 1; i < features.size() + 1; ++i) {
const float value = stof(tokens[i]);
Xv[i - 1].push_back(value);
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
// Loads the dataset from disk according to its file type (only the first
// time it is called), discretizes it if requested and records its dimensions.
void Dataset::load()
{
    if (loaded) {
        return;
    }
    switch (fileType) {
        case CSV:
            load_csv();
            break;
        case ARFF:
            load_arff();
            break;
        case RDATA:
            load_rdata();
            break;
    }
    if (discretize) {
        Xd = discretizeDataset(Xv, yv);
        computeStates();
    }
    // Xv holds one vector per feature; with no feature columns there is no
    // Xv[0] to read the number of samples from.
    n_features = Xv.size();
    n_samples = Xv.empty() ? 0 : Xv[0].size();
    loaded = true;
}
/**
 * Build the X and y tensors from the vectors already held by the dataset.
 *
 * X is allocated as (n_features, n_samples) with dtype kInt32 when discretize
 * is set (rows taken from Xd) and kFloat32 otherwise (rows taken from Xv).
 * y is built from yv as kInt32.
 */
void Dataset::buildTensors()
{
    const auto rows = static_cast<int>(n_features);
    const auto cols = static_cast<int>(n_samples);
    const auto dtype = discretize ? torch::kInt32 : torch::kFloat32;
    X = torch::zeros({ rows, cols }, dtype);
    for (int row = 0; row < features.size(); ++row) {
        // Fill one row of X per feature
        X.index_put_({ row, "..." }, discretize ? torch::tensor(Xd[row], torch::kInt32) : torch::tensor(Xv[row], torch::kFloat32));
    }
    y = torch::tensor(yv, torch::kInt32);
}
/**
 * Discretize every feature column with a CPPFImdlp discretizer.
 *
 * For each column of X the discretizer is fitted against y and the column is
 * then transformed; the transformed labels are copied into the result.
 *
 * @param X one vector of continuous values per feature
 * @param y class labels used to fit the discretizer
 * @return one vector of discretized labels per feature, in the same order
 */
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    auto discretizer = mdlp::CPPFImdlp();
    std::vector<mdlp::labels_t> discretized;
    for (auto& column : X) {
        discretizer.fit(column, y);
        // transform() hands back a reference; push_back stores a copy of it
        discretized.push_back(discretizer.transform(column));
    }
    return discretized;
}
}

78
src/Platform/Dataset.h Normal file
View File

@@ -0,0 +1,78 @@
#ifndef DATASET_H
#define DATASET_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
#include "CPPFImdlp.h"
#include "Utils.h"
namespace platform {
// Supported dataset file formats
enum fileType_t { CSV, ARFF, RDATA };
/**
 * Maps a dataset source name to the folder where its files live and to the
 * file format used by that source.
 *
 * Known sources: "Surcov" (CSV, "datasets/"), "Arff" (ARFF, "datasets/") and
 * "Tanveer" (RDATA, "data/").
 */
class SourceData {
public:
    /**
     * @param source name of the dataset source
     * @throws std::invalid_argument if the source name is not known
     */
    SourceData(std::string source)
    {
        if (source == "Surcov") {
            path = "datasets/";
            fileType = CSV;
        } else if (source == "Arff") {
            path = "datasets/";
            fileType = ARFF;
        } else if (source == "Tanveer") {
            path = "data/";
            fileType = RDATA;
        } else {
            throw std::invalid_argument("Unknown source.");
        }
    }
    // Folder (with trailing slash) that holds the datasets of this source
    std::string getPath() const
    {
        return path;
    }
    // File format of the datasets of this source
    fileType_t getFileType() const
    {
        return fileType;
    }
private:
    std::string path;
    fileType_t fileType;
};
class Dataset {
private:
std::string path;
std::string name;
fileType_t fileType;
std::string className;
int n_samples{ 0 }, n_features{ 0 };
std::vector<std::string> features;
std::map<std::string, std::vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
std::vector<std::vector<float>> Xv;
std::vector<std::vector<int>> Xd;
std::vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void load_rdata();
void computeStates();
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
public:
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
std::string getName() const;
std::string getClassName() const;
std::vector<string> getFeatures() const;
std::map<std::string, std::vector<int>> getStates() const;
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
};
#endif

129
src/Platform/Datasets.cc Normal file
View File

@@ -0,0 +1,129 @@
#include "Datasets.h"
#include <fstream>
namespace platform {
/**
 * Read the catalog file "<path>all.txt" and register one (not yet loaded)
 * Dataset per line.
 *
 * Empty lines and lines starting with '#' are ignored. Each remaining line is
 * split on ',': the first token is the dataset name and the second, when
 * present, its class name ("-1" is used when there is only one token).
 *
 * @throws std::invalid_argument if the catalog file cannot be opened
 */
void Datasets::load()
{
    auto source = SourceData(sfileType);
    fileType = source.getFileType();
    path = source.getPath();
    ifstream catalog(path + "all.txt");
    if (!catalog.is_open()) {
        throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
    }
    std::string line;
    while (getline(catalog, line)) {
        const bool ignorable = line.empty() || line[0] == '#';
        if (ignorable) {
            continue;
        }
        std::vector<std::string> tokens = split(line, ',');
        std::string name = tokens[0];
        std::string className = tokens.size() == 1 ? "-1" : tokens[1];
        datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
    }
    catalog.close();
}
/**
 * @return the names of all the datasets in the catalog, in map key order
 */
std::vector<std::string> Datasets::getNames()
{
    std::vector<std::string> names;
    for (const auto& entry : datasets) {
        names.push_back(entry.first);
    }
    return names;
}
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
/**
 * @param name dataset name (std::out_of_range from at() if it is unknown)
 * @return the states map of the dataset
 * @throws std::invalid_argument if the dataset has not been loaded yet
 */
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{
    const auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return dataset->getStates();
}
void Datasets::loadDataset(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return;
} else {
datasets.at(name)->load();
}
}
std::string Datasets::getClassName(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
/**
 * Number of classes of a loaded dataset.
 *
 * With discretization enabled it is the number of states registered for the
 * class variable; otherwise it is the largest label plus one (0 when the
 * dataset has no labels at all).
 *
 * @param name dataset name (std::out_of_range from at() if it is unknown)
 * @throws std::invalid_argument if the dataset has not been loaded yet
 */
int Datasets::getNClasses(const std::string& name)
{
    if (!datasets.at(name)->isLoaded()) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    auto className = datasets.at(name)->getClassName();
    if (discretize) {
        auto states = getStates(name);
        return static_cast<int>(states.at(className).size());
    }
    auto [Xv, yv] = getVectors(name);
    if (yv.empty()) {
        // max_element returns end() on an empty range; dereferencing it is undefined
        return 0;
    }
    return *std::max_element(yv.begin(), yv.end()) + 1;
}
/**
 * Count how many samples carry each class label in a loaded dataset.
 *
 * The result is indexed by label and has (largest label + 1) entries; it is
 * empty when the dataset has no labels.
 * NOTE(review): labels are used directly as indices, so they are assumed to
 * be non-negative — confirm against the loaders.
 *
 * @param name dataset name (std::out_of_range from at() if it is unknown)
 * @throws std::invalid_argument if the dataset has not been loaded yet
 */
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
{
    if (!datasets.at(name)->isLoaded()) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    auto [Xv, yv] = datasets.at(name)->getVectors();
    if (yv.empty()) {
        // max_element returns end() on an empty range; dereferencing it is undefined
        return std::vector<int>();
    }
    std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
    for (auto y : yv) {
        counts[y]++;
    }
    return counts;
}
/**
 * Return references to the value vectors and labels of a dataset, loading it
 * first if needed.
 *
 * The lookup uses at() so that an unknown name raises std::out_of_range
 * instead of inserting a null entry in the map and dereferencing it.
 *
 * @param name dataset name
 */
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getVectors();
}
/**
 * Return references to the discretized vectors and labels of a dataset,
 * loading it first if needed.
 *
 * The lookup uses at() so that an unknown name raises std::out_of_range
 * instead of inserting a null entry in the map and dereferencing it.
 *
 * @param name dataset name
 */
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getVectorsDiscretized();
}
/**
 * Return references to the X and y tensors of a dataset, loading it first if
 * needed.
 *
 * The lookup uses at() so that an unknown name raises std::out_of_range
 * instead of inserting a null entry in the map and dereferencing it.
 *
 * @param name dataset name
 */
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getTensors();
}
/**
 * @param name dataset name
 * @return true if the catalog contains a dataset with that name
 */
bool Datasets::isDataset(const std::string& name) const
{
    return datasets.count(name) != 0;
}
}

30
src/Platform/Datasets.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef DATASETS_H
#define DATASETS_H
#include "Dataset.h"
namespace platform {
/**
 * Catalog of datasets of a given source.
 *
 * The list of datasets is read on construction; each dataset's data is only
 * loaded when it is first needed.
 */
class Datasets {
private:
    std::string path;
    fileType_t fileType;
    std::string sfileType;
    std::map<std::string, std::unique_ptr<Dataset>> datasets;
    bool discretize;
    void load(); // Loads the list of datasets
public:
    // Initializers are listed in member declaration order, which is the order
    // in which they actually run; path and fileType are set by load().
    explicit Datasets(bool discretize, std::string sfileType) : sfileType(sfileType), discretize(discretize) { load(); }
    std::vector<std::string> getNames();
    std::vector<std::string> getFeatures(const std::string& name) const;
    int getNSamples(const std::string& name) const;
    std::string getClassName(const std::string& name) const;
    int getNClasses(const std::string& name);
    std::vector<int> getClassesCounts(const std::string& name) const;
    std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
    std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
    std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
    std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
    bool isDataset(const std::string& name) const;
    void loadDataset(const std::string& name) const;
};
};
#endif

55
src/Platform/DotEnv.h Normal file
View File

@@ -0,0 +1,55 @@
#ifndef DOTENV_H
#define DOTENV_H
#include <string>
#include <map>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <iostream>
#include "Utils.h"
//#include "Dataset.h"
namespace platform {
/**
 * Reads KEY=VALUE pairs from the ".env" file of the working directory.
 *
 * Empty lines and lines starting with '#' are ignored. The process exits with
 * status 1 if the file cannot be opened.
 */
class DotEnv {
private:
    std::map<std::string, std::string> env;
public:
    DotEnv()
    {
        std::ifstream file(".env");
        if (!file.is_open()) {
            std::cerr << "File .env not found" << std::endl;
            exit(1);
        }
        std::string line;
        while (std::getline(file, line)) {
            line = trim(line);
            if (line.empty() || line[0] == '#') {
                continue;
            }
            std::istringstream iss(line);
            std::string key, value;
            // The key is everything before the first '=', the value is the rest of the line
            if (std::getline(iss, key, '=') && std::getline(iss, value)) {
                env[key] = value;
            }
        }
    }
    /**
     * @return the value stored for key
     * @throws std::out_of_range if the key is not defined
     */
    std::string get(const std::string& key) const
    {
        return env.at(key);
    }
    /**
     * Parse the "seeds" entry, expected to be a delimited list such as
     * "[1,2,3]": the first and last characters are dropped and the rest is
     * split on ','.
     *
     * @return the seeds as integers
     * @throws std::out_of_range if "seeds" is not defined (at() is used so
     *         that the lookup never inserts an empty entry in the map)
     */
    std::vector<int> getSeeds() const
    {
        auto seeds = std::vector<int>();
        auto seeds_str = env.at("seeds");
        seeds_str = trim(seeds_str);
        // Strip the enclosing delimiters
        seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
        auto seeds_str_split = split(seeds_str, ',');
        transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
            return stoi(str);
            });
        return seeds;
    }
};
}
#endif

168
src/Platform/ExcelFile.cc Normal file
View File

@@ -0,0 +1,168 @@
#include "ExcelFile.h"
namespace platform {
// Constructors: every overload leaves the handles it does not receive as
// nullptr (instead of uninitialized) and then applies the default settings.
ExcelFile::ExcelFile() : workbook(nullptr), worksheet(nullptr)
{
    setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook) : workbook(workbook), worksheet(nullptr)
{
    setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet) : workbook(workbook), worksheet(worksheet)
{
    setDefault();
}
/**
 * Reset the current row, the body font size and the colors (title, odd and
 * even) to their default values.
 */
void ExcelFile::setDefault()
{
    row = 0;
    normalSize = 14; // font size for report body
    // Colors are given as hexadecimal integers
    colorTitle = 0xB1A0C7;
    colorOdd = 0xDCE6F1;
    colorEven = 0xFDE9D9;
}
// Accessor for the workbook handle held by this object (not owned transfer: the pointer is returned as is).
lxw_workbook* ExcelFile::getWorkbook()
{
    return this->workbook;
}
/**
 * Set the document properties of the workbook (title plus fixed subject,
 * author, manager, company and comments).
 *
 * @param title document title
 */
void ExcelFile::setProperties(std::string title)
{
    // Zero-initialize so that every property not set below stays null/0
    lxw_doc_properties properties = {};
    // The title buffer lives until the end of this function, which is all the
    // call below needs; no variable-length array or strcpy is required.
    properties.title = const_cast<char*>(title.c_str());
    properties.subject = (char*)"Machine learning results";
    properties.author = (char*)"Ricardo Montañana Gómez";
    properties.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta";
    properties.company = (char*)"UCLM";
    properties.comments = (char*)"Created with libxlsxwriter and c++";
    workbook_set_properties(workbook, &properties);
}
/**
 * Resolve a style name to the format to use on the current row.
 *
 * The row-parity variant ("<style>_odd" / "<style>_even", chosen from the
 * member row) is tried first and the plain name second.
 *
 * @param style style name; an empty name means "no format"
 * @return the format, or nullptr when style is empty
 * @throws std::invalid_argument if neither variant is registered
 */
lxw_format* ExcelFile::efectiveStyle(const std::string& style)
{
    if (style.empty()) {
        return nullptr;
    }
    const std::string suffix = row % 2 ? "_odd" : "_even";
    // Plain lookups instead of try/catch: a miss is an expected case here
    auto found = styles.find(style + suffix);
    if (found == styles.end()) {
        found = styles.find(style);
    }
    if (found == styles.end()) {
        throw std::invalid_argument("Style " + style + " not found");
    }
    return found->second;
}
void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ExcelFile::writeInt(int row, int col, const int number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
/**
 * Set the background color of a format according to the odd flag.
 * NOTE(review): odd == true selects colorEven and odd == false selects
 * colorOdd, which reads as inverted — confirm this is intended.
 */
void ExcelFile::addColor(lxw_format* style, bool odd)
{
    if (odd) {
        format_set_bg_color(style, lxw_color_t(colorEven));
    } else {
        format_set_bg_color(style, lxw_color_t(colorOdd));
    }
}
/**
 * Configure a format for one of the named body styles.
 *
 * The row background color is applied first (see addColor) and then the
 * settings specific to the style name. Unknown names only get the color.
 *
 * @param name  style name: textCentered, text, bodyHeader, result, time, ints or floats
 * @param style format to configure
 * @param odd   flag forwarded to addColor
 */
void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd)
{
    addColor(style, odd);
    if (name == "textCentered") {
        format_set_align(style, LXW_ALIGN_CENTER);
        format_set_font_size(style, normalSize);
        format_set_border(style, LXW_BORDER_THIN);
        return;
    }
    if (name == "text") {
        format_set_font_size(style, normalSize);
        format_set_border(style, LXW_BORDER_THIN);
        return;
    }
    if (name == "bodyHeader") {
        format_set_bold(style);
        format_set_font_size(style, normalSize);
        format_set_align(style, LXW_ALIGN_CENTER);
        format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
        format_set_border(style, LXW_BORDER_THIN);
        // The header replaces the row color set above with the title color
        format_set_bg_color(style, lxw_color_t(colorTitle));
        return;
    }
    if (name == "result") {
        format_set_font_size(style, normalSize);
        format_set_border(style, LXW_BORDER_THIN);
        format_set_num_format(style, "0.0000000");
        return;
    }
    if (name == "time") {
        format_set_font_size(style, normalSize);
        format_set_border(style, LXW_BORDER_THIN);
        format_set_num_format(style, "#,##0.000000");
        return;
    }
    if (name == "ints") {
        format_set_font_size(style, normalSize);
        format_set_num_format(style, "###,##0");
        format_set_border(style, LXW_BORDER_THIN);
        return;
    }
    if (name == "floats") {
        format_set_border(style, LXW_BORDER_THIN);
        format_set_font_size(style, normalSize);
        format_set_num_format(style, "#,##0.00");
    }
}
/**
 * Create every format used by the reports and register it in styles.
 *
 * Each body style gets two variants, "<name>_odd" and "<name>_even"; the
 * header and summary styles are registered under their plain names.
 */
void ExcelFile::createFormats()
{
    auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
    for (std::string name : styleNames) {
        // Exactly one format per variant (no throw-away format is created)
        lxw_format* style = workbook_add_format(workbook);
        createStyle(name, style, true);
        styles[name + "_odd"] = style;
        style = workbook_add_format(workbook);
        createStyle(name, style, false);
        styles[name + "_even"] = style;
    }
    // Header 1st line
    lxw_format* headerFirst = workbook_add_format(workbook);
    format_set_bold(headerFirst);
    format_set_font_size(headerFirst, 18);
    format_set_align(headerFirst, LXW_ALIGN_CENTER);
    format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
    format_set_border(headerFirst, LXW_BORDER_THIN);
    format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
    // Header rest
    lxw_format* headerRest = workbook_add_format(workbook);
    format_set_bold(headerRest);
    format_set_align(headerRest, LXW_ALIGN_CENTER);
    format_set_font_size(headerRest, 16);
    format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
    format_set_border(headerRest, LXW_BORDER_THIN);
    format_set_bg_color(headerRest, lxw_color_t(colorOdd));
    // Header small
    lxw_format* headerSmall = workbook_add_format(workbook);
    format_set_bold(headerSmall);
    format_set_align(headerSmall, LXW_ALIGN_LEFT);
    format_set_font_size(headerSmall, 12);
    format_set_border(headerSmall, LXW_BORDER_THIN);
    format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
    format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
    // Summary style
    lxw_format* summaryStyle = workbook_add_format(workbook);
    format_set_bold(summaryStyle);
    format_set_font_size(summaryStyle, 16);
    format_set_border(summaryStyle, LXW_BORDER_THIN);
    format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
    styles["headerFirst"] = headerFirst;
    styles["headerRest"] = headerRest;
    styles["headerSmall"] = headerSmall;
    styles["summaryStyle"] = summaryStyle;
}
}

43
src/Platform/ExcelFile.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef EXCELFILE_H
#define EXCELFILE_H
#include <locale>
#include <string>
#include <map>
#include "xlsxwriter.h"
namespace platform {
/**
 * Number punctuation facet: ',' as decimal point, '.' as thousands separator
 * and digits grouped by three.
 */
struct separated : std::numpunct<char> {
    // override makes the compiler check these really replace the base virtuals
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
/**
 * Base class for the Excel reports: keeps the workbook/worksheet handles, the
 * registered formats and the helpers used to write cells.
 */
class ExcelFile {
public:
    ExcelFile();
    ExcelFile(lxw_workbook* workbook);
    ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
    lxw_workbook* getWorkbook();
protected:
    void setProperties(std::string title);
    void writeString(int row, int col, const std::string& text, const std::string& style = "");
    void writeInt(int row, int col, const int number, const std::string& style = "");
    void writeDouble(int row, int col, const double number, const std::string& style = "");
    void createFormats();
    void createStyle(const std::string& name, lxw_format* style, bool odd);
    void addColor(lxw_format* style, bool odd);
    lxw_format* efectiveStyle(const std::string& name);
    // Handles default to nullptr so a constructor that does not receive them
    // never leaves them uninitialized
    lxw_workbook* workbook{ nullptr };
    lxw_worksheet* worksheet{ nullptr };
    std::map<std::string, lxw_format*> styles; // format registered for each style name
    int row; // current row; its parity selects the _odd/_even format variant
    int normalSize; //font size for report body
    uint32_t colorTitle;
    uint32_t colorOdd;
    uint32_t colorEven;
private:
    void setDefault();
};
}
#endif // !EXCELFILE_H

226
src/Platform/Experiment.cc Normal file
View File

@@ -0,0 +1,226 @@
#include <fstream>
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include "Paths.h"
namespace platform {
using json = nlohmann::json;
/**
 * @return the current local date formatted as YYYY-MM-DD
 */
std::string get_date()
{
    const std::time_t now = std::time(nullptr);
    std::ostringstream out;
    out << std::put_time(std::localtime(&now), "%Y-%m-%d");
    return out.str();
}
/**
 * @return the current local time formatted as HH:MM:SS
 */
std::string get_time()
{
    const std::time_t now = std::time(nullptr);
    std::ostringstream out;
    out << std::put_time(std::localtime(&now), "%H:%M:%S");
    return out.str();
}
std::string Experiment::get_file_name()
{
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result;
}
/**
 * Serialize the experiment (settings plus one entry per stored Result) into a
 * json document. Date and time are taken at the moment of the call.
 */
json Experiment::build_json()
{
    json document;
    // Experiment-wide settings
    document["title"] = title;
    document["date"] = get_date();
    document["time"] = get_time();
    document["model"] = model;
    document["version"] = model_version;
    document["platform"] = platform;
    document["score_name"] = score_name;
    document["language"] = language;
    document["language_version"] = language_version;
    document["discretized"] = discretized;
    document["stratified"] = stratified;
    document["folds"] = nfolds;
    document["seeds"] = randomSeeds;
    document["duration"] = duration;
    document["results"] = json::array();
    // One entry per dataset result
    for (const auto& item : results) {
        json entry;
        entry["dataset"] = item.getDataset();
        entry["hyperparameters"] = item.getHyperparameters();
        entry["samples"] = item.getSamples();
        entry["features"] = item.getFeatures();
        entry["classes"] = item.getClasses();
        entry["score_train"] = item.getScoreTrain();
        entry["score_test"] = item.getScoreTest();
        // "score" and "score_std" duplicate the test score values
        entry["score"] = item.getScoreTest();
        entry["score_std"] = item.getScoreTestStd();
        entry["score_train_std"] = item.getScoreTrainStd();
        entry["score_test_std"] = item.getScoreTestStd();
        entry["train_time"] = item.getTrainTime();
        entry["train_time_std"] = item.getTrainTimeStd();
        entry["test_time"] = item.getTestTime();
        entry["test_time_std"] = item.getTestTimeStd();
        // Total time figures are the sum of the test and train figures
        entry["time"] = item.getTestTime() + item.getTrainTime();
        entry["time_std"] = item.getTestTimeStd() + item.getTrainTimeStd();
        entry["scores_train"] = item.getScoresTrain();
        entry["scores_test"] = item.getScoresTest();
        entry["times_train"] = item.getTimesTrain();
        entry["times_test"] = item.getTimesTest();
        entry["nodes"] = item.getNodes();
        entry["leaves"] = item.getLeaves();
        entry["depth"] = item.getDepth();
        document["results"].push_back(entry);
    }
    return document;
}
void Experiment::save(const std::string& path)
{
json data = build_json();
ofstream file(path + "/" + get_file_name());
file << data;
file.close();
}
void Experiment::report()
{
json data = build_json();
ReportConsole report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
std::cout << data.dump(4) << std::endl;
}
/**
 * Run the cross validation for every dataset in the list, printing a banner
 * first and one progress line per dataset.
 *
 * @param filesToProcess names of the datasets to process
 * @param quiet          forwarded to cross_validation
 */
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet)
{
    std::cout << "*** Starting experiment: " << title << " ***" << std::endl;
    for (const auto& fileName : filesToProcess) {
        // Dataset name left-aligned in a 20 character column
        std::cout << "- " << setw(20) << left << fileName << " " << right << flush;
        cross_validation(fileName, quiet);
        std::cout << std::endl;
    }
}
/**
 * Map a classifier status to a console color: NORMAL is green, WARNING is
 * yellow, ERROR is red and anything else resets the color.
 */
std::string getColor(bayesnet::status_t status)
{
    if (status == bayesnet::NORMAL) {
        return Colors::GREEN();
    }
    if (status == bayesnet::WARNING) {
        return Colors::YELLOW();
    }
    if (status == bayesnet::ERROR) {
        return Colors::RED();
    }
    return Colors::RESET();
}
/**
 * Print the fold progress marker "<fold>(<phase>)" in the given color.
 * For every phase other than "a" four backspaces are emitted first.
 */
void showProgress(int fold, const std::string& color, const std::string& phase)
{
    std::string prefix;
    if (phase != "a") {
        prefix = "\b\b\b\b";
    }
    std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const std::string& fileName, bool quiet)
{
auto datasets = Datasets(discretized, Paths::datasets());
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
if (!quiet) {
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
}
// Prepare Result
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters.get(fileName));
// Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
auto edges = torch::zeros({ nResults }, torch::kFloat64);
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
Timer train_timer, test_timer;
int item = 0;
for (auto seed : randomSeeds) {
if (!quiet)
std::cout << "(" << seed << ") doing Fold: " << flush;
folding::Fold* fold;
if (stratified)
fold = new folding::StratifiedKFold(nfolds, y, seed);
else
fold = new folding::KFold(nfolds, y.size(0), seed);
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, fileName);
clf->setHyperparameters(hyperparameters.get(fileName));
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
clf->fit(X_train, y_train, features, className, states);
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
// Score train
auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
if (!quiet)
std::cout << "\b\b\b, " << flush;
// Store results and times in std::vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
}
if (!quiet)
std::cout << "end. " << flush;
delete fold;
}
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
result.setDataset(fileName);
addResult(result);
}
}

103
src/Platform/Experiment.h Normal file
View File

@@ -0,0 +1,103 @@
#ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include "folding.hpp"
#include "BaseClassifier.h"
#include "HyperParameters.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
#include "Timer.h"
namespace platform {
using json = nlohmann::json;
/**
 * Results of one model on one dataset: dataset description, aggregated
 * scores and times (mean and std) and the per-fold values.
 *
 * Setters and adders return *this so that calls can be chained.
 */
class Result {
private:
    std::string dataset, model_version;
    json hyperparameters;
    int samples{ 0 }, features{ 0 }, classes{ 0 };
    double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
    float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
    std::vector<double> scores_train, scores_test, times_train, times_test;
public:
    Result() = default;
    // Chainable setters
    Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; }
    Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
    Result& setSamples(int samples) { this->samples = samples; return *this; }
    Result& setFeatures(int features) { this->features = features; return *this; }
    Result& setClasses(int classes) { this->classes = classes; return *this; }
    Result& setScoreTrain(double score) { this->score_train = score; return *this; }
    Result& setScoreTest(double score) { this->score_test = score; return *this; }
    Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; }
    Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; }
    Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; }
    Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; }
    Result& setTestTime(double test_time) { this->test_time = test_time; return *this; }
    Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; }
    Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
    Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
    Result& setDepth(float depth) { this->depth = depth; return *this; }
    // Per-fold values
    Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; }
    Result& addScoreTest(double score) { scores_test.push_back(score); return *this; }
    Result& addTimeTrain(double time) { times_train.push_back(time); return *this; }
    Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
    // Legacy accessors: they return the scores narrowed to float
    float get_score_train() const { return score_train; }
    float get_score_test() const { return score_test; }
    // Getters (by-value returns carry no top-level const: it has no effect)
    const std::string& getDataset() const { return dataset; }
    const json& getHyperparameters() const { return hyperparameters; }
    int getSamples() const { return samples; }
    int getFeatures() const { return features; }
    int getClasses() const { return classes; }
    double getScoreTrain() const { return score_train; }
    double getScoreTest() const { return score_test; }
    double getScoreTrainStd() const { return score_train_std; }
    double getScoreTestStd() const { return score_test_std; }
    double getTrainTime() const { return train_time; }
    double getTrainTimeStd() const { return train_time_std; }
    double getTestTime() const { return test_time; }
    double getTestTimeStd() const { return test_time_std; }
    float getNodes() const { return nodes; }
    float getLeaves() const { return leaves; }
    float getDepth() const { return depth; }
    const std::vector<double>& getScoresTrain() const { return scores_train; }
    const std::vector<double>& getScoresTest() const { return scores_test; }
    const std::vector<double>& getTimesTrain() const { return times_train; }
    const std::vector<double>& getTimesTest() const { return times_test; }
};
class Experiment {
public:
Experiment() = default;
Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const std::string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
std::string get_file_name();
void save(const std::string& path);
void cross_validation(const std::string& fileName, bool quiet);
void go(std::vector<std::string> filesToProcess, bool quiet);
void show();
void report();
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
HyperParameters hyperparameters;
int nfolds{ 0 };
float duration{ 0 };
json build_json();
};
}
#endif

75
src/Platform/GridData.cc Normal file
View File

@@ -0,0 +1,75 @@
#include "GridData.h"
#include <fstream>
namespace platform {
/**
 * Load the grid definition from a json file: every top-level key of the file
 * becomes an entry of grid.
 *
 * @param fileName path of the json file
 * @throws std::invalid_argument if the file cannot be opened
 */
GridData::GridData(const std::string& fileName)
{
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open input file. [" + fileName + "]");
    }
    json grid_file = json::parse(resultData);
    for (const auto& item : grid_file.items()) {
        grid[item.key()] = item.value();
    }
}
int GridData::computeNumCombinations(const json& line)
{
int numCombinations = 1;
for (const auto& item : line.items()) {
numCombinations *= item.value().size();
}
return numCombinations;
}
/**
 * @param dataset dataset name (the ALL_DATASETS entry is used when the grid
 *                has no entry for it)
 * @return the total number of combinations of all the lines of that entry
 */
int GridData::getNumCombinations(const std::string& dataset)
{
    const auto selected = decide_dataset(dataset);
    int total = 0;
    for (const auto& line : grid.at(selected)) {
        total += computeNumCombinations(line);
    }
    return total;
}
/**
 * Recursively expand the entries in [index, last) into every combination of
 * their values, appending each complete combination to output.
 *
 * @param index              entry being expanded
 * @param last               end of the entries
 * @param output             receives the complete combinations
 * @param currentCombination values chosen so far for the previous entries
 * @return currentCombination as received
 */
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
    if (index != last) {
        const auto& key = index.key();
        const auto& values = index.value();
        // Every value of this entry continues with the same next entry
        json::iterator following = index;
        ++following;
        for (const auto& value : values) {
            json candidate = currentCombination;
            candidate[key] = value;
            generateCombinations(following, last, output, candidate);
        }
        return currentCombination;
    }
    // If we reached the end of input, store the current combination
    output.push_back(currentCombination);
    return currentCombination;
}
/**
 * @param dataset dataset name (the ALL_DATASETS entry is used when the grid
 *                has no entry for it)
 * @return every combination generated from the lines of that entry
 */
std::vector<json> GridData::getGrid(const std::string& dataset)
{
    std::vector<json> combinations;
    // Each line is copied because generateCombinations needs mutable iterators
    for (json line : grid.at(decide_dataset(dataset))) {
        generateCombinations(line.begin(), line.end(), combinations, json({}));
    }
    return combinations;
}
/**
 * @param dataset dataset name (the ALL_DATASETS entry is used when the grid
 *                has no entry for it)
 * @return a reference to the stored grid entry
 */
json& GridData::getInputGrid(const std::string& dataset)
{
    return grid.at(decide_dataset(dataset));
}
/**
 * @param dataset dataset name
 * @return dataset if the grid has an entry for it, ALL_DATASETS otherwise
 */
std::string GridData::decide_dataset(const std::string& dataset)
{
    return grid.count(dataset) > 0 ? dataset : ALL_DATASETS;
}
} /* namespace platform */

26
src/Platform/GridData.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef GRIDDATA_H
#define GRIDDATA_H
#include <string>
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
// Key of the grid entry used for datasets that have no entry of their own
const std::string ALL_DATASETS = "all";
/**
 * Hyperparameter grid read from a json file, stored as one json entry per
 * top-level key of the file.
 */
class GridData {
public:
    // Loads the grid from fileName; throws std::invalid_argument if it cannot be opened
    explicit GridData(const std::string& fileName);
    ~GridData() = default;
    // Every combination generated from the lines of the entry selected for dataset
    std::vector<json> getGrid(const std::string& dataset = ALL_DATASETS);
    // Total number of combinations of the entry selected for dataset
    int getNumCombinations(const std::string& dataset = ALL_DATASETS);
    // Reference to the stored entry selected for dataset
    json& getInputGrid(const std::string& dataset = ALL_DATASETS);
    // Direct access to the whole map of entries
    std::map<std::string, json>& getGridFile() { return grid; }
private:
    // Returns dataset if it has its own entry, ALL_DATASETS otherwise
    std::string decide_dataset(const std::string& dataset);
    // Recursive expansion of the entries in [index, last) into output
    json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
    // Product of the sizes of the values held by line
    int computeNumCombinations(const json& line);
    std::map<std::string, json> grid;
};
} /* namespace platform */
#endif /* GRIDDATA_H */

441
src/Platform/GridSearch.cc Normal file
View File

@@ -0,0 +1,441 @@
#include <iostream>
#include <cstddef>
#include <torch/torch.h>
#include "GridSearch.h"
#include "Models.h"
#include "Paths.h"
#include "folding.hpp"
#include "Colors.h"
namespace platform {
// Current local date formatted as YYYY-MM-DD.
std::string get_date()
{
    const time_t now = time(nullptr);
    std::ostringstream formatted;
    formatted << std::put_time(std::localtime(&now), "%Y-%m-%d");
    return formatted.str();
}
// Current local time of day formatted as HH:MM:SS.
std::string get_time()
{
    const time_t now = time(nullptr);
    std::ostringstream formatted;
    formatted << std::put_time(std::localtime(&now), "%H:%M:%S");
    return formatted.str();
}
// Map an MPI rank onto one of six console colors, cycling through the palette.
std::string get_color_rank(int rank)
{
    auto palette = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
    const auto slot = rank % palette.size();
    return *(palette.begin() + slot);
}
// Keep a private copy of the grid-search configuration.
GridSearch::GridSearch(struct ConfigGrid& config) : config(config) {}
// Read the grid output file of the configured model.
// Returns a default-constructed json when the file cannot be opened.
json GridSearch::loadResults()
{
    std::ifstream input(Paths::grid_output(config.model));
    if (!input.is_open()) {
        return json();
    }
    return json::parse(input);
}
// Build the list of dataset names to process in this run.
//  - With continue_from set: drop every dataset listed before it, or keep
//    only that dataset when config.only is set.
//  - Then remove every name listed in config.excluded.
// Throws std::invalid_argument if continue_from or an excluded name is not in the list.
std::vector<std::string> GridSearch::filterDatasets(Datasets& datasets) const
{
    auto names = datasets.getNames();
    const bool resuming = config.continue_from != NO_CONTINUE();
    if (resuming) {
        const auto start = std::find(names.begin(), names.end(), config.continue_from);
        if (start == names.end()) {
            throw std::invalid_argument("Dataset " + config.continue_from + " not found");
        }
        if (config.only) {
            // Keep just the entries equal to continue_from
            std::vector<std::string> kept;
            for (const auto& candidate : names) {
                if (candidate == config.continue_from) {
                    kept.push_back(candidate);
                }
            }
            names = kept;
        } else {
            // Drop everything listed before the dataset to continue from
            names.erase(names.begin(), start);
        }
    }
    for (const auto& excluded_item : config.excluded) {
        const auto target = excluded_item.get<std::string>();
        const auto position = std::find(names.begin(), names.end(), target);
        if (position == names.end()) {
            throw std::invalid_argument("Dataset " + target + " already excluded or doesn't exist!");
        }
        names.erase(position);
    }
    return names;
}
// Build the list of tasks (one per dataset x seed x outer fold), shuffle it
// with a fixed seed and print the progress-bar ruler.
//   rank: rank of the calling process, used only to color the console output.
// Returns a json array of task objects {dataset, idx_dataset, seed, fold}.
json GridSearch::build_tasks_mpi(int rank)
{
    auto tasks = json::array();
    // NOTE(review): `grid` is no longer queried here (the expanded combinations
    // were never used); the object is still constructed so the grid input file
    // is touched by the manager as before — presumably this reports a missing
    // or malformed file early, confirm against the GridData constructor.
    auto grid = GridData(Paths::grid_input(config.model));
    auto datasets = Datasets(false, Paths::datasets());
    auto datasets_names = filterDatasets(datasets);
    const int n_datasets = static_cast<int>(datasets_names.size());
    for (int idx_dataset = 0; idx_dataset < n_datasets; ++idx_dataset) {
        const auto& dataset = datasets_names[idx_dataset];
        for (const auto& seed : config.seeds) {
            for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
                json task = {
                    { "dataset", dataset },
                    { "idx_dataset", idx_dataset},
                    { "seed", seed },
                    { "fold", n_fold},
                };
                tasks.push_back(task);
            }
        }
    }
    // Shuffle the array so heavy datasets are spread across the workers
    std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
    std::shuffle(tasks.begin(), tasks.end(), g);
    std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl;
    // Ruler: one digit per task, so consumers can print one '*' under each
    std::cout << "|";
    const int n_tasks = static_cast<int>(tasks.size());
    for (int i = 0; i < n_tasks; ++i) {
        std::cout << (i + 1) % 10;
    }
    std::cout << "|" << std::endl << "|" << std::flush;
    return tasks;
}
void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
{
// initialize
Timer timer;
timer.start();
json task = tasks[n_task];
auto model = config.model;
auto grid = GridData(Paths::grid_input(model));
auto dataset = task["dataset"].get<std::string>();
auto idx_dataset = task["idx_dataset"].get<int>();
auto seed = task["seed"].get<int>();
auto n_fold = task["fold"].get<int>();
bool stratified = config.stratified;
// Generate the hyperparamters combinations
auto combinations = grid.getGrid(dataset);
auto [X, y] = datasets.getTensors(dataset);
auto states = datasets.getStates(dataset);
auto features = datasets.getFeatures(dataset);
auto className = datasets.getClassName(dataset);
//
// Start working on task
//
folding::Fold* fold;
if (stratified)
fold = new folding::StratifiedKFold(config.n_folds, y, seed);
else
fold = new folding::KFold(config.n_folds, y.size(0), seed);
auto [train, test] = fold->getFold(n_fold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
double best_fold_score = 0.0;
int best_idx_combination = -1;
json best_fold_hyper;
for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) {
auto hyperparam_line = combinations[idx_combination];
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
folding::Fold* nested_fold;
if (config.stratified)
nested_fold = new folding::StratifiedKFold(config.nested, y_train, seed);
else
nested_fold = new folding::KFold(config.nested, y_train.size(0), seed);
double score = 0.0;
for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) {
// Nested level fold
auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold);
auto train_nested_t = torch::tensor(train_nested);
auto test_nested_t = torch::tensor(test_nested);
auto X_nested_train = X_train.index({ "...", train_nested_t });
auto y_nested_train = y_train.index({ train_nested_t });
auto X_nested_test = X_train.index({ "...", test_nested_t });
auto y_nested_test = y_train.index({ test_nested_t });
// Build Classifier with selected hyperparameters
auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset);
clf->setHyperparameters(hyperparameters.get(dataset));
// Train model
clf->fit(X_nested_train, y_nested_train, features, className, states);
// Test model
score += clf->score(X_nested_test, y_nested_test);
}
delete nested_fold;
score /= config.nested;
if (score > best_fold_score) {
best_fold_score = score;
best_idx_combination = idx_combination;
best_fold_hyper = hyperparam_line;
}
}
delete fold;
// Build Classifier with the best hyperparameters to obtain the best score
auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset);
clf->setHyperparameters(best_fold_hyper);
clf->fit(X_train, y_train, features, className, states);
best_fold_score = clf->score(X_test, y_test);
// Return the result
result->idx_dataset = task["idx_dataset"].get<int>();
result->idx_combination = best_idx_combination;
result->score = best_fold_score;
result->n_fold = n_fold;
result->time = timer.getDuration();
// Update progress bar
std::cout << get_color_rank(config_mpi.rank) << "*" << std::flush;
}
// Append one task result to `results`, under the name of its dataset.
//   names   : dataset names; result.idx_dataset indexes into this vector
//   result  : task result received from a consumer
//   results : accumulator, dataset name -> array of per-task results
// Returns a copy of the updated `results`.
json store_result(std::vector<std::string>& names, Task_Result& result, json& results)
{
    json entry = {
        { "score", result.score },
        { "combination", result.idx_combination },
        { "fold", result.n_fold },
        { "time", result.time },
        { "dataset", result.idx_dataset }
    };
    const auto dataset_name = names[result.idx_dataset];
    const bool first_for_dataset = !results.contains(dataset_name);
    if (first_for_dataset) {
        results[dataset_name] = json::array();
    }
    results[dataset_name].push_back(entry);
    return results;
}
// Manager side of the task distribution: hands out every task index to
// whichever consumer reports in, collects the results, and finally tells
// every consumer to stop.
// Returns the collected results grouped by dataset name.
json producer(std::vector<std::string>& names, json& tasks, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
    Task_Result result;
    json results;
    // Wait for the next message from any consumer; store its payload when it
    // is tagged as a result. Returns the rank of the sender.
    auto attend_consumer = [&]() {
        MPI_Status status;
        MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_RESULT) {
            //Store result
            store_result(names, result, results);
        }
        return status.MPI_SOURCE;
    };
    //
    // 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
    //
    const int num_tasks = tasks.size();
    for (int i = 0; i < num_tasks; ++i) {
        const int consumer_rank = attend_consumer();
        MPI_Send(&i, 1, MPI_INT, consumer_rank, TAG_TASK, MPI_COMM_WORLD);
    }
    //
    // 2a.2 Producer will send the end message to all the consumers
    //
    const int num_consumers = config_mpi.n_procs - 1;
    for (int i = 0; i < num_consumers; ++i) {
        const int consumer_rank = attend_consumer();
        MPI_Send(&i, 1, MPI_INT, consumer_rank, TAG_END, MPI_COMM_WORLD);
    }
    return results;
}
// For every dataset in `all_results`, pick the outer-fold result with the
// highest score and store a summary of it in `results`.
//   results     : output, dataset name -> {score, hyperparameters, date, grid, duration}
//   all_results : dataset name -> array of per-task results (see store_result)
//   model       : model name, used to locate the grid input file
// Datasets without any fold result are skipped.
// Throws std::out_of_range if a stored combination index is not valid for the grid.
void select_best_results_folds(json& results, json& all_results, std::string& model)
{
    Timer timer;
    auto grid = GridData(Paths::grid_input(model));
    //
    // Select the best result of the computed outer folds
    //
    for (const auto& result : all_results.items()) {
        // each result has the results of all the outer folds as each one were a different task
        double best_score = 0.0;
        const json* best = nullptr;
        for (const auto& result_fold : result.value()) {
            double score = result_fold["score"].get<double>();
            // The first fold is always accepted so that a dataset whose folds
            // all score 0.0 still gets a (valid) best entry.
            if (best == nullptr || score > best_score) {
                best_score = score;
                best = &result_fold;
            }
        }
        if (best == nullptr) {
            continue;
        }
        auto dataset = result.key();
        auto combinations = grid.getGrid(dataset);
        // at() turns a corrupt combination index into an exception instead of
        // an out-of-bounds read
        const auto idx_combination = static_cast<std::size_t>(best->at("combination").get<int>());
        json json_best = {
            { "score", best_score },
            { "hyperparameters", combinations.at(idx_combination) },
            { "date", get_date() + " " + get_time() },
            { "grid", grid.getInputGrid(dataset) },
            { "duration", timer.translate2String(best->at("time").get<double>()) }
        };
        results[dataset] = json_best;
    }
}
// Consumer side of the task distribution: announce availability to the
// manager, then process every task index received until TAG_END arrives.
void consumer(Datasets& datasets, json& tasks, struct ConfigGrid& config, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
    // Value-initialized: the first message (TAG_QUERY) sends this struct
    // before any task has filled it in.
    Task_Result result{};
    //
    // 2b.1 Consumers announce to the producer that they are ready to receive a task
    //
    MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_QUERY, MPI_COMM_WORLD);
    int task = 0;
    while (true) {
        MPI_Status status;
        //
        // 2b.2 Consumers receive the task from the producer and process it
        //
        MPI_Recv(&task, 1, MPI_INT, config_mpi.manager, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_END) {
            break;
        }
        process_task_mpi_consumer(config, config_mpi, tasks, task, datasets, &result);
        //
        // 2b.3 Consumers send the result to the producer
        //
        MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_RESULT, MPI_COMM_WORLD);
    }
}
// Run the whole grid search over MPI. Must be called by every process of
// MPI_COMM_WORLD; the process whose rank equals config_mpi.manager acts as
// producer, all the others as consumers.
void GridSearch::go(struct ConfigMPI& config_mpi)
{
    /*
     * Each task is a json object with the following structure:
     * {
     *   "dataset": "dataset_name",
     *   "idx_dataset": idx_dataset, // used to identify the dataset in the results
     *   // this index is relative to the used datasets in the actual run not to the whole datasets
     *   "seed": # of seed to use,
     *   "fold": # of fold to process
     * }
     *
     * The overall process consists in these steps:
     * 0. Create the MPI result type & tasks
     * 0.1 Create the MPI result type
     * 0.2 Manager creates the tasks
     * 1. Manager will broadcast the tasks to all the processes
     * 1.1 Broadcast the length of the serialized tasks
     * 1.2 Broadcast the tasks as a string of characters
     * 2a. Producer delivers the tasks to the consumers
     * 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
     * 2a.2 Producer will send the end message to all the consumers
     * 2b. Consumers process the tasks and send the results to the producer
     * 2b.1 Consumers announce to the producer that they are ready to receive a task
     * 2b.2 Consumers receive the task from the producer and process it
     * 2b.3 Consumers send the result to the producer
     * 3. Manager selects the best scores for each dataset
     * 3.1 Loop through all the results obtained from each outer fold (task) and select the best
     * 3.2 Save the results
     */
    //
    // 0.1 Create the MPI result type
    //
    MPI_Datatype MPI_Result;
    MPI_Datatype type[5] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE };
    int blocklen[5] = { 1, 1, 1, 1, 1 };
    MPI_Aint disp[5];
    disp[0] = offsetof(Task_Result, idx_dataset);
    disp[1] = offsetof(Task_Result, idx_combination);
    disp[2] = offsetof(Task_Result, n_fold);
    disp[3] = offsetof(Task_Result, score);
    disp[4] = offsetof(Task_Result, time);
    MPI_Type_create_struct(5, blocklen, disp, type, &MPI_Result);
    MPI_Type_commit(&MPI_Result);
    //
    // 0.2 Manager creates the tasks
    //
    // The serialized tasks live in a std::string, so the buffer is released
    // automatically even if parsing throws.
    std::string msg;
    int tasks_size = 0;
    json tasks;
    if (config_mpi.rank == config_mpi.manager) {
        timer.start();
        tasks = build_tasks_mpi(config_mpi.rank);
        msg = tasks.dump();
        tasks_size = static_cast<int>(msg.size());
    }
    //
    // 1. Manager will broadcast the tasks to all the processes
    //
    MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD);
    if (config_mpi.rank != config_mpi.manager) {
        msg.resize(tasks_size);
    }
    MPI_Bcast(msg.data(), tasks_size, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
    tasks = json::parse(msg);
    auto datasets = Datasets(config.discretize, Paths::datasets());
    if (config_mpi.rank == config_mpi.manager) {
        //
        // 2a. Producer delivers the tasks to the consumers
        //
        auto datasets_names = filterDatasets(datasets);
        json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result);
        std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl;
        //
        // 3. Manager selects the best scores for each dataset
        //
        auto results = initializeResults();
        select_best_results_folds(results, all_results, config.model);
        //
        // 3.2 Save the results
        //
        save(results);
    } else {
        //
        // 2b. Consumers process the tasks and send the results to the producer
        //
        consumer(datasets, tasks, config, config_mpi, MPI_Result);
    }
    // Release the committed datatype once all the communication is done
    MPI_Type_free(&MPI_Result);
}
// Starting point for the results of this run: the "results" entry of the
// previous output file when continue_from is set, an empty json otherwise.
json GridSearch::initializeResults()
{
    json results;
    // Load previous results if continue is set
    if (config.continue_from == NO_CONTINUE()) {
        return results;
    }
    if (!config.quiet)
        std::cout << "* Loading previous results" << std::endl;
    try {
        std::ifstream previous(Paths::grid_output(config.model));
        if (previous.is_open()) {
            json loaded = json::parse(previous);
            results = loaded["results"];
        }
    }
    catch (const std::exception& e) {
        std::cerr << "* There were no previous results" << std::endl;
        std::cerr << "* Initizalizing new results" << std::endl;
        results = json();
    }
    return results;
}
// Write the configuration of the run and `results` to the grid output file
// of the configured model, as indented JSON.
// Throws std::runtime_error if the output file cannot be opened, so the
// results of a finished search are never lost silently.
void GridSearch::save(json& results)
{
    // Build the payload first: the existing file is only truncated once
    // there is something to write.
    json output = {
        { "model", config.model },
        { "score", config.score },
        { "discretize", config.discretize },
        { "stratified", config.stratified },
        { "n_folds", config.n_folds },
        { "seeds", config.seeds },
        { "date", get_date() + " " + get_time()},
        { "nested", config.nested},
        { "platform", config.platform },
        { "duration", timer.getDurationString(true)},
        { "results", results }
    };
    const auto path = Paths::grid_output(config.model);
    std::ofstream file(path);
    if (!file.is_open()) {
        throw std::runtime_error("Could not open " + path + " to save the grid search results");
    }
    file << output.dump(4);
}
} /* namespace platform */

60
src/Platform/GridSearch.h Normal file
View File

@@ -0,0 +1,60 @@
#ifndef GRIDSEARCH_H
#define GRIDSEARCH_H
#include <string>
#include <map>
#include <mpi.h>
#include <nlohmann/json.hpp>
#include "Datasets.h"
#include "HyperParameters.h"
#include "GridData.h"
#include "Timer.h"
namespace platform {
using json = nlohmann::json;
// Configuration of a grid-search run.
struct ConfigGrid {
std::string model; // model name; selects the grid input/output files and the classifier to create
std::string score; // score name, stored as-is in the output file
std::string continue_from; // dataset to continue from, or GridSearch::NO_CONTINUE()
std::string platform; // stored as-is in the output file
bool quiet; // when true the "* Loading previous results" message is not printed
bool only; // used with continue_from to only compute that dataset
bool discretize; // forwarded to the Datasets constructor
bool stratified; // use StratifiedKFold instead of KFold (outer and nested folds)
int nested; // number of nested (inner) folds
int n_folds; // number of outer folds
json excluded; // dataset names to remove from the run
std::vector<int> seeds; // one set of fold tasks is created per seed
};
// MPI layout information handed to GridSearch::go.
struct ConfigMPI {
int rank; // rank of the current process
int n_procs; // total number of processes; the producer sends n_procs - 1 end messages
int manager; // rank of the process that builds the tasks and acts as producer
};
// Result of one task, sent from a consumer to the producer through the MPI
// struct datatype built in GridSearch::go (MPI_UNSIGNED, MPI_UNSIGNED,
// MPI_INT, MPI_DOUBLE, MPI_DOUBLE). The field order and types must stay in
// sync with that datatype.
// `unsigned int` is spelled out because `uint` is a non-standard typedef.
typedef struct {
    unsigned int idx_dataset; // index of the dataset in the filtered dataset list
    unsigned int idx_combination; // index of the best hyperparameter combination
    int n_fold; // outer fold number of the task
    double score; // score obtained on the test part of the outer fold
    double time; // duration reported by the task timer
} Task_Result;
// MPI message tags used between producer and consumers
const int TAG_QUERY = 1; // consumer -> producer: ready to receive a first task
const int TAG_RESULT = 2; // consumer -> producer: message carries a Task_Result
const int TAG_TASK = 3; // producer -> consumer: message carries a task index
const int TAG_END = 4; // producer -> consumer: no more tasks, stop
// Runs a hyperparameter grid search with nested cross validation,
// distributing the work over MPI processes.
class GridSearch {
public:
explicit GridSearch(struct ConfigGrid& config);
// Runs the search; the manager process saves the results at the end.
void go(struct ConfigMPI& config_mpi);
~GridSearch() = default;
// Returns the parsed grid output file of the model, or an empty json if it cannot be opened.
json loadResults();
// Sentinel value of ConfigGrid::continue_from meaning "do not continue a previous run".
static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; }
private:
// Writes configuration and results to the grid output file.
void save(json& results);
// Returns the previous results when continuing a run, an empty json otherwise.
json initializeResults();
// Applies continue_from / only / excluded to the list of dataset names.
std::vector<std::string> filterDatasets(Datasets& datasets) const;
struct ConfigGrid config;
// Builds the shuffled list of tasks (dataset x seed x fold).
json build_tasks_mpi(int rank);
Timer timer; // used to measure the time of the whole process
};
} /* namespace platform */
#endif /* GRIDSEARCH_H */

View File

@@ -0,0 +1,55 @@
#include "HyperParameters.h"
#include <fstream>
#include <sstream>
#include <iostream>
namespace platform {
// Assign the same hyperparameters to every dataset of the list.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
    for (auto name = datasets.begin(); name != datasets.end(); ++name) {
        hyperparameters[*name] = hyperparameters_;
    }
}
// Concatenate `strings`, placing `delim` between consecutive elements.
// No delimiter is written after the last element; an empty vector yields "".
std::string join(std::vector<std::string> const& strings, std::string delim)
{
    std::stringstream ss;
    bool first = true;
    for (const auto& item : strings) {
        if (!first) {
            ss << delim;
        }
        ss << item;
        first = false;
    }
    return ss.str();
}
// Load per-dataset hyperparameters from a json file.
// Datasets missing from the file get an empty json object and a warning
// on std::cerr.
// Throws std::runtime_error if the file cannot be opened.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
    std::ifstream input(hyperparameters_file);
    if (!input.is_open()) {
        throw std::runtime_error("File " + hyperparameters_file + " not found");
    }
    // Parsing fails with an exception if the content is not valid json
    json loaded = json::parse(input);
    for (const auto& dataset : datasets) {
        if (loaded.contains(dataset)) {
            hyperparameters[dataset] = loaded[dataset]["hyperparameters"].get<json>();
        } else {
            std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
            hyperparameters[dataset] = json({});
        }
    }
}
// Verify that every hyperparameter stored under `fileName` is listed in `valid`.
// Throws std::invalid_argument naming the first unknown hyperparameter, and
// std::out_of_range (from std::map::at) if `fileName` has no entry.
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
{
    json result = hyperparameters.at(fileName);
    for (const auto& item : result.items()) {
        const bool known = std::find(valid.begin(), valid.end(), item.key()) != valid.end();
        if (known) {
            continue;
        }
        throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
            + result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
    }
}
// Return a copy of the hyperparameters stored under `fileName`.
// Throws std::out_of_range (from std::map::at) if there is no such entry.
json HyperParameters::get(const std::string& fileName)
{
    const auto& stored = hyperparameters.at(fileName);
    return stored;
}
} /* namespace platform */

View File

@@ -0,0 +1,23 @@
#ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string>
#include <map>
#include <vector>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
// Hyperparameters of a model, stored per dataset name.
class HyperParameters {
public:
HyperParameters() = default;
// Assigns the same hyperparameters to every dataset in `datasets`.
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
// Loads the hyperparameters of every dataset in `datasets` from a json file.
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
~HyperParameters() = default;
// True if the entry stored under `key` is not empty; throws std::out_of_range if `key` is unknown.
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
// Throws std::invalid_argument if the entry stored under `fileName` has a hyperparameter not listed in `valid`.
void check(const std::vector<std::string>& valid, const std::string& fileName);
// Returns a copy of the entry stored under `fileName`.
json get(const std::string& fileName);
private:
// dataset name -> hyperparameters
std::map<std::string, json> hyperparameters;
};
} /* namespace platform */
#endif /* HYPERPARAMETERS_H */

View File

@@ -0,0 +1,213 @@
#include "ManageResults.h"
#include "CommandParser.h"
#include <filesystem>
#include <tuple>
#include "Colors.h"
#include "CLocale.h"
#include "Paths.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
namespace platform {
ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial))
{
indexList = true;
openExcel = false;
workbook = NULL;
if (numFiles == 0) {
this->numFiles = results.size();
}
}
// Entry point: list the results sorted by date, run the interactive menu
// and close the Excel workbook if one was opened.
void ManageResults::doMenu()
{
    if (!results.empty()) {
        results.sortDate();
        list();
        menu();
        if (openExcel) {
            workbook_close(workbook);
        }
        std::cout << Colors::RESET() << "Done!" << std::endl;
    } else {
        std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
    }
}
// Print the table of results (at most numFiles rows), alternating row colors.
void ManageResults::list()
{
// NOTE(review): `temp` is never used again; presumably ConfigLocale sets the output locale for the lifetime of the object — confirm.
auto temp = ConfigLocale();
// " of N" is appended only when not every result is being shown
std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : "";
std::stringstream oss;
oss << "Results on screen: " << numFiles << suffix;
std::cout << Colors::GREEN() << oss.str() << std::endl;
// Underline the title with as many dashes as characters it has
std::cout << std::string(oss.str().size(), '-') << std::endl;
if (complete) {
std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl;
}
if (partial) {
std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl;
}
auto i = 0;
// The model column is as wide as the longest model name
int maxModel = results.maxModelSize();
std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl;
std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl;
bool odd = true;
for (auto& result : results) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << result.to_string(maxModel) << std::endl;
// Stop once numFiles rows have been printed
if (i == numFiles) {
break;
}
odd = !odd;
}
}
// Ask the user to confirm `intent` ("delete", "hide", ...) on `fileName`.
// The question is repeated until a single 'y' or 'n' (any case) is entered.
// Returns true only for 'y'. End of input is treated as "no", so the loop
// cannot spin forever on a closed std::cin.
bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const
{
    const std::string color = intent == "delete" ? Colors::RED() : Colors::YELLOW();
    std::string line;
    char answer = '\0';
    while (answer != 'y' && answer != 'n') {
        std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
        if (!getline(std::cin, line)) {
            answer = 'n';
            break;
        }
        answer = '\0';
        if (line.size() == 1) {
            // tolower needs a value representable as unsigned char
            answer = static_cast<char>(tolower(static_cast<unsigned char>(line[0])));
        }
    }
    if (answer == 'y') {
        return true;
    }
    std::cout << "Not done!" << std::endl;
    return false;
}
// Show the report of the result at position `index`, either on the console
// or as a new sheet of the Excel workbook (excelReport == true).
void ManageResults::report(const int index, const bool excelReport)
{
    std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl;
    auto data = results.at(index).load();
    if (!excelReport) {
        ReportConsole reporter(data, compare);
        reporter.show();
        return;
    }
    ReportExcel reporter(data, compare, workbook);
    reporter.show();
    // Remember the workbook so further reports are added to it and doMenu can close it
    openExcel = true;
    workbook = reporter.getWorkbook();
    std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl;
}
void ManageResults::showIndex(const int index, const int idx)
{
// Show a dataset result inside a report
auto data = results.at(index).load();
std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl;
ReportConsole reporter(data, compare, idx);
reporter.show();
}
// Ask for a sorting field and sort the results by it.
// An empty answer leaves the order untouched; anything that is not one of
// the four one-letter options prints "Invalid option".
void ManageResults::sortList()
{
    std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
    std::string answer;
    getline(std::cin, answer);
    if (answer.empty())
        return;
    // Answers longer than one character can never match an option
    const char field = answer.size() == 1 ? answer[0] : '\0';
    if (field == 'd') {
        results.sortDate();
    } else if (field == 's') {
        results.sortScore();
    } else if (field == 'u') {
        results.sortDuration();
    } else if (field == 'm') {
        results.sortModel();
    } else {
        std::cout << "Invalid option" << std::endl;
    }
}
// Interactive menu loop. Two modes are handled:
//  - indexList == true : commands act on the list of result files
//  - indexList == false: commands act on the datasets of the report selected
//    last (its position is kept in `index`)
void ManageResults::menu()
{
    // Initialized so that entering the loop with indexList already false
    // (menu left from a report on a previous call) never reads indeterminate values.
    char option = '\0';
    int index = 0;
    int subIndex = 0;
    bool finished = false;
    std::string filename;
    // tuple<Option, digit, requires value>
    std::vector<std::tuple<std::string, char, bool>> mainOptions = {
        {"quit", 'q', false},
        {"list", 'l', false},
        {"delete", 'd', true},
        {"hide", 'h', true},
        {"sort", 's', false},
        {"report", 'r', true},
        {"excel", 'e', true}
    };
    std::vector<std::tuple<std::string, char, bool>> listOptions = {
        {"report", 'r', true},
        {"list", 'l', false},
        {"quit", 'q', false}
    };
    auto parser = CommandParser();
    while (!finished) {
        if (indexList) {
            // NOTE(review): numFiles is not updated here after a delete/hide;
            // confirm that the accepted index range still matches `results`.
            std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
        } else {
            std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1);
        }
        switch (option) {
            case 'q':
                finished = true;
                break;
            case 'l':
                list();
                indexList = true;
                break;
            case 'd':
                filename = results.at(index).getFilename();
                if (!confirmAction("delete", filename))
                    break;
                std::cout << "Deleting " << filename << std::endl;
                results.deleteResult(index);
                std::cout << "File: " + filename + " deleted!" << std::endl;
                list();
                break;
            case 'h':
                filename = results.at(index).getFilename();
                if (!confirmAction("hide", filename))
                    break;
                std::cout << "Hiding " << filename << std::endl;
                results.hideResult(index, Paths::hiddenResults());
                std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl;
                list();
                break;
            case 's':
                sortList();
                list();
                break;
            case 'r':
                if (indexList) {
                    // Show the whole report and switch to dataset mode
                    report(index, false);
                    indexList = false;
                } else {
                    showIndex(index, subIndex);
                }
                break;
            case 'e':
                report(index, true);
                break;
        }
    }
}
} /* namespace platform */

View File

@@ -0,0 +1,31 @@
#ifndef MANAGE_RESULTS_H
#define MANAGE_RESULTS_H
#include "Results.h"
#include "xlsxwriter.h"
namespace platform {
// Interactive console manager for the stored result files: list, sort,
// delete, hide and report them (console or Excel).
class ManageResults {
public:
// numFiles == 0 means "show every result found".
ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
~ManageResults() = default;
// Lists the results and runs the menu loop until the user quits.
void doMenu();
private:
void list();
// Asks for a y/n confirmation; returns true if the user answered yes.
bool confirmAction(const std::string& intent, const std::string& fileName) const;
void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx);
void sortList();
void menu();
int numFiles; // number of results shown on screen
bool indexList; // true: menu acts on the list of files; false: on the datasets of a report
bool openExcel; // true once an Excel report has been generated (workbook must be closed)
bool complete; // only list complete results
bool partial; // only list partial results
bool compare; // forwarded to the report classes
Results results;
lxw_workbook* workbook; // workbook shared by the Excel reports, NULL until the first one
};
}
#endif /* MANAGE_RESULTS_H */

52
src/Platform/Models.cc Normal file
View File

@@ -0,0 +1,52 @@
#include "Models.h"
namespace platform {
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
Models* Models::factory = nullptr;
// Access point of the singleton; the object is created on first use.
Models* Models::instance()
{
    if (!factory) {
        factory = new Models();
    }
    return factory;
}
void Models::registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{
bayesnet::BaseClassifier* instance = nullptr;
// find name in the registry and call factory method.
auto it = functionRegistry.find(name);
if (it != functionRegistry.end())
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return unique_ptr<bayesnet::BaseClassifier>(instance);
else
return nullptr;
}
// Return the names of all the registered models, in the order of the registry map.
std::vector<std::string> Models::getNames()
{
    std::vector<std::string> names;
    names.reserve(functionRegistry.size());
    // const auto& binds to the map's own value_type (pair<const std::string, ...>),
    // so no entry (and no std::function) is copied while iterating.
    for (const auto& entry : functionRegistry) {
        names.push_back(entry.first);
    }
    return names;
}
// Registered model names as "{name1, name2, ...}" ("{}" when there are none).
std::string Models::tostring()
{
    std::string listing = "{";
    std::string separator = "";
    for (const auto& entry : functionRegistry) {
        listing += separator + entry.first;
        separator = ", ";
    }
    return listing + "}";
}
// Constructing a Registrar registers `classFactoryFunction` under `name`
// in the Models singleton.
Registrar::Registrar(const std::string& name, std::function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
    auto* registry = Models::instance();
    registry->registerFactoryFunction(name, classFactoryFunction);
}
}

41
src/Platform/Models.h Normal file
View File

@@ -0,0 +1,41 @@
#ifndef MODELS_H
#define MODELS_H
#include <map>
#include "BaseClassifier.h"
#include "AODE.h"
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
namespace platform {
// Singleton factory of classifiers: maps a model name to the function that
// creates an instance of it.
class Models {
private:
// model name -> function returning a new classifier
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
// Not copyable: there is a single instance
Models(Models&) = delete;
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
// Returns the singleton, creating it on first use.
static Models* instance();
// Returns a new classifier, or an empty pointer if `name` is not registered.
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
void registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
// Names of all the registered models.
std::vector<string> getNames();
// Registered names formatted as "{a, b, ...}".
std::string tostring();
};
// Helper whose constructor registers a factory function in the Models singleton.
class Registrar {
public:
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

39
src/Platform/Paths.h Normal file
View File

@@ -0,0 +1,39 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
#include <filesystem>
#include "DotEnv.h"
namespace platform {
// Central place for the directories and file names used by the platform.
class Paths {
public:
    // Directory names, all relative and ending in '/'
    static std::string results() { return "results/"; }
    static std::string hiddenResults() { return "hidden_results/"; }
    static std::string excel() { return "excel/"; }
    static std::string grid() { return "grid/"; }
    // Location of the datasets, read from the "source_data" entry of the environment file
    static std::string datasets()
    {
        auto environment = platform::DotEnv();
        return environment.get("source_data");
    }
    // Create the directory `path` if it does not exist.
    // Throws std::runtime_error if the directory cannot be created.
    static void createPath(const std::string& path)
    {
        try {
            std::filesystem::create_directory(path);
        }
        catch (std::exception&) {
            throw std::runtime_error("Could not create directory " + path);
        }
    }
    static std::string excelResults() { return "some_results.xlsx"; }
    // grid/grid_<model>_input.json
    static std::string grid_input(const std::string& model)
    {
        std::string path = grid();
        path += "grid_" + model + "_input.json";
        return path;
    }
    // grid/grid_<model>_output.json
    static std::string grid_output(const std::string& model)
    {
        std::string path = grid();
        path += "grid_" + model + "_output.json";
        return path;
    }
};
}
#endif

113
src/Platform/ReportBase.cc Normal file
View File

@@ -0,0 +1,113 @@
#include <sstream>
#include <locale>
#include "Datasets.h"
#include "ReportBase.h"
#include "DotEnv.h"
namespace platform {
// Keep the report data and build the legend (symbol -> meaning) used in the summary.
// The margin over ZeroR is fixed to 0.1 (shown as a percentage in the legend).
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
    std::stringstream zeroR_label;
    zeroR_label << "Better than ZeroR + ";
    zeroR_label << std::setprecision(1) << std::fixed << margin * 100;
    zeroR_label << "%";
    meaning = {
        {Symbols::equal_best, "Equal to best"},
        {Symbols::better_best, "Better than best"},
        {Symbols::cross, "Less than or equal to ZeroR"},
        {Symbols::upward_arrow, zeroR_label.str()}
    };
}
// Format the numeric array stored in data[key] as "[v1, v2, ...]".
std::string ReportBase::fromVector(const std::string& key)
{
    std::stringstream out;
    out << "[";
    bool first = true;
    for (auto& item : data[key]) {
        if (!first) {
            out << ", ";
        }
        out << item.get<double>();
        first = false;
    }
    out << "]";
    return out.str();
}
// Format a numeric json array as "<title>[v1, v2, ...]", each value in
// fixed notation with the given field width and precision.
std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision)
{
    std::stringstream out;
    out << title << "[";
    bool first = true;
    for (const auto& item : data) {
        if (!first) {
            out << ", ";
        }
        out << std::fixed << std::setw(width) << std::setprecision(precision) << item.get<double>();
        first = false;
    }
    out << "]";
    return out.str();
}
void ReportBase::show()
{
header();
body();
}
// Return the status symbol of `result` for `dataset` and count it in `summary`.
//  - compare mode: the result is compared with the stored best result.
//  - otherwise, only for accuracy on two-class datasets: the result is
//    compared with the majority-class frequency increased by `margin`.
// A single blank means "no status" and is not counted.
std::string ReportBase::compareResult(const std::string& dataset, double result)
{
    const std::string no_status = " ";
    std::string status = no_status;
    if (compare) {
        const double best = bestResult(dataset, data["model"].get<std::string>());
        if (result == best) {
            status = Symbols::equal_best;
        } else if (result > best) {
            status = Symbols::better_best;
        }
    } else if (data["score_name"].get<std::string>() == "accuracy") {
        auto dt = Datasets(false, Paths::datasets());
        dt.loadDataset(dataset);
        if (dt.getNClasses(dataset) == 2) {
            std::vector<int> distribution = dt.getClassesCounts(dataset);
            double nSamples = dt.getNSamples(dataset);
            const int majority = *std::max_element(distribution.begin(), distribution.end());
            double mark = majority / nSamples * (1 + margin);
            if (mark > 1) {
                mark = 0.9995;
            }
            if (result < mark) {
                status = Symbols::cross;
            } else if (result > mark) {
                status = Symbols::upward_arrow;
            } else {
                status = "=";
            }
        }
    }
    if (status != no_status) {
        // operator[] starts a new counter at 0
        ++summary[status];
    }
    return status;
}
double ReportBase::bestResult(const std::string& dataset, const std::string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
std::string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
std::string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
// False once bestResult() has failed to open the best results file.
bool ReportBase::getExistBestFile()
{
    const bool found = existBestFile;
    return found;
}
}

36
src/Platform/ReportBase.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string>
#include <iostream>
#include "Paths.h"
#include "Symbols.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
// Base class of the reports: keeps the result data and the logic that
// classifies each dataset result; subclasses provide header/body/summary.
class ReportBase {
public:
explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default;
// Calls header() and then body().
void show();
protected:
json data; // contents of the result file being reported
// Formats the numeric array data[key] as "[v1, v2, ...]".
std::string fromVector(const std::string& key);
// Formats a numeric json array as "<title>[v1, v2, ...]" with fixed width and precision.
std::string fVector(const std::string& title, const json& data, const int width, const int precision);
// False if the best results file could not be opened.
bool getExistBestFile();
virtual void header() = 0;
virtual void body() = 0;
virtual void showSummary() = 0;
// Returns the status symbol of a dataset result and counts it in `summary`.
std::string compareResult(const std::string& dataset, double result);
std::map<std::string, int> summary; // status symbol -> number of datasets with that status
double margin; // relative margin added to the ZeroR mark
std::map<std::string, std::string> meaning; // status symbol -> legend text
bool compare; // compare against the best results instead of ZeroR
private:
double bestResult(const std::string& dataset, const std::string& model);
json bestResults; // contents of the best results file, loaded on first use
bool existBestFile = true;
};
};
#endif

View File

@@ -0,0 +1,114 @@
#include <iostream>
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestScore.h"
#include "CLocale.h"
namespace platform {
// Builds one framed report line: "* <text><padding>*\n", padded to MAXL columns.
// `utf` is the number of extra padding spaces to add; it compensates for text
// whose byte length is larger than its displayed width (multi-byte symbols).
// Text longer than the frame is not truncated; it simply gets no padding.
std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
{
    // Signed arithmetic: text.length() is unsigned and would otherwise wrap around
    const int available = MAXL - static_cast<int>(text.length()) - 3;
    const int padding = (available < 0 ? 0 : available) + utf;
    // A negative count would be converted to a huge size_t and throw std::length_error
    const std::string::size_type fill = padding < 0 ? 0 : static_cast<std::string::size_type>(padding);
    return "* " + text + std::string(fill, ' ') + "*\n";
}
// Prints the framed heading of the report: model and version, cross validation
// setup, title, seeds, execution time, platform and score name.
void ReportConsole::header()
{
    const std::string stars(MAXL, '*');
    std::cout << Colors::MAGENTA() << stars << std::endl;

    std::string reportLine = "Report " + data["model"].get<std::string>();
    reportLine += " ver. " + data["version"].get<std::string>();
    reportLine += " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and ";
    reportLine += std::to_string(data["seeds"].size()) + " random seeds. ";
    reportLine += data["date"].get<std::string>() + " " + data["time"].get<std::string>();
    std::cout << headerLine(reportLine);

    std::cout << headerLine(data["title"].get<std::string>());

    const std::string stratified = data["stratified"].get<bool>() ? "True" : "False";
    std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + stratified);

    // Duration is shown both in seconds and in hours, two decimals each
    const float seconds = data["duration"].get<float>();
    std::stringstream timing;
    timing << "Execution took " << std::setprecision(2) << std::fixed << seconds
        << " seconds, " << seconds / 3600 << " hours, on " << data["platform"].get<std::string>();
    std::cout << headerLine(timing.str());

    std::cout << headerLine("Score is " + data["score_name"].get<std::string>());
    std::cout << stars << std::endl;
    std::cout << std::endl;
}
// Prints one table row per entry of data["results"] (or only the row whose
// position equals selectedIndex when it is not -1). After the table it prints
// either the per-fold detail of the last printed result (single result or
// selected row) or the footer with the summary.
void ReportConsole::body()
{
// NOTE(review): ConfigLocale is defined elsewhere; presumably it sets the numeric locale while `tmp` is alive — confirm.
auto tmp = ConfigLocale();
// Minimum widths of the Hyperparameters and Dataset columns; enlarged to fit the data
int maxHyper = 15;
int maxDataset = 7;
for (const auto& r : data["results"]) {
maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
}
std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
int index = 0;
for (const auto& r : data["results"]) {
// Skip every row but the selected one, keeping `index` in step with the position
if (selectedIndex != -1 && index != selectedIndex) {
index++;
continue;
}
// Alternate row colors
auto color = odd ? Colors::CYAN() : Colors::BLUE();
std::cout << color;
std::cout << std::setw(3) << std::right << index++ << " ";
std::cout << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
std::cout << std::setw(6) << std::right << r["samples"].get<int>() << " ";
std::cout << std::setw(5) << std::right << r["features"].get<int>() << " ";
std::cout << std::setw(3) << std::right << r["classes"].get<int>() << " ";
// The Nodes / Edges / States columns are filled from the "nodes" / "leaves" / "depth" keys
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
// compareResult also counts the returned symbol in `summary`
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
std::cout << status;
std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
std::cout << r["hyperparameters"].dump();
std::cout << std::endl;
std::cout << std::flush;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1 || selectedIndex != -1) {
// Detail of the only (or selected) result: per-fold scores and times
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
std::cout << std::string(MAXL, '*') << std::endl;
} else {
footer(totalScore);
}
}
// Prints one framed line per status symbol found: symbol, count and meaning.
void ReportConsole::showSummary()
{
    for (const auto& entry : summary) {
        const std::string& symbol = entry.first;
        std::stringstream line;
        line << std::setw(3) << std::left << symbol
            << std::setw(3) << std::right << entry.second << " "
            << std::left << meaning.at(symbol);
        // Two extra padding spaces are requested for lines that carry a status symbol
        std::cout << headerLine(line.str(), 2);
    }
}
// Prints the closing frame of the report: the status summary, the total score
// relative to the reference returned by BestScore::getScore (when there is one)
// and a warning if a comparison was requested but no best results file exists.
void ReportConsole::footer(double totalScore)
{
    const std::string border(MAXL, '*');
    std::cout << Colors::MAGENTA() << border << std::endl;
    showSummary();
    const auto scoreName = data["score_name"].get<std::string>();
    const auto reference = BestScore::getScore(scoreName);
    if (reference.first != "") {
        std::stringstream line;
        line << scoreName << " compared to " << reference.first << " .: " << totalScore / reference.second;
        std::cout << headerLine(line.str());
    }
    const bool bestFileMissing = !getExistBestFile();
    if (bestFileMissing && compare) {
        std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
    }
    std::cout << border << std::endl << Colors::RESET();
}
}

View File

@@ -0,0 +1,22 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
const int MAXL = 133;
// Report generator that writes the results to the standard output as a
// colored, fixed-width table.
class ReportConsole : public ReportBase {
public:
// index: position of the only result to show, or -1 to show all of them
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default;
private:
int selectedIndex;
// Frames `text` in a line of MAXL columns; `utf` adds extra padding spaces.
// Note: the default value of `utf` is given in the definition in ReportConsole.cc
std::string headerLine(const std::string& text, int utf);
void header() override;
void body() override;
void footer(double totalScore);
void showSummary() override;
};
};
#endif

180
src/Platform/ReportExcel.cc Normal file
View File

@@ -0,0 +1,180 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestScore.h"
namespace platform {
// Builds the report on the given workbook/worksheet; createFile() creates
// whichever of them is NULL and sets up properties, formats and columns.
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
{
createFile();
}
// Freezes the heading rows and the first column and sets the default width of
// the thirteen columns of the report.
void ReportExcel::formatColumns()
{
    worksheet_freeze_panes(worksheet, 6, 1);
    const std::vector<int> widths = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
    int column = 0;
    for (const int width : widths) {
        worksheet_set_column(worksheet, column, column, width, NULL);
        ++column;
    }
}
// Adds a worksheet named after the model. If that name is already taken the
// names "<model>2", "<model>3", ... are tried in turn.
// Throws std::invalid_argument once the numeric suffix would exceed 100.
void ReportExcel::createWorksheet()
{
    const std::string baseName = data["model"].get<std::string>();
    std::string candidate = baseName;
    int attempt = 1;
    while (workbook_get_worksheet_by_name(workbook, candidate.c_str())) {
        ++attempt;
        if (attempt > 100) {
            throw std::invalid_argument("Couldn't create sheet " + candidate);
        }
        candidate = baseName + std::to_string(attempt);
    }
    worksheet = workbook_add_worksheet(workbook, candidate.c_str());
}
// Makes sure a workbook and a worksheet exist (creating the missing ones) and
// then sets the document properties, the cell formats and the column layout.
void ReportExcel::createFile()
{
    if (!workbook) {
        const auto target = Paths::excel() + Paths::excelResults();
        workbook = workbook_new(target.c_str());
    }
    if (!worksheet) {
        createWorksheet();
    }
    setProperties(data["title"].get<std::string>());
    createFormats();
    formatColumns();
}
// Closes the workbook through libxlsxwriter's workbook_close().
void ReportExcel::closeFile()
{
workbook_close(workbook);
}
// Writes the heading of the sheet (rows 0 to 3): model, language and cross
// validation description, title, score name, execution time, platform, seeds
// and the stratified / discretized flags.
void ReportExcel::header()
{
// Side effect: installs a locale with the `separated` facet as the global
// locale and imbues std::cout with it.
// NOTE(review): `separated` is defined elsewhere; presumably a numpunct facet — confirm.
std::locale mylocale(std::cout.getloc(), new separated);
std::locale::global(mylocale);
std::cout.imbue(mylocale);
std::stringstream oss;
std::string message = data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>() + " " +
data["language"].get<std::string>() + " ver. " + data["language_version"].get<std::string>() +
" with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
// Duration in seconds (row 2) and in hours (row 3)
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
// Reset the stream contents and state before reusing it
oss.str("");
oss.clear();
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}
// Writes the column titles in row 5 and one row per entry of data["results"]
// starting at row 6. When there is a single result the per-fold scores and
// times are written below it; otherwise the footer is written.
void ReportExcel::body()
{
auto head = std::vector<std::string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" });
int col = 0;
for (const auto& item : head) {
writeString(5, col++, item, "bodyHeader");
}
row = 6;
col = 0;
// Minimum width of the hyperparameters column; enlarged to the longest value found
int hypSize = 22;
json lastResult;
double totalScore = 0.0;
std::string hyperparameters;
for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<std::string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
// The Nodes / Edges / States columns are filled from the "nodes" / "leaves" / "depth" keys
writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
// compareResult also counts the returned symbol in `summary`
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
hyperparameters = r["hyperparameters"].dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show the per-fold detail if only one dataset is present in the result
if (data["results"].size() == 1) {
for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
writeString(row, col, group, "text");
for (double item : lastResult[group]) {
// Score groups use the "result" style, time groups the "time" style
std::string style = group.find("scores") != std::string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
}
}
// Set the width of the columns to show those values completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else {
footer(totalScore, row);
}
}
// Writes one line per status symbol found (symbol, count and meaning), starting
// two rows below the current `row`. The member `row` is advanced once per line.
void ReportExcel::showSummary()
{
    for (const auto& entry : summary) {
        const auto line = row + 2;
        worksheet_write_string(worksheet, line, 1, entry.first.c_str(), styles["summaryStyle"]);
        worksheet_write_number(worksheet, line, 2, entry.second, styles["summaryStyle"]);
        worksheet_merge_range(worksheet, line, 3, line, 5, meaning.at(entry.first).c_str(), styles["summaryStyle"]);
        ++row;
    }
}
// Writes the summary followed by the total score relative to the reference
// returned by BestScore::getScore (when there is one) and, if a comparison was
// requested but no best results file exists, a warning in the next row.
// Note: the parameter `row` hides the member of the same name inside this method.
void ReportExcel::footer(double totalScore, int row)
{
    showSummary();
    // Skip the summary lines plus some blank rows
    row += 4 + summary.size();
    const auto scoreName = data["score_name"].get<std::string>();
    const auto reference = BestScore::getScore(scoreName);
    if (reference.first != "") {
        const std::string label = scoreName + " compared to " + reference.first + " .:";
        worksheet_merge_range(worksheet, row, 1, row, 5, label.c_str(), efectiveStyle("text"));
        writeDouble(row, 6, totalScore / reference.second, "result");
    }
    const bool bestFileMissing = !getExistBestFile();
    if (bestFileMissing && compare) {
        worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
    }
}
}

View File

@@ -0,0 +1,24 @@
#ifndef REPORTEXCEL_H
#define REPORTEXCEL_H
#include<map>
#include "xlsxwriter.h"
#include "ReportBase.h"
#include "ExcelFile.h"
#include "Colors.h"
namespace platform {
// Report generator that writes the results to a worksheet of an Excel workbook
// using libxlsxwriter.
class ReportExcel : public ReportBase, public ExcelFile {
public:
// A NULL workbook and/or worksheet is created by the constructor (see createFile)
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
private:
// Freezes the heading panes and sets the column widths
void formatColumns();
// Creates the missing workbook / worksheet and sets properties and formats
void createFile();
// Adds a worksheet named after the model, adding a numeric suffix if needed
void createWorksheet();
void closeFile();
void header() override;
void body() override;
void showSummary() override;
// row: first row available after the results
void footer(double totalScore, int row);
};
};
#endif // !REPORTEXCEL_H

58
src/Platform/Result.cc Normal file
View File

@@ -0,0 +1,58 @@
#include "Result.h"
#include "BestScore.h"
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Colors.h"
#include "DotEnv.h"
#include "CLocale.h"
namespace platform {
// Loads the result file `path`/`filename` and caches its main attributes.
// The score is the sum of the scores of all the datasets, divided by the
// reference value of BestScore::getScore when one exists for the score name.
// A result is considered complete when it contains more than one dataset.
Result::Result(const std::string& path, const std::string& filename)
    : path(path)
    , filename(filename)
{
    auto contents = load();
    date = contents["date"].get<std::string>();
    double total = 0;
    for (const auto& item : contents["results"]) {
        total += item["score"].get<double>();
    }
    score = total;
    scoreName = contents["score_name"].get<std::string>();
    const auto reference = BestScore::getScore(scoreName);
    if (reference.first != "") {
        score /= reference.second;
    }
    title = contents["title"].get<std::string>();
    duration = contents["duration"].get<double>();
    model = contents["model"].get<std::string>();
    complete = contents["results"].size() > 1;
}
// Parses and returns the JSON contents of the result file.
// Throws std::invalid_argument if the file cannot be opened.
json Result::load() const
{
    const std::string fullName = path + "/" + filename;
    std::ifstream input(fullName);
    if (!input.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fullName + "]");
    }
    return json::parse(input);
}
std::string Result::to_string(int maxModel) const
{
auto tmp = ConfigLocale();
std::stringstream oss;
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
oss << date << " ";
oss << std::setw(maxModel) << std::left << model << " ";
oss << std::setw(11) << std::left << scoreName << " ";
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << std::setw(1) << " " << completeString << " ";
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << std::setw(50) << std::left << title << " ";
return oss.str();
}
}

35
src/Platform/Result.h Normal file
View File

@@ -0,0 +1,35 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
// Summary of one result file: the attributes are read from the file once, in
// the constructor.
class Result {
public:
// Loads `path`/`filename`; throws std::invalid_argument if it cannot be opened
Result(const std::string& path, const std::string& filename);
// Parses the result file again and returns its whole contents
json load() const;
// One-line description; maxModel is the width of the model column
std::string to_string(int maxModel) const;
std::string getFilename() const { return filename; };
std::string getDate() const { return date; };
double getScore() const { return score; };
std::string getTitle() const { return title; };
double getDuration() const { return duration; };
std::string getModel() const { return model; };
std::string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private:
std::string path;
std::string filename;
std::string date;
// Sum of the dataset scores, divided by the reference score when there is one
double score;
std::string title;
double duration;
std::string model;
std::string scoreName;
// True when the file holds results for more than one dataset
bool complete;
};
};
#endif

74
src/Platform/Results.cc Normal file
View File

@@ -0,0 +1,74 @@
#include "Results.h"
#include <algorithm>
namespace platform {
// Loads the result files found in `path` that match the filters and records
// the length of the longest model name among them (0 if there are none).
Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) :
    path(path), model(model), scoreName(score), complete(complete), partial(partial)
{
    load();
    maxModel = 0;
    for (const auto& file : files) {
        maxModel = std::max(maxModel, static_cast<int>(file.getModel().size()));
    }
}
// Scans the results folder and keeps the files named "results_*" containing
// ".json" whose model, score name and completeness match the filters
// ("any" disables the model or score filter).
void Results::load()
{
    using std::filesystem::directory_iterator;
    for (const auto& entry : directory_iterator(path)) {
        const auto filename = entry.path().filename().string();
        const bool isResultFile = filename.find(".json") != std::string::npos && filename.find("results_") == 0;
        if (!isResultFile) {
            continue;
        }
        auto result = Result(path, filename);
        const bool wrongModel = model != "any" && result.getModel() != model;
        const bool wrongScore = scoreName != "any" && scoreName != result.getScoreName();
        const bool completeWanted = complete && !result.isComplete();
        const bool partialWanted = partial && result.isComplete();
        if (wrongModel || wrongScore || completeWanted || partialWanted) {
            continue;
        }
        files.push_back(result);
    }
}
// Moves the result file at position `index` to the folder `pathHidden` and
// removes it from the list.
// Throws std::out_of_range if `index` is not valid and
// std::filesystem::filesystem_error if the file cannot be moved; in both cases
// the list is left untouched, so it never gets out of step with the folder.
void Results::hideResult(int index, const std::string& pathHidden)
{
    const auto filename = files.at(index).getFilename();
    // Unlike the C rename(), whose error code was being ignored, this throws on failure
    std::filesystem::rename(path + "/" + filename, pathHidden + "/" + filename);
    files.erase(files.begin() + index);
}
// Deletes the result file at position `index` and removes it from the list.
// Throws std::out_of_range if `index` is not valid and
// std::filesystem::filesystem_error if the file exists but cannot be deleted;
// in both cases the list is left untouched. A file that no longer exists is
// not an error: the entry is simply dropped from the list.
void Results::deleteResult(int index)
{
    const auto filename = files.at(index).getFilename();
    // Unlike the C remove(), whose error code was being ignored, this throws on failure
    std::filesystem::remove(path + "/" + filename);
    files.erase(files.begin() + index);
}
// Number of results currently in the list.
int Results::size() const
{
return files.size();
}
// Sorts the results by date, in descending order of the date string.
void Results::sortDate()
{
    const auto laterFirst = [](const Result& lhs, const Result& rhs) {
        return rhs.getDate() < lhs.getDate();
    };
    std::sort(files.begin(), files.end(), laterFirst);
}
// Sorts the results by model name, in descending order.
void Results::sortModel()
{
    const auto descendingName = [](const Result& lhs, const Result& rhs) {
        return rhs.getModel() < lhs.getModel();
    };
    std::sort(files.begin(), files.end(), descendingName);
}
// Sorts the results by duration, longest first.
void Results::sortDuration()
{
    const auto longestFirst = [](const Result& lhs, const Result& rhs) {
        return rhs.getDuration() < lhs.getDuration();
    };
    std::sort(files.begin(), files.end(), longestFirst);
}
// Sorts the results by score, highest first.
void Results::sortScore()
{
    const auto highestFirst = [](const Result& lhs, const Result& rhs) {
        return rhs.getScore() < lhs.getScore();
    };
    std::sort(files.begin(), files.end(), highestFirst);
}
// True when no result matched the filters (or all of them were removed).
bool Results::empty() const
{
return files.empty();
}
}

36
src/Platform/Results.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef RESULTS_H
#define RESULTS_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "Result.h"
namespace platform {
using json = nlohmann::json;
// List of the result files of a folder that match a set of filters.
class Results {
public:
// model / score: value to match, or "any" to accept every one
// complete / partial: keep only complete / only partial results
Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial);
// The four sort methods order the list in descending order of the attribute
void sortDate();
void sortScore();
void sortModel();
void sortDuration();
// Length of the longest model name in the list when it was loaded
int maxModelSize() const { return maxModel; };
// Moves the file of the result at `index` to `pathHidden` and drops it from the list
void hideResult(int index, const std::string& pathHidden);
// Deletes the file of the result at `index` and drops it from the list
void deleteResult(int index);
int size() const;
bool empty() const;
std::vector<Result>::iterator begin() { return files.begin(); };
std::vector<Result>::iterator end() { return files.end(); };
// Bounds-checked access (std::vector::at)
Result& at(int index) { return files.at(index); };
private:
std::string path;
std::string model;
std::string scoreName;
bool complete;
bool partial;
int maxModel;
std::vector<Result> files;
void load(); // Loads the list of results
};
};
#endif

252
src/Platform/Statistics.cc Normal file
View File

@@ -0,0 +1,252 @@
#include <algorithm>
#include <cmath>
#include <iterator>
#include <sstream>
#include "Statistics.h"
#include "Colors.h"
#include "Symbols.h"
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/normal.hpp>
#include "CLocale.h"
namespace platform {
// Stores references to the models, datasets and results to analyse, together
// with the significance level and the output flag, and caches both sizes.
Statistics::Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output) :
    models(models), datasets(datasets), data(data), significance(significance), output(output),
    nModels(static_cast<int>(models.size())), nDatasets(static_cast<int>(datasets.size()))
{
    auto temp = ConfigLocale();
}
// Computes everything the tests need: the ranks of the models, the control
// model (the one with the lowest average rank), the win/tie/loss counters
// against it and the widths of the longest model and dataset names.
// Throws std::runtime_error with less than 3 models or less than 3 datasets.
void Statistics::fit()
{
    if (nModels < 3 || nDatasets < 3) {
        std::cerr << "nModels: " << nModels << std::endl;
        std::cerr << "nDatasets: " << nDatasets << std::endl;
        throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
    }
    ranksModels.clear();
    // computeRanks() accumulates on `ranks`, so it has to start empty too
    ranks.clear();
    computeRanks();
    // Set the control model as the one with the lowest average rank.
    // `ranks` is a map ordered by model name, so a position in it is not an index
    // into `models`: look up the name of the best model in `models` instead.
    const auto best = std::min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; });
    controlIdx = static_cast<int>(std::distance(models.begin(), std::find(models.begin(), models.end(), best->first)));
    computeWTL();
    maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
    maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
    fitted = true;
}
// Ranks the (name, value) pairs by value: the highest value gets rank 1.
// Pairs with the same value share the average of the ranks they span.
// The vector received is left sorted by descending value.
std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{
    using Entry = std::pair<std::string, double>;
    std::sort(ranksOrder.begin(), ranksOrder.end(), [](const Entry& lhs, const Entry& rhs) {
        return rhs.second < lhs.second;
    });
    const int total = static_cast<int>(ranksOrder.size());
    // First pass: every name gets its position (1-based) as rank
    std::map<std::string, float> ranks;
    for (int pos = 0; pos < total; ++pos) {
        ranks[ranksOrder[pos].first] = pos + 1.0;
    }
    // Second pass: each run [first, next) of equal values shares the average rank
    for (int first = 0, next = 0; first < total; first = next) {
        int rankSum = ranks[ranksOrder[first].first];
        next = first + 1;
        while (next < total && ranksOrder[first].second == ranksOrder[next].second) {
            rankSum += ranks[ranksOrder[next].first];
            ++next;
        }
        const int tied = next - first;
        if (tied > 1) {
            const float sharedRank = (float)rankSum / tied;
            for (int k = first; k < next; ++k) {
                ranks[ranksOrder[k].first] = sharedRank;
            }
        }
    }
    return ranks;
}
// Ranks the models on every dataset (stored in `ranksModels`) and leaves in
// `ranks` the average rank of each model over all the datasets.
void Statistics::computeRanks()
{
    for (const auto& dataset : datasets) {
        // Score of every model on this dataset
        std::vector<std::pair<std::string, double>> scores;
        for (const auto& model : models) {
            scores.emplace_back(model, data[model].at(dataset).at(0).get<double>());
        }
        const std::map<std::string, float> datasetRanks = assignRanks(scores);
        ranksModels[dataset] = datasetRanks;
        // Accumulate; a model not seen yet starts at 0
        for (const auto& entry : datasetRanks) {
            ranks[entry.first] += entry.second;
        }
    }
    // Average the ranks
    for (auto& entry : ranks) {
        entry.second /= nDatasets;
    }
}
// Fills the win/tie/loss counters of every model against the control model,
// going through the datasets present in the first entry of `data`.
// For a model, a dataset counts as `win` when its value is lower than the
// control's, `tie` when both are equal and `loss` otherwise.
void Statistics::computeWTL()
{
    for (int m = 0; m < nModels; ++m) {
        wtl[m] = { 0, 0, 0 };
    }
    json origin = data.begin().value();
    for (auto const& item : origin.items()) {
        const std::string datasetName = item.key();
        auto controlModel = models.at(controlIdx);
        const double controlValue = data[controlModel].at(datasetName).at(0).get<double>();
        for (int m = 0; m < nModels; ++m) {
            if (m == controlIdx) {
                continue;
            }
            const double value = data[models[m]].at(datasetName).at(0).get<double>();
            auto& tally = wtl[m];
            if (value < controlValue) {
                ++tally.win;
            } else if (value == controlValue) {
                ++tally.tie;
            } else {
                ++tally.loss;
            }
        }
    }
}
// Post-hoc Holm test of every model against the control model.
// Computes the p-value of each model from the difference of average ranks,
// applies the Holm adjustment, stores the outcome in `holmResult` (rebuilt on
// every call) and, if `output` is set, prints the report to std::cout.
// friedmanResult only selects the color of the report.
void Statistics::postHocHolmTest(bool friedmanResult)
{
    if (!fitted) {
        fit();
    }
    std::stringstream oss;
    // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
    // Post-hoc Holm test
    // Calculate the p-value for the models paired with the control model
    std::map<int, double> stats; // p-value of each model paired with the control model
    boost::math::normal dist(0.0, 1.0);
    double diff = std::sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
    for (int i = 0; i < nModels; i++) {
        if (i == controlIdx) {
            stats[i] = 0.0;
            continue;
        }
        // std::abs: an unqualified abs() may resolve to the C int abs(int) and truncate the difference
        double z = std::abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
        double p_value = (long double)2 * (1 - cdf(dist, z));
        stats[i] = p_value;
    }
    // Sort the models by p-value
    std::vector<std::pair<int, double>> statsOrder;
    for (const auto& stat : stats) {
        statsOrder.push_back({ stat.first, stat.second });
    }
    std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair<int, double>& a, const std::pair<int, double>& b) {
        return a.second < b.second;
    });
    // Holm adjustment: scale by the number of remaining hypotheses, cap at 1 and
    // keep the adjusted p-values non-decreasing
    for (int i = 0; i < static_cast<int>(statsOrder.size()); ++i) {
        auto item = statsOrder.at(i);
        double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
        double p_value = std::min((double)1.0, item.second * (nModels - i));
        p_value = std::max(before, p_value);
        statsOrder[i] = { item.first, p_value };
    }
    holmResult.model = models.at(controlIdx);
    // Start from scratch so that calling the test again doesn't duplicate the lines
    holmResult.holmLines.clear();
    auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
    oss << color;
    oss << " *************************************************************************************************************" << std::endl;
    oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
    oss << " Control model: " << models.at(controlIdx) << std::endl;
    oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl;
    oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl;
    // sort ranks from lowest to highest
    std::vector<std::pair<std::string, float>> ranksOrder;
    for (const auto& rank : ranks) {
        ranksOrder.push_back({ rank.first, rank.second });
    }
    std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, float>& a, const std::pair<std::string, float>& b) {
        return a.second < b.second;
    });
    // Show the control model info.
    oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " ";
    oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl;
    for (const auto& item : ranksOrder) {
        auto idx = std::distance(models.begin(), std::find(models.begin(), models.end(), item.first));
        double pvalue = 0.0;
        for (const auto& stat : statsOrder) {
            if (stat.first == idx) {
                pvalue = stat.second;
            }
        }
        holmResult.holmLines.push_back({ item.first, pvalue, item.second, wtl.at(idx), pvalue < significance });
        if (item.first == models.at(controlIdx)) {
            // The control model has its line in the result but not in the printed table
            continue;
        }
        auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
        auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
        auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
        oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " ";
        oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second;
        oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss;
        oss << " " << status << textStatus << std::endl;
    }
    oss << color << " *************************************************************************************************************" << std::endl;
    oss << Colors::RESET();
    if (output) {
        std::cout << oss.str();
    }
}
// Friedman test over the average ranks of the models.
// Stores the statistic, the critical value, the p-value and the outcome in
// `friedmanResult`, prints the report to std::cout if `output` is set and
// returns true when the null hypothesis is rejected (p-value < significance).
bool Statistics::friedmanTest()
{
if (!fitted) {
fit();
}
std::stringstream oss;
// Friedman test
// Calculate the Friedman statistic
oss << Colors::BLUE() << std::endl;
oss << "***************************************************************************************************************" << std::endl;
oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl;
double degreesOfFreedom = nModels - 1.0;
// Sum of the squared average ranks of the models
double sumSquared = 0;
for (const auto& rank : ranks) {
sumSquared += pow(rank.second, 2);
}
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
// Calculate the critical value
boost::math::chi_squared chiSquared(degreesOfFreedom);
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
double criticalValue = quantile(chiSquared, 1 - significance);
oss << "Friedman statistic: " << friedmanQ << std::endl;
oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom
<< " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl;
oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl;
bool result;
if (p_value < significance) {
oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl;
result = true;
} else {
oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl;
result = false;
}
oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl;
if (output) {
std::cout << oss.str();
}
friedmanResult = { friedmanQ, criticalValue, p_value, result };
return result;
}
// Outcome stored by friedmanTest(); returned by reference to the member.
FriedmanResult& Statistics::getFriedmanResult()
{
return friedmanResult;
}
// Outcome stored by postHocHolmTest(); returned by reference to the member.
HolmResult& Statistics::getHolmResult()
{
return holmResult;
}
// Ranks of the models on each dataset (dataset -> model -> rank), as computed by computeRanks().
std::map<std::string, std::map<std::string, float>>& Statistics::getRanks()
{
return ranksModels;
}
} // namespace platform

63
src/Platform/Statistics.h Normal file
View File

@@ -0,0 +1,63 @@
#ifndef STATISTICS_H
#define STATISTICS_H
#include <iostream>
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
// Win / tie / loss counters of a model against the control model.
struct WTL {
int win;
int tie;
int loss;
};
// Outcome of the Friedman test.
struct FriedmanResult {
double statistic;
// Chi-squared quantile at 1 - significance
double criticalValue;
long double pvalue;
// True when the null hypothesis is rejected
bool reject;
};
// Outcome of the post-hoc Holm test for one model.
struct HolmLine {
std::string model;
// Holm adjusted p-value
long double pvalue;
// Average rank of the model
double rank;
WTL wtl;
// True when the adjusted p-value is lower than the significance level
bool reject;
};
// Outcome of the post-hoc Holm test: the control model and one line per model.
struct HolmResult {
// Name of the control model
std::string model;
std::vector<HolmLine> holmLines;
};
// Friedman test and post-hoc Holm test over the results of several models on
// several datasets.
class Statistics {
public:
// data is indexed as data[model][dataset][0] to get each score.
// models, datasets and data are kept as references: they have to outlive this object.
Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
// Returns true when the null hypothesis is rejected
bool friedmanTest();
// friedmanResult only selects the color of the printed report
void postHocHolmTest(bool friedmanResult);
FriedmanResult& getFriedmanResult();
HolmResult& getHolmResult();
// dataset -> model -> rank
std::map<std::string, std::map<std::string, float>>& getRanks();
private:
// Computes ranks, control model and win/tie/loss counters; needs at least 3 models and 3 datasets
void fit();
void computeRanks();
void computeWTL();
const std::vector<std::string>& models;
const std::vector<std::string>& datasets;
const json& data;
double significance;
// True to print the reports of the tests to std::cout
bool output;
bool fitted = false;
int nModels = 0;
int nDatasets = 0;
// Index in `models` of the control model
int controlIdx = 0;
// Counters of each model (by index in `models`) against the control model
std::map<int, WTL> wtl;
// Average rank of each model
std::map<std::string, float> ranks;
int maxModelName = 0;
int maxDatasetName = 0;
FriedmanResult friedmanResult;
HolmResult holmResult;
std::map<std::string, std::map<std::string, float>> ranksModels;
};
}
#endif // !STATISTICS_H

17
src/Platform/Symbols.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef SYMBOLS_H
#define SYMBOLS_H
#include <string>
namespace platform {
// Unicode symbols used to mark the results in the reports.
class Symbols {
public:
inline static const std::string check_mark{ "\u2714" }; // U+2714 heavy check mark
inline static const std::string exclamation{ "\u2757" }; // U+2757 heavy exclamation mark symbol
inline static const std::string black_star{ "\u2605" }; // U+2605 black star
inline static const std::string cross{ "\u2717" }; // U+2717 ballot X
inline static const std::string upward_arrow{ "\u27B6" }; // U+27B6 black-feathered north east arrow
inline static const std::string down_arrow{ "\u27B4" }; // U+27B4 black-feathered south east arrow
// Aliases for comparisons with the best result: equal to it / better than it
inline static const std::string equal_best{ check_mark };
inline static const std::string better_best{ black_star };
};
}
#endif // !SYMBOLS_H

43
src/Platform/Timer.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef TIMER_H
#define TIMER_H
#include <chrono>
#include <string>
#include <sstream>
namespace platform {
// Simple stopwatch based on std::chrono::high_resolution_clock.
class Timer {
private:
    std::chrono::high_resolution_clock::time_point begin;
    std::chrono::high_resolution_clock::time_point end;
public:
    Timer() = default;
    ~Timer() = default;
    // Sets the starting point to the current time
    void start() { begin = std::chrono::high_resolution_clock::now(); }
    // Sets the ending point to the current time
    void stop() { end = std::chrono::high_resolution_clock::now(); }
    // Stops the timer (the ending point is updated on every call) and returns
    // the seconds elapsed since start()
    double getDuration()
    {
        stop();
        return std::chrono::duration<double>(end - begin).count();
    }
    // Returns the seconds elapsed since start() without touching the ending point
    double getLapse() const
    {
        return std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - begin).count();
    }
    // Elapsed time as text; with lapse = true the timer is not stopped
    std::string getDurationString(bool lapse = false)
    {
        double duration = lapse ? getLapse() : getDuration();
        return translate2String(duration);
    }
    // Formats a number of seconds with two decimals, as hours ("h") above 3600,
    // as minutes ("m") above 60 and as seconds ("s") otherwise
    std::string translate2String(double duration) const
    {
        double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
        std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
        std::stringstream ss;
        // precision() instead of std::setprecision: this header doesn't include <iomanip>
        ss.precision(2);
        ss << std::fixed << durationShow << " " << durationUnit;
        return ss.str();
    }
};
} /* namespace platform */
#endif /* TIMER_H */

30
src/Platform/Utils.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef UTILS_H
#define UTILS_H
#include <sstream>
#include <string>
#include <vector>
namespace platform {
//static std::vector<std::string> split(const std::string& text, char delimiter);
// Breaks `text` into the pieces found between occurrences of `delimiter`.
// Follows std::getline semantics: consecutive delimiters yield empty pieces, an
// empty input yields no pieces and a trailing delimiter does not add a final
// empty piece.
static std::vector<std::string> split(const std::string& text, char delimiter)
{
    std::vector<std::string> pieces;
    std::stringstream stream(text);
    for (std::string piece; std::getline(stream, piece, delimiter);) {
        pieces.push_back(piece);
    }
    return pieces;
}
// Returns a copy of `str` without leading and trailing whitespace.
// The characters removed are space, \t, \n, \v, \f and \r, i.e. what std::isspace
// accepts in the default "C" locale. Searching with std::string members keeps the
// header self-contained (no <algorithm>/<cctype> needed) and avoids handing a
// possibly negative char value to std::isspace, which is undefined behaviour.
static std::string trim(const std::string& str)
{
    static const char* const whitespace = " \t\n\v\f\r";
    const std::string::size_type first = str.find_first_not_of(whitespace);
    if (first == std::string::npos) {
        // Empty or whitespace-only input.
        return std::string();
    }
    const std::string::size_type last = str.find_last_not_of(whitespace);
    return str.substr(first, last - first + 1);
}
}
#endif

85
src/Platform/b_best.cc Normal file
View File

@@ -0,0 +1,85 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
#include "config.h"
// Declares the command line options of b_best on `program`.
// argc/argv are accepted for signature compatibility but are not used here;
// parsing happens in main().
void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{
    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
    // --level: significance level, must parse as a number and lie in [0.01, 0.15].
    program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
        double level = 0.0;
        try {
            level = std::stod(value);
        }
        catch (...) {
            // std::stod failed: the text is not a number (or is out of range).
            throw std::runtime_error("Significance level must be a decimal number");
        }
        if (level < 0.01 || level > 0.15) {
            throw std::runtime_error("Significance level has to be a number in [0.01, 0.15]");
        }
        return level;
        });
}
int main(int argc, char** argv)
{
argparse::ArgumentParser program("b_best", { project_version.begin(), project_version.end() });
manageArguments(program, argc, argv);
std::string model, score;
bool build, report, friedman, excel;
double level;
try {
program.parse_args(argc, argv);
model = program.get<std::string>("model");
score = program.get<std::string>("score");
build = program.get<bool>("build");
report = program.get<bool>("report");
friedman = program.get<bool>("friedman");
excel = program.get<bool>("excel");
level = program.get<double>("level");
if (model == "" || score == "") {
throw std::runtime_error("Model and score name must be supplied");
}
if (friedman && model != "any") {
std::cerr << "Friedman test can only be used with all models" << std::endl;
std::cerr << program;
exit(1);
}
if (!report && !build) {
std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl;
std::cerr << program;
exit(1);
}
}
catch (const std::exception& err) {
std::cerr << err.what() << std::endl;
std::cerr << program;
exit(1);
}
// Generate report
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level);
if (build) {
if (model == "any") {
results.buildAll();
} else {
std::string fileName = results.build();
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
}
}
if (report) {
if (model == "any") {
results.reportAll(excel);
} else {
results.reportSingle(excel);
}
}
return 0;
}

232
src/Platform/b_grid.cc Normal file
View File

@@ -0,0 +1,232 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <map>
#include <nlohmann/json.hpp>
#include <mpi.h>
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "GridSearch.h"
#include "Paths.h"
#include "Timer.h"
#include "Colors.h"
#include "config.h"
using json = nlohmann::json;
// Width, in characters, of the banner lines printed by headerLine() and list_results().
const int MAXL = 133;
// Declares the command line options of b_grid on `program`.
// --dump, --report and --compute form a required mutually exclusive group; the
// defaults of --discretize, --stratified, --folds and --seeds come from the
// platform::DotEnv configuration.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    auto& group = program.add_mutually_exclusive_group(true);
    // Builds a validator for "number of folds" style options: the value must be an
    // integer greater than 1. Each option supplies its own two error messages.
    const auto foldsValidator = [](const char* tooSmall, const char* notInteger) {
        return [tooSmall, notInteger](const std::string& value) {
            int folds = 0;
            try {
                folds = std::stoi(value);
            }
            catch (...) {
                throw std::runtime_error(notInteger);
            }
            if (folds < 2) {
                throw std::runtime_error(tooSmall);
            }
            return folds;
        };
    };
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) == choices.end()) {
                throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
            }
            return value;
        });
    group.add_argument("--dump").help("Show the grid combinations").default_value(false).implicit_value(true);
    group.add_argument("--report").help("Report the computed hyperparameters").default_value(false).implicit_value(true);
    group.add_argument("--compute").help("Perform computation of the grid output hyperparameters").default_value(false).implicit_value(true);
    program.add_argument("--discretize").help("Discretize input datasets").default_value((bool)std::stoi(env.get("discretize"))).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)std::stoi(env.get("stratified"))).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--continue").help("Continue computing from that dataset").default_value(platform::GridSearch::NO_CONTINUE());
    program.add_argument("--only").help("Used with continue to compute that dataset only").default_value(false).implicit_value(true);
    program.add_argument("--exclude").default_value("[]").help("Datasets to exclude in json format, e.g. [\"dataset1\", \"dataset2\"]");
    program.add_argument("--nested").help("Set the double/nested cross validation number of folds").default_value(5).scan<'i', int>()
        .action(foldsValidator("Number of nested folds must be greater than 1", "Number of nested folds must be an integer"));
    program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy");
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>()
        .action(foldsValidator("Number of folds must be greater than 1", "Number of folds must be an integer"));
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
// Prints the grid input file of `model` as a table: row number, dataset name,
// number of combinations and the hyperparameter grid serialized as JSON.
// Column widths grow to fit the longest dataset name / JSON dump.
void list_dump(std::string& model)
{
    auto grid = platform::GridData(platform::Paths::grid_input(model));
    std::cout << Colors::MAGENTA() << "Listing configuration input file (Grid)" << std::endl << std::endl;
    int datasetWidth = 7;
    int hyperWidth = 15;
    auto entries = grid.getGridFile();
    // First pass: find the widest value of each variable-width column.
    for (const auto& entry : entries) {
        const int nameLength = entry.first.size();
        if (nameLength > datasetWidth) {
            datasetWidth = nameLength;
        }
        const int dumpLength = entry.second.dump().size();
        if (dumpLength > hyperWidth) {
            hyperWidth = dumpLength;
        }
    }
    std::cout << Colors::GREEN() << std::left << " # " << std::left << std::setw(datasetWidth) << "Dataset" << " #Com. "
        << std::setw(hyperWidth) << "Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(datasetWidth, '=') << " ===== " << std::string(hyperWidth, '=') << std::endl;
    // Second pass: one row per dataset, alternating colours, numbered from 1.
    int row = 0;
    for (const auto& entry : entries) {
        ++row;
        auto color = (row % 2 != 0) ? Colors::CYAN() : Colors::BLUE();
        std::cout << color;
        auto num_combinations = grid.getNumCombinations(entry.first);
        std::cout << std::setw(3) << std::fixed << std::right << row << std::left << " " << std::setw(datasetWidth) << entry.first;
        std::cout << " " << std::setw(5) << std::right << num_combinations << " " << std::setw(hyperWidth) << entry.second.dump() << std::endl;
    }
    std::cout << Colors::RESET() << std::endl;
}
// Formats one banner line: "* " + text, padded with spaces and closed with "*\n".
// The padding is MAXL - text.length() - 3 (never below zero) plus `utf`, an
// adjustment supplied by the caller (NOTE(review): presumably compensating for
// multi-byte characters in `text` - confirm with callers).
// All arithmetic is done in int and the final padding is clamped at zero, so a
// negative `utf` can no longer be turned into a huge std::string size.
std::string headerLine(const std::string& text, int utf = 0)
{
    int padding = MAXL - static_cast<int>(text.length()) - 3;
    if (padding < 0) {
        padding = 0;
    }
    padding += utf;
    if (padding < 0) {
        padding = 0;
    }
    return "* " + text + std::string(padding, ' ') + "*\n";
}
// Prints the stored grid-search output for `model`: a banner with the run
// settings followed by one row per dataset (date, duration, score and the
// selected hyperparameters as JSON).
// NOTE(review): rows are numbered from 0 here while list_dump numbers from 1.
void list_results(json& results, std::string& model)
{
    // Banner
    std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
    std::cout << headerLine("Listing computed hyperparameters for model " + model);
    std::cout << headerLine("Date & time: " + results["date"].get<std::string>() + " Duration: " + results["duration"].get<std::string>());
    std::cout << headerLine("Score: " + results["score"].get<std::string>());
    std::cout << headerLine(
        "Random seeds: " + results["seeds"].dump()
        + " Discretized: " + (results["discretize"].get<bool>() ? "True" : "False")
        + " Stratified: " + (results["stratified"].get<bool>() ? "True" : "False")
        + " #Folds: " + std::to_string(results["n_folds"].get<int>())
        + " Nested: " + (results["nested"].get<int>() == 0 ? "False" : std::to_string(results["nested"].get<int>()))
    );
    std::cout << std::string(MAXL, '*') << std::endl;
    // Column widths: grow to the longest dataset name / hyperparameters dump.
    int nameWidth = 7;
    int hyperWidth = 15;
    for (const auto& entry : results["results"].items()) {
        auto name = entry.key();
        auto detail = entry.value();
        const int nameLength = name.size();
        if (nameLength > nameWidth) {
            nameWidth = nameLength;
        }
        const int dumpLength = detail["hyperparameters"].dump().size();
        if (dumpLength > hyperWidth) {
            hyperWidth = dumpLength;
        }
    }
    std::cout << Colors::GREEN() << " # " << std::left << std::setw(nameWidth) << "Dataset" << " " << std::setw(19) << "Date" << " "
        << "Duration " << std::setw(8) << "Score" << " " << "Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(nameWidth, '=') << " " << std::string(19, '=') << " " << std::string(8, '=') << " "
        << std::string(8, '=') << " " << std::string(hyperWidth, '=') << std::endl;
    // Rows, alternating colours starting with cyan.
    int row = 0;
    for (const auto& entry : results["results"].items()) {
        auto color = (row % 2 == 0) ? Colors::CYAN() : Colors::BLUE();
        auto detail = entry.value();
        std::cout << color;
        std::cout << std::setw(3) << std::right << row << " ";
        std::cout << std::left << std::setw(nameWidth) << entry.key() << " " << detail["date"].get<std::string>();
        std::cout << " " << std::setw(8) << std::right << detail["duration"].get<std::string>() << " ";
        std::cout << std::setw(8) << std::setprecision(6) << std::fixed << std::right << detail["score"].get<double>();
        std::cout << " " << detail["hyperparameters"].dump() << std::endl;
        ++row;
    }
    std::cout << Colors::RESET() << std::endl;
}
/*
 * Entry point of b_grid.
 * Depending on the selected mode it dumps the grid input file (--dump), runs the
 * grid search under MPI (--compute) or lists previously stored results.
 * Returns 0 on success and 1 when --compute is started with fewer than two MPI
 * processes; argument errors print the usage and exit with status 1.
 */
int main(int argc, char** argv)
{
    argparse::ArgumentParser program("b_grid", { project_version.begin(), project_version.end() });
    manageArguments(program);
    struct platform::ConfigGrid config;
    bool dump, compute;
    try {
        program.parse_args(argc, argv);
        config.model = program.get<std::string>("model");
        config.score = program.get<std::string>("score");
        config.discretize = program.get<bool>("discretize");
        config.stratified = program.get<bool>("stratified");
        config.n_folds = program.get<int>("folds");
        config.quiet = program.get<bool>("quiet");
        config.only = program.get<bool>("only");
        config.seeds = program.get<std::vector<int>>("seeds");
        config.nested = program.get<int>("nested");
        config.continue_from = program.get<std::string>("continue");
        if (config.continue_from == platform::GridSearch::NO_CONTINUE() && config.only) {
            throw std::runtime_error("Cannot use --only without --continue");
        }
        dump = program.get<bool>("dump");
        compute = program.get<bool>("compute");
        if (dump && (config.continue_from != platform::GridSearch::NO_CONTINUE() || config.only)) {
            throw std::runtime_error("Cannot use --dump with --continue or --only");
        }
        auto excluded = program.get<std::string>("exclude");
        // --exclude is a JSON array; a malformed value throws and is reported below.
        config.excluded = json::parse(excluded);
    }
    catch (const std::exception& err) {
        std::cerr << err.what() << std::endl;
        std::cerr << program;
        exit(1);
    }
    /*
     * Begin Processing
     */
    auto env = platform::DotEnv();
    config.platform = env.get("platform");
    platform::Paths::createPath(platform::Paths::grid());
    auto grid_search = platform::GridSearch(config);
    platform::Timer timer;
    timer.start();
    if (dump) {
        list_dump(config.model);
    } else {
        if (compute) {
            struct platform::ConfigMPI mpi_config;
            mpi_config.manager = 0; // which process is the manager
            MPI_Init(&argc, &argv);
            MPI_Comm_rank(MPI_COMM_WORLD, &mpi_config.rank);
            MPI_Comm_size(MPI_COMM_WORLD, &mpi_config.n_procs);
            if (mpi_config.n_procs < 2) {
                // Report and leave cleanly: throwing here had no handler, so the
                // process was terminated without ever calling MPI_Finalize.
                std::cerr << "Cannot use --compute with less than 2 mpi processes, try mpirun -np 2 ..." << std::endl;
                MPI_Finalize();
                return 1;
            }
            grid_search.go(mpi_config);
            // Only the manager process loads and prints the results.
            if (mpi_config.rank == mpi_config.manager) {
                auto results = grid_search.loadResults();
                list_results(results, config.model);
                std::cout << "Process took " << timer.getDurationString() << std::endl;
            }
            MPI_Finalize();
        } else {
            // List results
            auto results = grid_search.loadResults();
            if (results.empty()) {
                std::cout << "** No results found" << std::endl;
            } else {
                list_results(results, config.model);
            }
        }
    }
    std::cout << "Done!" << std::endl;
    return 0;
}

56
src/Platform/b_list.cc Normal file
View File

@@ -0,0 +1,56 @@
#include <iostream>
#include <locale>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
// Width of the "Balance" column: outputBalance() wraps its text in chunks of this
// many characters and main() underlines the column with this many '=' signs.
const int BALANCE_LENGTH = 75;
// Number punctuation facet: ',' as decimal point, '.' as thousands separator and
// digits grouped in threes.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override
    {
        return ',';
    }
    char do_thousands_sep() const override
    {
        return '.';
    }
    std::string do_grouping() const override
    {
        return "\03";
    }
};
// Prints `balance` followed by a newline, wrapping it in chunks of BALANCE_LENGTH
// characters. Every continuation line is preceded by 48 spaces
// (NOTE(review): presumably the combined width of the columns printed before the
// balance in main() - confirm if those widths change).
// A new line is started only while text remains beyond a full chunk, so a balance
// of exactly BALANCE_LENGTH characters no longer produces an extra line holding
// nothing but the padding.
void outputBalance(const std::string& balance)
{
    const std::size_t width = BALANCE_LENGTH;
    std::size_t offset = 0;
    while (balance.size() - offset > width) {
        std::cout << balance.substr(offset, width) << std::endl;
        std::cout << std::setw(48) << " ";
        offset += width;
    }
    std::cout << balance.substr(offset) << std::endl;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(false, platform::Paths::datasets());
locale mylocale(std::cout.getloc(), new separated);
locale::global(mylocale);
std::cout.imbue(mylocale);
std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl;
std::string balanceBars = std::string(BALANCE_LENGTH, '=');
std::cout << "============================== ====== ===== === " << balanceBars << std::endl;
bool odd = true;
for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
std::cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
std::cout << setw(6) << right << nSamples << " ";
std::cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
std::cout << setw(3) << right << data.getNClasses(dataset) << " ";
std::stringstream oss;
std::string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
odd = !odd;
}
std::cout << Colors::RESET() << std::endl;
return 0;
}

135
src/Platform/b_main.cc Normal file
View File

@@ -0,0 +1,135 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "Experiment.h"
#include "Datasets.h"
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "Paths.h"
#include "config.h"
using json = nlohmann::json;
// Declares the command line options of b_main on `program`.
// The defaults of --discretize, --stratified, --folds and --seeds come from the
// platform::DotEnv configuration.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    // The two literals are concatenated by the compiler, so the separating space
    // has to be part of one of them.
    program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. "
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
            // Accept only names known to the model registry.
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
        });
    program.add_argument("--title").default_value("").help("Experiment title");
    program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)std::stoi(env.get("discretize"))).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)std::stoi(env.get("stratified"))).implicit_value(true);
    // --folds must be an integer greater than 1.
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int folds = 0;
        try {
            folds = std::stoi(value);
        }
        catch (...) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (folds < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return folds;
        });
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
int main(int argc, char** argv)
{
argparse::ArgumentParser program("b_main", { project_version.begin(), project_version.end() });
manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet;
std::vector<int> seeds;
std::vector<std::string> filesToTest;
int n_folds;
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize");
stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet");
n_folds = program.get<int>("folds");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
title = program.get<std::string>("title");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
saveResults = program.get<bool>("save");
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
if (file_name != "") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
platform::Timer timer;
timer.start();
experiment.go(filesToTest, quiet);
experiment.setDuration(timer.getDuration());
if (saveResults) {
experiment.save(platform::Paths::results());
}
if (!quiet)
experiment.report();
std::cout << "Done!" << std::endl;
return 0;
}

49
src/Platform/b_manage.cc Normal file
View File

@@ -0,0 +1,49 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "ManageResults.h"
#include "config.h"
// Declares the command line options of b_manage on `program`, parses argv and
// validates the result. On any error the message and the usage are printed on
// stderr and the process exits with status 1.
void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{
    program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
    program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
    program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
    program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
    program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
    program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        if (program.get<int>("number") < 0) {
            throw std::runtime_error("Number of results must be greater than or equal to 0");
        }
        // Read every remaining option once so that a retrieval problem is reported
        // here, with the usage text, rather than later in main().
        static_cast<void>(program.get<std::string>("model"));
        static_cast<void>(program.get<std::string>("score"));
        static_cast<void>(program.get<bool>("complete"));
        static_cast<void>(program.get<bool>("partial"));
        static_cast<void>(program.get<bool>("compare"));
    }
    catch (const std::exception& err) {
        std::cerr << err.what() << std::endl;
        std::cerr << program;
        exit(1);
    }
}
// Entry point of b_manage: reads the already validated options and starts the
// interactive results manager.
int main(int argc, char** argv)
{
    auto program = argparse::ArgumentParser("b_manage", { project_version.begin(), project_version.end() });
    manageArguments(program, argc, argv);
    const int number = program.get<int>("number");
    const std::string model = program.get<std::string>("model");
    const std::string score = program.get<std::string>("score");
    const bool complete = program.get<bool>("complete");
    const bool partialRequested = program.get<bool>("partial");
    const bool compare = program.get<bool>("compare");
    // --complete takes precedence over --partial.
    const bool partial = partialRequested && !complete;
    auto manager = platform::ManageResults(number, model, score, complete, partial, compare);
    manager.doMenu();
    return 0;
}

View File

@@ -0,0 +1,29 @@
#ifndef MODEL_REGISTER_H
#define MODEL_REGISTER_H
// One file-scope Registrar per available model. Each is constructed with the model
// name and a factory lambda that returns a newly allocated classifier as a
// bayesnet::BaseClassifier*.
// NOTE(review): presumably the Registrar constructor records the factory in
// platform::Models and the caller of the factory owns the returned pointer - confirm.
// This header has no includes of its own: the including file must already have
// declared platform::Registrar and the bayesnet / pywrap classifier classes.
// Bayesian network classifiers; SPODE/SPODELd/KDB/KDBLd are created with the argument 2.
static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
// Classifiers taken from the pywrap namespace.
static platform::Registrar registrarSt("STree",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
static platform::Registrar registrarOdte("Odte",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
static platform::Registrar registrarSvc("SVC",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
#endif

15
tests/CMakeLists.txt Normal file
View File

@@ -0,0 +1,15 @@
# Unit tests of the platform code; the target is only defined when ENABLE_TESTING is set.
if(ENABLE_TESTING)
# Name shared by the test executable and the CTest entry.
set(TEST_PLATFORM "unit_tests_platform")
# Header search paths, all relative to BayesNet_SOURCE_DIR.
# NOTE(review): assumes the top-level project defines BayesNet_SOURCE_DIR - confirm.
include_directories(
${BayesNet_SOURCE_DIR}/src/BayesNet
${BayesNet_SOURCE_DIR}/src/Platform
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/lib/argparse/include
)
# Only TestUtils.cc is compiled; Catch2WithMain supplies main().
set(TEST_SOURCES_PLATFORM TestUtils.cc)
add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
# Register the executable with CTest.
add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
endif(ENABLE_TESTING)

104
tests/TestUtils.cc Normal file
View File

@@ -0,0 +1,104 @@
#include "TestUtils.h"
// Test-local replacement for the platform path helper: only knows where the
// test datasets live, as a path relative to the working directory.
class Paths {
public:
    static std::string datasets()
    {
        const std::string location = "../../data/";
        return location;
    }
};
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
{
std::vector<mdlp::labels_t> Xd;
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
// Discretizes every row of X against the labels y with mdlp::CPPFImdlp and
// returns the discretized rows in the same order.
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    std::vector<mdlp::labels_t> discretized;
    auto discretizer = mdlp::CPPFImdlp();
    for (auto& samples : X) {
        discretizer.fit(samples, y);
        // transform() hands back a reference; push_back stores a copy.
        mdlp::labels_t& labels = discretizer.transform(samples);
        discretized.push_back(labels);
    }
    return discretized;
}
// Returns true when the file `name` can be opened for reading, false otherwise.
// The probe handle is closed before returning.
bool file_exists(const std::string& name)
{
    if (FILE* file = fopen(name.c_str(), "r")) {
        fclose(file);
        return true;
    }
    return false;
}
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::std::string& name, bool class_last, bool discretize_dataset)
{
auto handler = ArffFiles();
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff", class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
torch::Tensor Xd;
auto states = map<std::string, std::vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
}
}
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}
tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadFile(const std::std::string& name)
{
auto handler = ArffFiles();
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff");
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
// Discretize Dataset
std::vector<mdlp::labels_t> Xd;
map<std::string, int> maxes;
tie(Xd, maxes) = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<std::string, std::vector<int>> states;
for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
return { Xd, y, features, className, states };
}

43
tests/TestUtils.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef TEST_UTILS_H
#define TEST_UTILS_H
#include <torch/torch.h>
#include <string>
#include <vector>
#include <map>
#include <std::tuple>
#include "ArffFiles.h"
#include "CPPFImdlp.h"
bool file_exists(const std::std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
std::tuple<vector<vector<int>>, std::vector<int>, std::vector<string>, std::string, map<std::string, std::vector<int>>> loadFile(const std::string& name);
std::tuple<torch::Tensor, torch::Tensor, std::vector<string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset);
// Test fixture holding one dataset in two representations: torch tensors
// (members ending in "t", loaded with loadDataset) and std containers (members
// ending in "v", loaded with loadFile).
class RawDatasets {
public:
    // file_name is the dataset name without extension; discretize selects whether
    // the tensor representation is discretized.
    RawDatasets(const std::string& file_name, bool discretize)
    {
        // Xt can be either discretized or not
        std::tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize);
        // Xv is always discretized
        std::tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name);
        // Append the labels as an extra row below the features.
        auto labelsRow = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1);
        dataset = torch::cat({ Xt, labelsRow }, 0);
        nSamples = dataset.size(1);
        // Uniform sample weights in both representations.
        weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
        weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
        if (discretize) {
            classNumStates = statest.at(classNamet).size();
        } else {
            classNumStates = 0;
        }
    }
    torch::Tensor Xt, yt, dataset, weights;
    std::vector<std::vector<int>> Xv;
    std::vector<double> weightsv;
    std::vector<int> yv;
    std::vector<std::string> featurest, featuresv;
    std::map<std::string, std::vector<int>> statest, statesv;
    std::string classNamet, classNamev;
    int nSamples, classNumStates;
    double epsilon = 1e-5;
};
#endif //TEST_UTILS_H

863
tests/data/diabetes.arff Executable file
View File

@@ -0,0 +1,863 @@
% 1. Title: Pima Indians Diabetes Database
%
% 2. Sources:
% (a) Original owners: National Institute of Diabetes and Digestive and
% Kidney Diseases
% (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)
% Research Center, RMI Group Leader
% Applied Physics Laboratory
% The Johns Hopkins University
% Johns Hopkins Road
% Laurel, MD 20707
% (301) 953-6231
% (c) Date received: 9 May 1990
%
% 3. Past Usage:
% 1. Smith, J. W., Everhart, J. E., Dickson, W. C., Knowler, W. C., &
% Johannes, R. S. (1988). Using the ADAP learning algorithm to forecast
% the onset of diabetes mellitus. In Proceedings of the Symposium
% on Computer Applications and Medical Care (pp. 261-265). IEEE
% Computer Society Press.
%
% The diagnostic, binary-valued variable investigated is whether the
% patient shows signs of diabetes according to World Health Organization
% criteria (i.e., if the 2 hour post-load plasma glucose was at least
% 200 mg/dl at any survey examination or if found during routine medical
% care). The population lives near Phoenix, Arizona, USA.
%
% Results: Their ADAP algorithm makes a real-valued prediction between
% 0 and 1. This was transformed into a binary decision using a cutoff of
% 0.448. Using 576 training instances, the sensitivity and specificity
% of their algorithm were both 76% on the remaining 192 instances.
%
% 4. Relevant Information:
% Several constraints were placed on the selection of these instances from
% a larger database. In particular, all patients here are females at
% least 21 years old of Pima Indian heritage. ADAP is an adaptive learning
% routine that generates and executes digital analogs of perceptron-like
% devices. It is a unique algorithm; see the paper for details.
%
% 5. Number of Instances: 768
%
% 6. Number of Attributes: 8 plus class
%
% 7. For Each Attribute: (all numeric-valued)
% 1. Number of times pregnant
% 2. Plasma glucose concentration at 2 hours in an oral glucose tolerance test
% 3. Diastolic blood pressure (mm Hg)
% 4. Triceps skin fold thickness (mm)
% 5. 2-Hour serum insulin (mu U/ml)
% 6. Body mass index (weight in kg/(height in m)^2)
% 7. Diabetes pedigree function
% 8. Age (years)
% 9. Class variable (0 or 1)
%
% 8. Missing Attribute Values: None
%
% 9. Class Distribution: (class value 1 is interpreted as "tested positive for
% diabetes")
%
% Class Value Number of instances
% 0 500
% 1 268
%
% 10. Brief statistical analysis:
%
% Attribute number: Mean: Standard Deviation:
% 1. 3.8 3.4
% 2. 120.9 32.0
% 3. 69.1 19.4
% 4. 20.5 16.0
% 5. 79.8 115.2
% 6. 32.0 7.9
% 7. 0.5 0.3
% 8. 33.2 11.8
%
%
%
%
%
%
% Relabeled values in attribute 'class'
% From: 0 To: tested_negative
% From: 1 To: tested_positive
%
@relation pima_diabetes
@attribute 'preg' real
@attribute 'plas' real
@attribute 'pres' real
@attribute 'skin' real
@attribute 'insu' real
@attribute 'mass' real
@attribute 'pedi' real
@attribute 'age' real
@attribute 'class' { tested_negative, tested_positive}
@data
6,148,72,35,0,33.6,0.627,50,tested_positive
1,85,66,29,0,26.6,0.351,31,tested_negative
8,183,64,0,0,23.3,0.672,32,tested_positive
1,89,66,23,94,28.1,0.167,21,tested_negative
0,137,40,35,168,43.1,2.288,33,tested_positive
5,116,74,0,0,25.6,0.201,30,tested_negative
3,78,50,32,88,31,0.248,26,tested_positive
10,115,0,0,0,35.3,0.134,29,tested_negative
2,197,70,45,543,30.5,0.158,53,tested_positive
8,125,96,0,0,0,0.232,54,tested_positive
4,110,92,0,0,37.6,0.191,30,tested_negative
10,168,74,0,0,38,0.537,34,tested_positive
10,139,80,0,0,27.1,1.441,57,tested_negative
1,189,60,23,846,30.1,0.398,59,tested_positive
5,166,72,19,175,25.8,0.587,51,tested_positive
7,100,0,0,0,30,0.484,32,tested_positive
0,118,84,47,230,45.8,0.551,31,tested_positive
7,107,74,0,0,29.6,0.254,31,tested_positive
1,103,30,38,83,43.3,0.183,33,tested_negative
1,115,70,30,96,34.6,0.529,32,tested_positive
3,126,88,41,235,39.3,0.704,27,tested_negative
8,99,84,0,0,35.4,0.388,50,tested_negative
7,196,90,0,0,39.8,0.451,41,tested_positive
9,119,80,35,0,29,0.263,29,tested_positive
11,143,94,33,146,36.6,0.254,51,tested_positive
10,125,70,26,115,31.1,0.205,41,tested_positive
7,147,76,0,0,39.4,0.257,43,tested_positive
1,97,66,15,140,23.2,0.487,22,tested_negative
13,145,82,19,110,22.2,0.245,57,tested_negative
5,117,92,0,0,34.1,0.337,38,tested_negative
5,109,75,26,0,36,0.546,60,tested_negative
3,158,76,36,245,31.6,0.851,28,tested_positive
3,88,58,11,54,24.8,0.267,22,tested_negative
6,92,92,0,0,19.9,0.188,28,tested_negative
10,122,78,31,0,27.6,0.512,45,tested_negative
4,103,60,33,192,24,0.966,33,tested_negative
11,138,76,0,0,33.2,0.42,35,tested_negative
9,102,76,37,0,32.9,0.665,46,tested_positive
2,90,68,42,0,38.2,0.503,27,tested_positive
4,111,72,47,207,37.1,1.39,56,tested_positive
3,180,64,25,70,34,0.271,26,tested_negative
7,133,84,0,0,40.2,0.696,37,tested_negative
7,106,92,18,0,22.7,0.235,48,tested_negative
9,171,110,24,240,45.4,0.721,54,tested_positive
7,159,64,0,0,27.4,0.294,40,tested_negative
0,180,66,39,0,42,1.893,25,tested_positive
1,146,56,0,0,29.7,0.564,29,tested_negative
2,71,70,27,0,28,0.586,22,tested_negative
7,103,66,32,0,39.1,0.344,31,tested_positive
7,105,0,0,0,0,0.305,24,tested_negative
1,103,80,11,82,19.4,0.491,22,tested_negative
1,101,50,15,36,24.2,0.526,26,tested_negative
5,88,66,21,23,24.4,0.342,30,tested_negative
8,176,90,34,300,33.7,0.467,58,tested_positive
7,150,66,42,342,34.7,0.718,42,tested_negative
1,73,50,10,0,23,0.248,21,tested_negative
7,187,68,39,304,37.7,0.254,41,tested_positive
0,100,88,60,110,46.8,0.962,31,tested_negative
0,146,82,0,0,40.5,1.781,44,tested_negative
0,105,64,41,142,41.5,0.173,22,tested_negative
2,84,0,0,0,0,0.304,21,tested_negative
8,133,72,0,0,32.9,0.27,39,tested_positive
5,44,62,0,0,25,0.587,36,tested_negative
2,141,58,34,128,25.4,0.699,24,tested_negative
7,114,66,0,0,32.8,0.258,42,tested_positive
5,99,74,27,0,29,0.203,32,tested_negative
0,109,88,30,0,32.5,0.855,38,tested_positive
2,109,92,0,0,42.7,0.845,54,tested_negative
1,95,66,13,38,19.6,0.334,25,tested_negative
4,146,85,27,100,28.9,0.189,27,tested_negative
2,100,66,20,90,32.9,0.867,28,tested_positive
5,139,64,35,140,28.6,0.411,26,tested_negative
13,126,90,0,0,43.4,0.583,42,tested_positive
4,129,86,20,270,35.1,0.231,23,tested_negative
1,79,75,30,0,32,0.396,22,tested_negative
1,0,48,20,0,24.7,0.14,22,tested_negative
7,62,78,0,0,32.6,0.391,41,tested_negative
5,95,72,33,0,37.7,0.37,27,tested_negative
0,131,0,0,0,43.2,0.27,26,tested_positive
2,112,66,22,0,25,0.307,24,tested_negative
3,113,44,13,0,22.4,0.14,22,tested_negative
2,74,0,0,0,0,0.102,22,tested_negative
7,83,78,26,71,29.3,0.767,36,tested_negative
0,101,65,28,0,24.6,0.237,22,tested_negative
5,137,108,0,0,48.8,0.227,37,tested_positive
2,110,74,29,125,32.4,0.698,27,tested_negative
13,106,72,54,0,36.6,0.178,45,tested_negative
2,100,68,25,71,38.5,0.324,26,tested_negative
15,136,70,32,110,37.1,0.153,43,tested_positive
1,107,68,19,0,26.5,0.165,24,tested_negative
1,80,55,0,0,19.1,0.258,21,tested_negative
4,123,80,15,176,32,0.443,34,tested_negative
7,81,78,40,48,46.7,0.261,42,tested_negative
4,134,72,0,0,23.8,0.277,60,tested_positive
2,142,82,18,64,24.7,0.761,21,tested_negative
6,144,72,27,228,33.9,0.255,40,tested_negative
2,92,62,28,0,31.6,0.13,24,tested_negative
1,71,48,18,76,20.4,0.323,22,tested_negative
6,93,50,30,64,28.7,0.356,23,tested_negative
1,122,90,51,220,49.7,0.325,31,tested_positive
1,163,72,0,0,39,1.222,33,tested_positive
1,151,60,0,0,26.1,0.179,22,tested_negative
0,125,96,0,0,22.5,0.262,21,tested_negative
1,81,72,18,40,26.6,0.283,24,tested_negative
2,85,65,0,0,39.6,0.93,27,tested_negative
1,126,56,29,152,28.7,0.801,21,tested_negative
1,96,122,0,0,22.4,0.207,27,tested_negative
4,144,58,28,140,29.5,0.287,37,tested_negative
3,83,58,31,18,34.3,0.336,25,tested_negative
0,95,85,25,36,37.4,0.247,24,tested_positive
3,171,72,33,135,33.3,0.199,24,tested_positive
8,155,62,26,495,34,0.543,46,tested_positive
1,89,76,34,37,31.2,0.192,23,tested_negative
4,76,62,0,0,34,0.391,25,tested_negative
7,160,54,32,175,30.5,0.588,39,tested_positive
4,146,92,0,0,31.2,0.539,61,tested_positive
5,124,74,0,0,34,0.22,38,tested_positive
5,78,48,0,0,33.7,0.654,25,tested_negative
4,97,60,23,0,28.2,0.443,22,tested_negative
4,99,76,15,51,23.2,0.223,21,tested_negative
0,162,76,56,100,53.2,0.759,25,tested_positive
6,111,64,39,0,34.2,0.26,24,tested_negative
2,107,74,30,100,33.6,0.404,23,tested_negative
5,132,80,0,0,26.8,0.186,69,tested_negative
0,113,76,0,0,33.3,0.278,23,tested_positive
1,88,30,42,99,55,0.496,26,tested_positive
3,120,70,30,135,42.9,0.452,30,tested_negative
1,118,58,36,94,33.3,0.261,23,tested_negative
1,117,88,24,145,34.5,0.403,40,tested_positive
0,105,84,0,0,27.9,0.741,62,tested_positive
4,173,70,14,168,29.7,0.361,33,tested_positive
9,122,56,0,0,33.3,1.114,33,tested_positive
3,170,64,37,225,34.5,0.356,30,tested_positive
8,84,74,31,0,38.3,0.457,39,tested_negative
2,96,68,13,49,21.1,0.647,26,tested_negative
2,125,60,20,140,33.8,0.088,31,tested_negative
0,100,70,26,50,30.8,0.597,21,tested_negative
0,93,60,25,92,28.7,0.532,22,tested_negative
0,129,80,0,0,31.2,0.703,29,tested_negative
5,105,72,29,325,36.9,0.159,28,tested_negative
3,128,78,0,0,21.1,0.268,55,tested_negative
5,106,82,30,0,39.5,0.286,38,tested_negative
2,108,52,26,63,32.5,0.318,22,tested_negative
10,108,66,0,0,32.4,0.272,42,tested_positive
4,154,62,31,284,32.8,0.237,23,tested_negative
0,102,75,23,0,0,0.572,21,tested_negative
9,57,80,37,0,32.8,0.096,41,tested_negative
2,106,64,35,119,30.5,1.4,34,tested_negative
5,147,78,0,0,33.7,0.218,65,tested_negative
2,90,70,17,0,27.3,0.085,22,tested_negative
1,136,74,50,204,37.4,0.399,24,tested_negative
4,114,65,0,0,21.9,0.432,37,tested_negative
9,156,86,28,155,34.3,1.189,42,tested_positive
1,153,82,42,485,40.6,0.687,23,tested_negative
8,188,78,0,0,47.9,0.137,43,tested_positive
7,152,88,44,0,50,0.337,36,tested_positive
2,99,52,15,94,24.6,0.637,21,tested_negative
1,109,56,21,135,25.2,0.833,23,tested_negative
2,88,74,19,53,29,0.229,22,tested_negative
17,163,72,41,114,40.9,0.817,47,tested_positive
4,151,90,38,0,29.7,0.294,36,tested_negative
7,102,74,40,105,37.2,0.204,45,tested_negative
0,114,80,34,285,44.2,0.167,27,tested_negative
2,100,64,23,0,29.7,0.368,21,tested_negative
0,131,88,0,0,31.6,0.743,32,tested_positive
6,104,74,18,156,29.9,0.722,41,tested_positive
3,148,66,25,0,32.5,0.256,22,tested_negative
4,120,68,0,0,29.6,0.709,34,tested_negative
4,110,66,0,0,31.9,0.471,29,tested_negative
3,111,90,12,78,28.4,0.495,29,tested_negative
6,102,82,0,0,30.8,0.18,36,tested_positive
6,134,70,23,130,35.4,0.542,29,tested_positive
2,87,0,23,0,28.9,0.773,25,tested_negative
1,79,60,42,48,43.5,0.678,23,tested_negative
2,75,64,24,55,29.7,0.37,33,tested_negative
8,179,72,42,130,32.7,0.719,36,tested_positive
6,85,78,0,0,31.2,0.382,42,tested_negative
0,129,110,46,130,67.1,0.319,26,tested_positive
5,143,78,0,0,45,0.19,47,tested_negative
5,130,82,0,0,39.1,0.956,37,tested_positive
6,87,80,0,0,23.2,0.084,32,tested_negative
0,119,64,18,92,34.9,0.725,23,tested_negative
1,0,74,20,23,27.7,0.299,21,tested_negative
5,73,60,0,0,26.8,0.268,27,tested_negative
4,141,74,0,0,27.6,0.244,40,tested_negative
7,194,68,28,0,35.9,0.745,41,tested_positive
8,181,68,36,495,30.1,0.615,60,tested_positive
1,128,98,41,58,32,1.321,33,tested_positive
8,109,76,39,114,27.9,0.64,31,tested_positive
5,139,80,35,160,31.6,0.361,25,tested_positive
3,111,62,0,0,22.6,0.142,21,tested_negative
9,123,70,44,94,33.1,0.374,40,tested_negative
7,159,66,0,0,30.4,0.383,36,tested_positive
11,135,0,0,0,52.3,0.578,40,tested_positive
8,85,55,20,0,24.4,0.136,42,tested_negative
5,158,84,41,210,39.4,0.395,29,tested_positive
1,105,58,0,0,24.3,0.187,21,tested_negative
3,107,62,13,48,22.9,0.678,23,tested_positive
4,109,64,44,99,34.8,0.905,26,tested_positive
4,148,60,27,318,30.9,0.15,29,tested_positive
0,113,80,16,0,31,0.874,21,tested_negative
1,138,82,0,0,40.1,0.236,28,tested_negative
0,108,68,20,0,27.3,0.787,32,tested_negative
2,99,70,16,44,20.4,0.235,27,tested_negative
6,103,72,32,190,37.7,0.324,55,tested_negative
5,111,72,28,0,23.9,0.407,27,tested_negative
8,196,76,29,280,37.5,0.605,57,tested_positive
5,162,104,0,0,37.7,0.151,52,tested_positive
1,96,64,27,87,33.2,0.289,21,tested_negative
7,184,84,33,0,35.5,0.355,41,tested_positive
2,81,60,22,0,27.7,0.29,25,tested_negative
0,147,85,54,0,42.8,0.375,24,tested_negative
7,179,95,31,0,34.2,0.164,60,tested_negative
0,140,65,26,130,42.6,0.431,24,tested_positive
9,112,82,32,175,34.2,0.26,36,tested_positive
12,151,70,40,271,41.8,0.742,38,tested_positive
5,109,62,41,129,35.8,0.514,25,tested_positive
6,125,68,30,120,30,0.464,32,tested_negative
5,85,74,22,0,29,1.224,32,tested_positive
5,112,66,0,0,37.8,0.261,41,tested_positive
0,177,60,29,478,34.6,1.072,21,tested_positive
2,158,90,0,0,31.6,0.805,66,tested_positive
7,119,0,0,0,25.2,0.209,37,tested_negative
7,142,60,33,190,28.8,0.687,61,tested_negative
1,100,66,15,56,23.6,0.666,26,tested_negative
1,87,78,27,32,34.6,0.101,22,tested_negative
0,101,76,0,0,35.7,0.198,26,tested_negative
3,162,52,38,0,37.2,0.652,24,tested_positive
4,197,70,39,744,36.7,2.329,31,tested_negative
0,117,80,31,53,45.2,0.089,24,tested_negative
4,142,86,0,0,44,0.645,22,tested_positive
6,134,80,37,370,46.2,0.238,46,tested_positive
1,79,80,25,37,25.4,0.583,22,tested_negative
4,122,68,0,0,35,0.394,29,tested_negative
3,74,68,28,45,29.7,0.293,23,tested_negative
4,171,72,0,0,43.6,0.479,26,tested_positive
7,181,84,21,192,35.9,0.586,51,tested_positive
0,179,90,27,0,44.1,0.686,23,tested_positive
9,164,84,21,0,30.8,0.831,32,tested_positive
0,104,76,0,0,18.4,0.582,27,tested_negative
1,91,64,24,0,29.2,0.192,21,tested_negative
4,91,70,32,88,33.1,0.446,22,tested_negative
3,139,54,0,0,25.6,0.402,22,tested_positive
6,119,50,22,176,27.1,1.318,33,tested_positive
2,146,76,35,194,38.2,0.329,29,tested_negative
9,184,85,15,0,30,1.213,49,tested_positive
10,122,68,0,0,31.2,0.258,41,tested_negative
0,165,90,33,680,52.3,0.427,23,tested_negative
9,124,70,33,402,35.4,0.282,34,tested_negative
1,111,86,19,0,30.1,0.143,23,tested_negative
9,106,52,0,0,31.2,0.38,42,tested_negative
2,129,84,0,0,28,0.284,27,tested_negative
2,90,80,14,55,24.4,0.249,24,tested_negative
0,86,68,32,0,35.8,0.238,25,tested_negative
12,92,62,7,258,27.6,0.926,44,tested_positive
1,113,64,35,0,33.6,0.543,21,tested_positive
3,111,56,39,0,30.1,0.557,30,tested_negative
2,114,68,22,0,28.7,0.092,25,tested_negative
1,193,50,16,375,25.9,0.655,24,tested_negative
11,155,76,28,150,33.3,1.353,51,tested_positive
3,191,68,15,130,30.9,0.299,34,tested_negative
3,141,0,0,0,30,0.761,27,tested_positive
4,95,70,32,0,32.1,0.612,24,tested_negative
3,142,80,15,0,32.4,0.2,63,tested_negative
4,123,62,0,0,32,0.226,35,tested_positive
5,96,74,18,67,33.6,0.997,43,tested_negative
0,138,0,0,0,36.3,0.933,25,tested_positive
2,128,64,42,0,40,1.101,24,tested_negative
0,102,52,0,0,25.1,0.078,21,tested_negative
2,146,0,0,0,27.5,0.24,28,tested_positive
10,101,86,37,0,45.6,1.136,38,tested_positive
2,108,62,32,56,25.2,0.128,21,tested_negative
3,122,78,0,0,23,0.254,40,tested_negative
1,71,78,50,45,33.2,0.422,21,tested_negative
13,106,70,0,0,34.2,0.251,52,tested_negative
2,100,70,52,57,40.5,0.677,25,tested_negative
7,106,60,24,0,26.5,0.296,29,tested_positive
0,104,64,23,116,27.8,0.454,23,tested_negative
5,114,74,0,0,24.9,0.744,57,tested_negative
2,108,62,10,278,25.3,0.881,22,tested_negative
0,146,70,0,0,37.9,0.334,28,tested_positive
10,129,76,28,122,35.9,0.28,39,tested_negative
7,133,88,15,155,32.4,0.262,37,tested_negative
7,161,86,0,0,30.4,0.165,47,tested_positive
2,108,80,0,0,27,0.259,52,tested_positive
7,136,74,26,135,26,0.647,51,tested_negative
5,155,84,44,545,38.7,0.619,34,tested_negative
1,119,86,39,220,45.6,0.808,29,tested_positive
4,96,56,17,49,20.8,0.34,26,tested_negative
5,108,72,43,75,36.1,0.263,33,tested_negative
0,78,88,29,40,36.9,0.434,21,tested_negative
0,107,62,30,74,36.6,0.757,25,tested_positive
2,128,78,37,182,43.3,1.224,31,tested_positive
1,128,48,45,194,40.5,0.613,24,tested_positive
0,161,50,0,0,21.9,0.254,65,tested_negative
6,151,62,31,120,35.5,0.692,28,tested_negative
2,146,70,38,360,28,0.337,29,tested_positive
0,126,84,29,215,30.7,0.52,24,tested_negative
14,100,78,25,184,36.6,0.412,46,tested_positive
8,112,72,0,0,23.6,0.84,58,tested_negative
0,167,0,0,0,32.3,0.839,30,tested_positive
2,144,58,33,135,31.6,0.422,25,tested_positive
5,77,82,41,42,35.8,0.156,35,tested_negative
5,115,98,0,0,52.9,0.209,28,tested_positive
3,150,76,0,0,21,0.207,37,tested_negative
2,120,76,37,105,39.7,0.215,29,tested_negative
10,161,68,23,132,25.5,0.326,47,tested_positive
0,137,68,14,148,24.8,0.143,21,tested_negative
0,128,68,19,180,30.5,1.391,25,tested_positive
2,124,68,28,205,32.9,0.875,30,tested_positive
6,80,66,30,0,26.2,0.313,41,tested_negative
0,106,70,37,148,39.4,0.605,22,tested_negative
2,155,74,17,96,26.6,0.433,27,tested_positive
3,113,50,10,85,29.5,0.626,25,tested_negative
7,109,80,31,0,35.9,1.127,43,tested_positive
2,112,68,22,94,34.1,0.315,26,tested_negative
3,99,80,11,64,19.3,0.284,30,tested_negative
3,182,74,0,0,30.5,0.345,29,tested_positive
3,115,66,39,140,38.1,0.15,28,tested_negative
6,194,78,0,0,23.5,0.129,59,tested_positive
4,129,60,12,231,27.5,0.527,31,tested_negative
3,112,74,30,0,31.6,0.197,25,tested_positive
0,124,70,20,0,27.4,0.254,36,tested_positive
13,152,90,33,29,26.8,0.731,43,tested_positive
2,112,75,32,0,35.7,0.148,21,tested_negative
1,157,72,21,168,25.6,0.123,24,tested_negative
1,122,64,32,156,35.1,0.692,30,tested_positive
10,179,70,0,0,35.1,0.2,37,tested_negative
2,102,86,36,120,45.5,0.127,23,tested_positive
6,105,70,32,68,30.8,0.122,37,tested_negative
8,118,72,19,0,23.1,1.476,46,tested_negative
2,87,58,16,52,32.7,0.166,25,tested_negative
1,180,0,0,0,43.3,0.282,41,tested_positive
12,106,80,0,0,23.6,0.137,44,tested_negative
1,95,60,18,58,23.9,0.26,22,tested_negative
0,165,76,43,255,47.9,0.259,26,tested_negative
0,117,0,0,0,33.8,0.932,44,tested_negative
5,115,76,0,0,31.2,0.343,44,tested_positive
9,152,78,34,171,34.2,0.893,33,tested_positive
7,178,84,0,0,39.9,0.331,41,tested_positive
1,130,70,13,105,25.9,0.472,22,tested_negative
1,95,74,21,73,25.9,0.673,36,tested_negative
1,0,68,35,0,32,0.389,22,tested_negative
5,122,86,0,0,34.7,0.29,33,tested_negative
8,95,72,0,0,36.8,0.485,57,tested_negative
8,126,88,36,108,38.5,0.349,49,tested_negative
1,139,46,19,83,28.7,0.654,22,tested_negative
3,116,0,0,0,23.5,0.187,23,tested_negative
3,99,62,19,74,21.8,0.279,26,tested_negative
5,0,80,32,0,41,0.346,37,tested_positive
4,92,80,0,0,42.2,0.237,29,tested_negative
4,137,84,0,0,31.2,0.252,30,tested_negative
3,61,82,28,0,34.4,0.243,46,tested_negative
1,90,62,12,43,27.2,0.58,24,tested_negative
3,90,78,0,0,42.7,0.559,21,tested_negative
9,165,88,0,0,30.4,0.302,49,tested_positive
1,125,50,40,167,33.3,0.962,28,tested_positive
13,129,0,30,0,39.9,0.569,44,tested_positive
12,88,74,40,54,35.3,0.378,48,tested_negative
1,196,76,36,249,36.5,0.875,29,tested_positive
5,189,64,33,325,31.2,0.583,29,tested_positive
5,158,70,0,0,29.8,0.207,63,tested_negative
5,103,108,37,0,39.2,0.305,65,tested_negative
4,146,78,0,0,38.5,0.52,67,tested_positive
4,147,74,25,293,34.9,0.385,30,tested_negative
5,99,54,28,83,34,0.499,30,tested_negative
6,124,72,0,0,27.6,0.368,29,tested_positive
0,101,64,17,0,21,0.252,21,tested_negative
3,81,86,16,66,27.5,0.306,22,tested_negative
1,133,102,28,140,32.8,0.234,45,tested_positive
3,173,82,48,465,38.4,2.137,25,tested_positive
0,118,64,23,89,0,1.731,21,tested_negative
0,84,64,22,66,35.8,0.545,21,tested_negative
2,105,58,40,94,34.9,0.225,25,tested_negative
2,122,52,43,158,36.2,0.816,28,tested_negative
12,140,82,43,325,39.2,0.528,58,tested_positive
0,98,82,15,84,25.2,0.299,22,tested_negative
1,87,60,37,75,37.2,0.509,22,tested_negative
4,156,75,0,0,48.3,0.238,32,tested_positive
0,93,100,39,72,43.4,1.021,35,tested_negative
1,107,72,30,82,30.8,0.821,24,tested_negative
0,105,68,22,0,20,0.236,22,tested_negative
1,109,60,8,182,25.4,0.947,21,tested_negative
1,90,62,18,59,25.1,1.268,25,tested_negative
1,125,70,24,110,24.3,0.221,25,tested_negative
1,119,54,13,50,22.3,0.205,24,tested_negative
5,116,74,29,0,32.3,0.66,35,tested_positive
8,105,100,36,0,43.3,0.239,45,tested_positive
5,144,82,26,285,32,0.452,58,tested_positive
3,100,68,23,81,31.6,0.949,28,tested_negative
1,100,66,29,196,32,0.444,42,tested_negative
5,166,76,0,0,45.7,0.34,27,tested_positive
1,131,64,14,415,23.7,0.389,21,tested_negative
4,116,72,12,87,22.1,0.463,37,tested_negative
4,158,78,0,0,32.9,0.803,31,tested_positive
2,127,58,24,275,27.7,1.6,25,tested_negative
3,96,56,34,115,24.7,0.944,39,tested_negative
0,131,66,40,0,34.3,0.196,22,tested_positive
3,82,70,0,0,21.1,0.389,25,tested_negative
3,193,70,31,0,34.9,0.241,25,tested_positive
4,95,64,0,0,32,0.161,31,tested_positive
6,137,61,0,0,24.2,0.151,55,tested_negative
5,136,84,41,88,35,0.286,35,tested_positive
9,72,78,25,0,31.6,0.28,38,tested_negative
5,168,64,0,0,32.9,0.135,41,tested_positive
2,123,48,32,165,42.1,0.52,26,tested_negative
4,115,72,0,0,28.9,0.376,46,tested_positive
0,101,62,0,0,21.9,0.336,25,tested_negative
8,197,74,0,0,25.9,1.191,39,tested_positive
1,172,68,49,579,42.4,0.702,28,tested_positive
6,102,90,39,0,35.7,0.674,28,tested_negative
1,112,72,30,176,34.4,0.528,25,tested_negative
1,143,84,23,310,42.4,1.076,22,tested_negative
1,143,74,22,61,26.2,0.256,21,tested_negative
0,138,60,35,167,34.6,0.534,21,tested_positive
3,173,84,33,474,35.7,0.258,22,tested_positive
1,97,68,21,0,27.2,1.095,22,tested_negative
4,144,82,32,0,38.5,0.554,37,tested_positive
1,83,68,0,0,18.2,0.624,27,tested_negative
3,129,64,29,115,26.4,0.219,28,tested_positive
1,119,88,41,170,45.3,0.507,26,tested_negative
2,94,68,18,76,26,0.561,21,tested_negative
0,102,64,46,78,40.6,0.496,21,tested_negative
2,115,64,22,0,30.8,0.421,21,tested_negative
8,151,78,32,210,42.9,0.516,36,tested_positive
4,184,78,39,277,37,0.264,31,tested_positive
0,94,0,0,0,0,0.256,25,tested_negative
1,181,64,30,180,34.1,0.328,38,tested_positive
0,135,94,46,145,40.6,0.284,26,tested_negative
1,95,82,25,180,35,0.233,43,tested_positive
2,99,0,0,0,22.2,0.108,23,tested_negative
3,89,74,16,85,30.4,0.551,38,tested_negative
1,80,74,11,60,30,0.527,22,tested_negative
2,139,75,0,0,25.6,0.167,29,tested_negative
1,90,68,8,0,24.5,1.138,36,tested_negative
0,141,0,0,0,42.4,0.205,29,tested_positive
12,140,85,33,0,37.4,0.244,41,tested_negative
5,147,75,0,0,29.9,0.434,28,tested_negative
1,97,70,15,0,18.2,0.147,21,tested_negative
6,107,88,0,0,36.8,0.727,31,tested_negative
0,189,104,25,0,34.3,0.435,41,tested_positive
2,83,66,23,50,32.2,0.497,22,tested_negative
4,117,64,27,120,33.2,0.23,24,tested_negative
8,108,70,0,0,30.5,0.955,33,tested_positive
4,117,62,12,0,29.7,0.38,30,tested_positive
0,180,78,63,14,59.4,2.42,25,tested_positive
1,100,72,12,70,25.3,0.658,28,tested_negative
0,95,80,45,92,36.5,0.33,26,tested_negative
0,104,64,37,64,33.6,0.51,22,tested_positive
0,120,74,18,63,30.5,0.285,26,tested_negative
1,82,64,13,95,21.2,0.415,23,tested_negative
2,134,70,0,0,28.9,0.542,23,tested_positive
0,91,68,32,210,39.9,0.381,25,tested_negative
2,119,0,0,0,19.6,0.832,72,tested_negative
2,100,54,28,105,37.8,0.498,24,tested_negative
14,175,62,30,0,33.6,0.212,38,tested_positive
1,135,54,0,0,26.7,0.687,62,tested_negative
5,86,68,28,71,30.2,0.364,24,tested_negative
10,148,84,48,237,37.6,1.001,51,tested_positive
9,134,74,33,60,25.9,0.46,81,tested_negative
9,120,72,22,56,20.8,0.733,48,tested_negative
1,71,62,0,0,21.8,0.416,26,tested_negative
8,74,70,40,49,35.3,0.705,39,tested_negative
5,88,78,30,0,27.6,0.258,37,tested_negative
10,115,98,0,0,24,1.022,34,tested_negative
0,124,56,13,105,21.8,0.452,21,tested_negative
0,74,52,10,36,27.8,0.269,22,tested_negative
0,97,64,36,100,36.8,0.6,25,tested_negative
8,120,0,0,0,30,0.183,38,tested_positive
6,154,78,41,140,46.1,0.571,27,tested_negative
1,144,82,40,0,41.3,0.607,28,tested_negative
0,137,70,38,0,33.2,0.17,22,tested_negative
0,119,66,27,0,38.8,0.259,22,tested_negative
7,136,90,0,0,29.9,0.21,50,tested_negative
4,114,64,0,0,28.9,0.126,24,tested_negative
0,137,84,27,0,27.3,0.231,59,tested_negative
2,105,80,45,191,33.7,0.711,29,tested_positive
7,114,76,17,110,23.8,0.466,31,tested_negative
8,126,74,38,75,25.9,0.162,39,tested_negative
4,132,86,31,0,28,0.419,63,tested_negative
3,158,70,30,328,35.5,0.344,35,tested_positive
0,123,88,37,0,35.2,0.197,29,tested_negative
4,85,58,22,49,27.8,0.306,28,tested_negative
0,84,82,31,125,38.2,0.233,23,tested_negative
0,145,0,0,0,44.2,0.63,31,tested_positive
0,135,68,42,250,42.3,0.365,24,tested_positive
1,139,62,41,480,40.7,0.536,21,tested_negative
0,173,78,32,265,46.5,1.159,58,tested_negative
4,99,72,17,0,25.6,0.294,28,tested_negative
8,194,80,0,0,26.1,0.551,67,tested_negative
2,83,65,28,66,36.8,0.629,24,tested_negative
2,89,90,30,0,33.5,0.292,42,tested_negative
4,99,68,38,0,32.8,0.145,33,tested_negative
4,125,70,18,122,28.9,1.144,45,tested_positive
3,80,0,0,0,0,0.174,22,tested_negative
6,166,74,0,0,26.6,0.304,66,tested_negative
5,110,68,0,0,26,0.292,30,tested_negative
2,81,72,15,76,30.1,0.547,25,tested_negative
7,195,70,33,145,25.1,0.163,55,tested_positive
6,154,74,32,193,29.3,0.839,39,tested_negative
2,117,90,19,71,25.2,0.313,21,tested_negative
3,84,72,32,0,37.2,0.267,28,tested_negative
6,0,68,41,0,39,0.727,41,tested_positive
7,94,64,25,79,33.3,0.738,41,tested_negative
3,96,78,39,0,37.3,0.238,40,tested_negative
10,75,82,0,0,33.3,0.263,38,tested_negative
0,180,90,26,90,36.5,0.314,35,tested_positive
1,130,60,23,170,28.6,0.692,21,tested_negative
2,84,50,23,76,30.4,0.968,21,tested_negative
8,120,78,0,0,25,0.409,64,tested_negative
12,84,72,31,0,29.7,0.297,46,tested_positive
0,139,62,17,210,22.1,0.207,21,tested_negative
9,91,68,0,0,24.2,0.2,58,tested_negative
2,91,62,0,0,27.3,0.525,22,tested_negative
3,99,54,19,86,25.6,0.154,24,tested_negative
3,163,70,18,105,31.6,0.268,28,tested_positive
9,145,88,34,165,30.3,0.771,53,tested_positive
7,125,86,0,0,37.6,0.304,51,tested_negative
13,76,60,0,0,32.8,0.18,41,tested_negative
6,129,90,7,326,19.6,0.582,60,tested_negative
2,68,70,32,66,25,0.187,25,tested_negative
3,124,80,33,130,33.2,0.305,26,tested_negative
6,114,0,0,0,0,0.189,26,tested_negative
9,130,70,0,0,34.2,0.652,45,tested_positive
3,125,58,0,0,31.6,0.151,24,tested_negative
3,87,60,18,0,21.8,0.444,21,tested_negative
1,97,64,19,82,18.2,0.299,21,tested_negative
3,116,74,15,105,26.3,0.107,24,tested_negative
0,117,66,31,188,30.8,0.493,22,tested_negative
0,111,65,0,0,24.6,0.66,31,tested_negative
2,122,60,18,106,29.8,0.717,22,tested_negative
0,107,76,0,0,45.3,0.686,24,tested_negative
1,86,66,52,65,41.3,0.917,29,tested_negative
6,91,0,0,0,29.8,0.501,31,tested_negative
1,77,56,30,56,33.3,1.251,24,tested_negative
4,132,0,0,0,32.9,0.302,23,tested_positive
0,105,90,0,0,29.6,0.197,46,tested_negative
0,57,60,0,0,21.7,0.735,67,tested_negative
0,127,80,37,210,36.3,0.804,23,tested_negative
3,129,92,49,155,36.4,0.968,32,tested_positive
8,100,74,40,215,39.4,0.661,43,tested_positive
3,128,72,25,190,32.4,0.549,27,tested_positive
10,90,85,32,0,34.9,0.825,56,tested_positive
4,84,90,23,56,39.5,0.159,25,tested_negative
1,88,78,29,76,32,0.365,29,tested_negative
8,186,90,35,225,34.5,0.423,37,tested_positive
5,187,76,27,207,43.6,1.034,53,tested_positive
4,131,68,21,166,33.1,0.16,28,tested_negative
1,164,82,43,67,32.8,0.341,50,tested_negative
4,189,110,31,0,28.5,0.68,37,tested_negative
1,116,70,28,0,27.4,0.204,21,tested_negative
3,84,68,30,106,31.9,0.591,25,tested_negative
6,114,88,0,0,27.8,0.247,66,tested_negative
1,88,62,24,44,29.9,0.422,23,tested_negative
1,84,64,23,115,36.9,0.471,28,tested_negative
7,124,70,33,215,25.5,0.161,37,tested_negative
1,97,70,40,0,38.1,0.218,30,tested_negative
8,110,76,0,0,27.8,0.237,58,tested_negative
11,103,68,40,0,46.2,0.126,42,tested_negative
11,85,74,0,0,30.1,0.3,35,tested_negative
6,125,76,0,0,33.8,0.121,54,tested_positive
0,198,66,32,274,41.3,0.502,28,tested_positive
1,87,68,34,77,37.6,0.401,24,tested_negative
6,99,60,19,54,26.9,0.497,32,tested_negative
0,91,80,0,0,32.4,0.601,27,tested_negative
2,95,54,14,88,26.1,0.748,22,tested_negative
1,99,72,30,18,38.6,0.412,21,tested_negative
6,92,62,32,126,32,0.085,46,tested_negative
4,154,72,29,126,31.3,0.338,37,tested_negative
0,121,66,30,165,34.3,0.203,33,tested_positive
3,78,70,0,0,32.5,0.27,39,tested_negative
2,130,96,0,0,22.6,0.268,21,tested_negative
3,111,58,31,44,29.5,0.43,22,tested_negative
2,98,60,17,120,34.7,0.198,22,tested_negative
1,143,86,30,330,30.1,0.892,23,tested_negative
1,119,44,47,63,35.5,0.28,25,tested_negative
6,108,44,20,130,24,0.813,35,tested_negative
2,118,80,0,0,42.9,0.693,21,tested_positive
10,133,68,0,0,27,0.245,36,tested_negative
2,197,70,99,0,34.7,0.575,62,tested_positive
0,151,90,46,0,42.1,0.371,21,tested_positive
6,109,60,27,0,25,0.206,27,tested_negative
12,121,78,17,0,26.5,0.259,62,tested_negative
8,100,76,0,0,38.7,0.19,42,tested_negative
8,124,76,24,600,28.7,0.687,52,tested_positive
1,93,56,11,0,22.5,0.417,22,tested_negative
8,143,66,0,0,34.9,0.129,41,tested_positive
6,103,66,0,0,24.3,0.249,29,tested_negative
3,176,86,27,156,33.3,1.154,52,tested_positive
0,73,0,0,0,21.1,0.342,25,tested_negative
11,111,84,40,0,46.8,0.925,45,tested_positive
2,112,78,50,140,39.4,0.175,24,tested_negative
3,132,80,0,0,34.4,0.402,44,tested_positive
2,82,52,22,115,28.5,1.699,25,tested_negative
6,123,72,45,230,33.6,0.733,34,tested_negative
0,188,82,14,185,32,0.682,22,tested_positive
0,67,76,0,0,45.3,0.194,46,tested_negative
1,89,24,19,25,27.8,0.559,21,tested_negative
1,173,74,0,0,36.8,0.088,38,tested_positive
1,109,38,18,120,23.1,0.407,26,tested_negative
1,108,88,19,0,27.1,0.4,24,tested_negative
6,96,0,0,0,23.7,0.19,28,tested_negative
1,124,74,36,0,27.8,0.1,30,tested_negative
7,150,78,29,126,35.2,0.692,54,tested_positive
4,183,0,0,0,28.4,0.212,36,tested_positive
1,124,60,32,0,35.8,0.514,21,tested_negative
1,181,78,42,293,40,1.258,22,tested_positive
1,92,62,25,41,19.5,0.482,25,tested_negative
0,152,82,39,272,41.5,0.27,27,tested_negative
1,111,62,13,182,24,0.138,23,tested_negative
3,106,54,21,158,30.9,0.292,24,tested_negative
3,174,58,22,194,32.9,0.593,36,tested_positive
7,168,88,42,321,38.2,0.787,40,tested_positive
6,105,80,28,0,32.5,0.878,26,tested_negative
11,138,74,26,144,36.1,0.557,50,tested_positive
3,106,72,0,0,25.8,0.207,27,tested_negative
6,117,96,0,0,28.7,0.157,30,tested_negative
2,68,62,13,15,20.1,0.257,23,tested_negative
9,112,82,24,0,28.2,1.282,50,tested_positive
0,119,0,0,0,32.4,0.141,24,tested_positive
2,112,86,42,160,38.4,0.246,28,tested_negative
2,92,76,20,0,24.2,1.698,28,tested_negative
6,183,94,0,0,40.8,1.461,45,tested_negative
0,94,70,27,115,43.5,0.347,21,tested_negative
2,108,64,0,0,30.8,0.158,21,tested_negative
4,90,88,47,54,37.7,0.362,29,tested_negative
0,125,68,0,0,24.7,0.206,21,tested_negative
0,132,78,0,0,32.4,0.393,21,tested_negative
5,128,80,0,0,34.6,0.144,45,tested_negative
4,94,65,22,0,24.7,0.148,21,tested_negative
7,114,64,0,0,27.4,0.732,34,tested_positive
0,102,78,40,90,34.5,0.238,24,tested_negative
2,111,60,0,0,26.2,0.343,23,tested_negative
1,128,82,17,183,27.5,0.115,22,tested_negative
10,92,62,0,0,25.9,0.167,31,tested_negative
13,104,72,0,0,31.2,0.465,38,tested_positive
5,104,74,0,0,28.8,0.153,48,tested_negative
2,94,76,18,66,31.6,0.649,23,tested_negative
7,97,76,32,91,40.9,0.871,32,tested_positive
1,100,74,12,46,19.5,0.149,28,tested_negative
0,102,86,17,105,29.3,0.695,27,tested_negative
4,128,70,0,0,34.3,0.303,24,tested_negative
6,147,80,0,0,29.5,0.178,50,tested_positive
4,90,0,0,0,28,0.61,31,tested_negative
3,103,72,30,152,27.6,0.73,27,tested_negative
2,157,74,35,440,39.4,0.134,30,tested_negative
1,167,74,17,144,23.4,0.447,33,tested_positive
0,179,50,36,159,37.8,0.455,22,tested_positive
11,136,84,35,130,28.3,0.26,42,tested_positive
0,107,60,25,0,26.4,0.133,23,tested_negative
1,91,54,25,100,25.2,0.234,23,tested_negative
1,117,60,23,106,33.8,0.466,27,tested_negative
5,123,74,40,77,34.1,0.269,28,tested_negative
2,120,54,0,0,26.8,0.455,27,tested_negative
1,106,70,28,135,34.2,0.142,22,tested_negative
2,155,52,27,540,38.7,0.24,25,tested_positive
2,101,58,35,90,21.8,0.155,22,tested_negative
1,120,80,48,200,38.9,1.162,41,tested_negative
11,127,106,0,0,39,0.19,51,tested_negative
3,80,82,31,70,34.2,1.292,27,tested_positive
10,162,84,0,0,27.7,0.182,54,tested_negative
1,199,76,43,0,42.9,1.394,22,tested_positive
8,167,106,46,231,37.6,0.165,43,tested_positive
9,145,80,46,130,37.9,0.637,40,tested_positive
6,115,60,39,0,33.7,0.245,40,tested_positive
1,112,80,45,132,34.8,0.217,24,tested_negative
4,145,82,18,0,32.5,0.235,70,tested_positive
10,111,70,27,0,27.5,0.141,40,tested_positive
6,98,58,33,190,34,0.43,43,tested_negative
9,154,78,30,100,30.9,0.164,45,tested_negative
6,165,68,26,168,33.6,0.631,49,tested_negative
1,99,58,10,0,25.4,0.551,21,tested_negative
10,68,106,23,49,35.5,0.285,47,tested_negative
3,123,100,35,240,57.3,0.88,22,tested_negative
8,91,82,0,0,35.6,0.587,68,tested_negative
6,195,70,0,0,30.9,0.328,31,tested_positive
9,156,86,0,0,24.8,0.23,53,tested_positive
0,93,60,0,0,35.3,0.263,25,tested_negative
3,121,52,0,0,36,0.127,25,tested_positive
2,101,58,17,265,24.2,0.614,23,tested_negative
2,56,56,28,45,24.2,0.332,22,tested_negative
0,162,76,36,0,49.6,0.364,26,tested_positive
0,95,64,39,105,44.6,0.366,22,tested_negative
4,125,80,0,0,32.3,0.536,27,tested_positive
5,136,82,0,0,0,0.64,69,tested_negative
2,129,74,26,205,33.2,0.591,25,tested_negative
3,130,64,0,0,23.1,0.314,22,tested_negative
1,107,50,19,0,28.3,0.181,29,tested_negative
1,140,74,26,180,24.1,0.828,23,tested_negative
1,144,82,46,180,46.1,0.335,46,tested_positive
8,107,80,0,0,24.6,0.856,34,tested_negative
13,158,114,0,0,42.3,0.257,44,tested_positive
2,121,70,32,95,39.1,0.886,23,tested_negative
7,129,68,49,125,38.5,0.439,43,tested_positive
2,90,60,0,0,23.5,0.191,25,tested_negative
7,142,90,24,480,30.4,0.128,43,tested_positive
3,169,74,19,125,29.9,0.268,31,tested_positive
0,99,0,0,0,25,0.253,22,tested_negative
4,127,88,11,155,34.5,0.598,28,tested_negative
4,118,70,0,0,44.5,0.904,26,tested_negative
2,122,76,27,200,35.9,0.483,26,tested_negative
6,125,78,31,0,27.6,0.565,49,tested_positive
1,168,88,29,0,35,0.905,52,tested_positive
2,129,0,0,0,38.5,0.304,41,tested_negative
4,110,76,20,100,28.4,0.118,27,tested_negative
6,80,80,36,0,39.8,0.177,28,tested_negative
10,115,0,0,0,0,0.261,30,tested_positive
2,127,46,21,335,34.4,0.176,22,tested_negative
9,164,78,0,0,32.8,0.148,45,tested_positive
2,93,64,32,160,38,0.674,23,tested_positive
3,158,64,13,387,31.2,0.295,24,tested_negative
5,126,78,27,22,29.6,0.439,40,tested_negative
10,129,62,36,0,41.2,0.441,38,tested_positive
0,134,58,20,291,26.4,0.352,21,tested_negative
3,102,74,0,0,29.5,0.121,32,tested_negative
7,187,50,33,392,33.9,0.826,34,tested_positive
3,173,78,39,185,33.8,0.97,31,tested_positive
10,94,72,18,0,23.1,0.595,56,tested_negative
1,108,60,46,178,35.5,0.415,24,tested_negative
5,97,76,27,0,35.6,0.378,52,tested_positive
4,83,86,19,0,29.3,0.317,34,tested_negative
1,114,66,36,200,38.1,0.289,21,tested_negative
1,149,68,29,127,29.3,0.349,42,tested_positive
5,117,86,30,105,39.1,0.251,42,tested_negative
1,111,94,0,0,32.8,0.265,45,tested_negative
4,112,78,40,0,39.4,0.236,38,tested_negative
1,116,78,29,180,36.1,0.496,25,tested_negative
0,141,84,26,0,32.4,0.433,22,tested_negative
2,175,88,0,0,22.9,0.326,22,tested_negative
2,92,52,0,0,30.1,0.141,22,tested_negative
3,130,78,23,79,28.4,0.323,34,tested_positive
8,120,86,0,0,28.4,0.259,22,tested_positive
2,174,88,37,120,44.5,0.646,24,tested_positive
2,106,56,27,165,29,0.426,22,tested_negative
2,105,75,0,0,23.3,0.56,53,tested_negative
4,95,60,32,0,35.4,0.284,28,tested_negative
0,126,86,27,120,27.4,0.515,21,tested_negative
8,65,72,23,0,32,0.6,42,tested_negative
2,99,60,17,160,36.6,0.453,21,tested_negative
1,102,74,0,0,39.5,0.293,42,tested_positive
11,120,80,37,150,42.3,0.785,48,tested_positive
3,102,44,20,94,30.8,0.4,26,tested_negative
1,109,58,18,116,28.5,0.219,22,tested_negative
9,140,94,0,0,32.7,0.734,45,tested_positive
13,153,88,37,140,40.6,1.174,39,tested_negative
12,100,84,33,105,30,0.488,46,tested_negative
1,147,94,41,0,49.3,0.358,27,tested_positive
1,81,74,41,57,46.3,1.096,32,tested_negative
3,187,70,22,200,36.4,0.408,36,tested_positive
6,162,62,0,0,24.3,0.178,50,tested_positive
4,136,70,0,0,31.2,1.182,22,tested_positive
1,121,78,39,74,39,0.261,28,tested_negative
3,108,62,24,0,26,0.223,25,tested_negative
0,181,88,44,510,43.3,0.222,26,tested_positive
8,154,78,32,0,32.4,0.443,45,tested_positive
1,128,88,39,110,36.5,1.057,37,tested_positive
7,137,90,41,0,32,0.391,39,tested_negative
0,123,72,0,0,36.3,0.258,52,tested_positive
1,106,76,0,0,37.5,0.197,26,tested_negative
6,190,92,0,0,35.5,0.278,66,tested_positive
2,88,58,26,16,28.4,0.766,22,tested_negative
9,170,74,31,0,44,0.403,43,tested_positive
9,89,62,0,0,22.5,0.142,33,tested_negative
10,101,76,48,180,32.9,0.171,63,tested_negative
2,122,70,27,0,36.8,0.34,27,tested_negative
5,121,72,23,112,26.2,0.245,30,tested_negative
1,126,60,0,0,30.1,0.349,47,tested_positive
1,93,70,31,0,30.4,0.315,23,tested_negative

428
tests/data/ecoli.arff Executable file
View File

@@ -0,0 +1,428 @@
%
% 1. Title: Protein Localization Sites
%
%
% 2. Creator and Maintainer:
% Kenta Nakai
% Institute of Molecular and Cellular Biology
% Osaka University
% 1-3 Yamada-oka, Suita 565 Japan
% nakai@imcb.osaka-u.ac.jp
% http://www.imcb.osaka-u.ac.jp/nakai/psort.html
% Donor: Paul Horton (paulh@cs.berkeley.edu)
% Date: September, 1996
% See also: yeast database
%
% 3. Past Usage.
% Reference: "A Probabilistic Classification System for Predicting the Cellular
% Localization Sites of Proteins", Paul Horton & Kenta Nakai,
% Intelligent Systems in Molecular Biology, 109-115.
% St. Louis, USA 1996.
% Results: 81% for E.coli with an ad hoc structured
% probability model. Also similar accuracy for Binary Decision Tree and
% Bayesian Classifier methods applied by the same authors in
% unpublished results.
%
% Predicted Attribute: Localization site of protein. ( non-numeric ).
%
%
% 4. The references below describe a predecessor to this dataset and its
% development. They also give results (not cross-validated) for classification
% by a rule-based expert system with that version of the dataset.
%
% Reference: "Expert System for Predicting Protein Localization Sites in
% Gram-Negative Bacteria", Kenta Nakai & Minoru Kanehisa,
% PROTEINS: Structure, Function, and Genetics 11:95-110, 1991.
%
% Reference: "A Knowledge Base for Predicting Protein Localization Sites in
% Eukaryotic Cells", Kenta Nakai & Minoru Kanehisa,
% Genomics 14:897-911, 1992.
%
%
% 5. Number of Instances: 336 for the E.coli dataset
%
%
% 6. Number of Attributes.
% for E.coli dataset: 8 ( 7 predictive, 1 name )
%
% 7. Attribute Information.
%
% 1. Sequence Name: Accession number for the SWISS-PROT database
% 2. mcg: McGeoch's method for signal sequence recognition.
% 3. gvh: von Heijne's method for signal sequence recognition.
% 4. lip: von Heijne's Signal Peptidase II consensus sequence score.
% Binary attribute.
% 5. chg: Presence of charge on N-terminus of predicted lipoproteins.
% Binary attribute.
% 6. aac: score of discriminant analysis of the amino acid content of
% outer membrane and periplasmic proteins.
% 7. alm1: score of the ALOM membrane spanning region prediction program.
% 8. alm2: score of ALOM program after excluding putative cleavable signal
% regions from the sequence.
%
% NOTE - the sequence name has been removed
%
% 8. Missing Attribute Values: None.
%
%
% 9. Class Distribution. The class is the localization site. Please see Nakai &
% Kanehisa referenced above for more details.
%
% cp (cytoplasm) 143
% im (inner membrane without signal sequence) 77
% pp (periplasm) 52
% imU (inner membrane, uncleavable signal sequence) 35
% om (outer membrane) 20
% omL (outer membrane lipoprotein) 5
% imL (inner membrane lipoprotein) 2
% imS (inner membrane, cleavable signal sequence) 2
@relation ecoli
@attribute mcg numeric
@attribute gvh numeric
@attribute lip numeric
@attribute chg numeric
@attribute aac numeric
@attribute alm1 numeric
@attribute alm2 numeric
@attribute class {cp,im,pp,imU,om,omL,imL,imS}
@data
0.49,0.29,0.48,0.5,0.56,0.24,0.35,cp
0.07,0.4,0.48,0.5,0.54,0.35,0.44,cp
0.56,0.4,0.48,0.5,0.49,0.37,0.46,cp
0.59,0.49,0.48,0.5,0.52,0.45,0.36,cp
0.23,0.32,0.48,0.5,0.55,0.25,0.35,cp
0.67,0.39,0.48,0.5,0.36,0.38,0.46,cp
0.29,0.28,0.48,0.5,0.44,0.23,0.34,cp
0.21,0.34,0.48,0.5,0.51,0.28,0.39,cp
0.2,0.44,0.48,0.5,0.46,0.51,0.57,cp
0.42,0.4,0.48,0.5,0.56,0.18,0.3,cp
0.42,0.24,0.48,0.5,0.57,0.27,0.37,cp
0.25,0.48,0.48,0.5,0.44,0.17,0.29,cp
0.39,0.32,0.48,0.5,0.46,0.24,0.35,cp
0.51,0.5,0.48,0.5,0.46,0.32,0.35,cp
0.22,0.43,0.48,0.5,0.48,0.16,0.28,cp
0.25,0.4,0.48,0.5,0.46,0.44,0.52,cp
0.34,0.45,0.48,0.5,0.38,0.24,0.35,cp
0.44,0.27,0.48,0.5,0.55,0.52,0.58,cp
0.23,0.4,0.48,0.5,0.39,0.28,0.38,cp
0.41,0.57,0.48,0.5,0.39,0.21,0.32,cp
0.4,0.45,0.48,0.5,0.38,0.22,0,cp
0.31,0.23,0.48,0.5,0.73,0.05,0.14,cp
0.51,0.54,0.48,0.5,0.41,0.34,0.43,cp
0.3,0.16,0.48,0.5,0.56,0.11,0.23,cp
0.36,0.39,0.48,0.5,0.48,0.22,0.23,cp
0.29,0.37,0.48,0.5,0.48,0.44,0.52,cp
0.25,0.4,0.48,0.5,0.47,0.33,0.42,cp
0.21,0.51,0.48,0.5,0.5,0.32,0.41,cp
0.43,0.37,0.48,0.5,0.53,0.35,0.44,cp
0.43,0.39,0.48,0.5,0.47,0.31,0.41,cp
0.53,0.38,0.48,0.5,0.44,0.26,0.36,cp
0.34,0.33,0.48,0.5,0.38,0.35,0.44,cp
0.56,0.51,0.48,0.5,0.34,0.37,0.46,cp
0.4,0.29,0.48,0.5,0.42,0.35,0.44,cp
0.24,0.35,0.48,0.5,0.31,0.19,0.31,cp
0.36,0.54,0.48,0.5,0.41,0.38,0.46,cp
0.29,0.52,0.48,0.5,0.42,0.29,0.39,cp
0.65,0.47,0.48,0.5,0.59,0.3,0.4,cp
0.32,0.42,0.48,0.5,0.35,0.28,0.38,cp
0.38,0.46,0.48,0.5,0.48,0.22,0.29,cp
0.33,0.45,0.48,0.5,0.52,0.32,0.41,cp
0.3,0.37,0.48,0.5,0.59,0.41,0.49,cp
0.4,0.5,0.48,0.5,0.45,0.39,0.47,cp
0.28,0.38,0.48,0.5,0.5,0.33,0.42,cp
0.61,0.45,0.48,0.5,0.48,0.35,0.41,cp
0.17,0.38,0.48,0.5,0.45,0.42,0.5,cp
0.44,0.35,0.48,0.5,0.55,0.55,0.61,cp
0.43,0.4,0.48,0.5,0.39,0.28,0.39,cp
0.42,0.35,0.48,0.5,0.58,0.15,0.27,cp
0.23,0.33,0.48,0.5,0.43,0.33,0.43,cp
0.37,0.52,0.48,0.5,0.42,0.42,0.36,cp
0.29,0.3,0.48,0.5,0.45,0.03,0.17,cp
0.22,0.36,0.48,0.5,0.35,0.39,0.47,cp
0.23,0.58,0.48,0.5,0.37,0.53,0.59,cp
0.47,0.47,0.48,0.5,0.22,0.16,0.26,cp
0.54,0.47,0.48,0.5,0.28,0.33,0.42,cp
0.51,0.37,0.48,0.5,0.35,0.36,0.45,cp
0.4,0.35,0.48,0.5,0.45,0.33,0.42,cp
0.44,0.34,0.48,0.5,0.3,0.33,0.43,cp
0.42,0.38,0.48,0.5,0.54,0.34,0.43,cp
0.44,0.56,0.48,0.5,0.5,0.46,0.54,cp
0.52,0.36,0.48,0.5,0.41,0.28,0.38,cp
0.36,0.41,0.48,0.5,0.48,0.47,0.54,cp
0.18,0.3,0.48,0.5,0.46,0.24,0.35,cp
0.47,0.29,0.48,0.5,0.51,0.33,0.43,cp
0.24,0.43,0.48,0.5,0.54,0.52,0.59,cp
0.25,0.37,0.48,0.5,0.41,0.33,0.42,cp
0.52,0.57,0.48,0.5,0.42,0.47,0.54,cp
0.25,0.37,0.48,0.5,0.43,0.26,0.36,cp
0.35,0.48,0.48,0.5,0.56,0.4,0.48,cp
0.26,0.26,0.48,0.5,0.34,0.25,0.35,cp
0.44,0.51,0.48,0.5,0.47,0.26,0.36,cp
0.37,0.5,0.48,0.5,0.42,0.36,0.45,cp
0.44,0.42,0.48,0.5,0.42,0.25,0.2,cp
0.24,0.43,0.48,0.5,0.37,0.28,0.38,cp
0.42,0.3,0.48,0.5,0.48,0.26,0.36,cp
0.48,0.42,0.48,0.5,0.45,0.25,0.35,cp
0.41,0.48,0.48,0.5,0.51,0.44,0.51,cp
0.44,0.28,0.48,0.5,0.43,0.27,0.37,cp
0.29,0.41,0.48,0.5,0.48,0.38,0.46,cp
0.34,0.28,0.48,0.5,0.41,0.35,0.44,cp
0.41,0.43,0.48,0.5,0.45,0.31,0.41,cp
0.29,0.47,0.48,0.5,0.41,0.23,0.34,cp
0.34,0.55,0.48,0.5,0.58,0.31,0.41,cp
0.36,0.56,0.48,0.5,0.43,0.45,0.53,cp
0.4,0.46,0.48,0.5,0.52,0.49,0.56,cp
0.5,0.49,0.48,0.5,0.49,0.46,0.53,cp
0.52,0.44,0.48,0.5,0.37,0.36,0.42,cp
0.5,0.51,0.48,0.5,0.27,0.23,0.34,cp
0.53,0.42,0.48,0.5,0.16,0.29,0.39,cp
0.34,0.46,0.48,0.5,0.52,0.35,0.44,cp
0.4,0.42,0.48,0.5,0.37,0.27,0.27,cp
0.41,0.43,0.48,0.5,0.5,0.24,0.25,cp
0.3,0.45,0.48,0.5,0.36,0.21,0.32,cp
0.31,0.47,0.48,0.5,0.29,0.28,0.39,cp
0.64,0.76,0.48,0.5,0.45,0.35,0.38,cp
0.35,0.37,0.48,0.5,0.3,0.34,0.43,cp
0.57,0.54,0.48,0.5,0.37,0.28,0.33,cp
0.65,0.55,0.48,0.5,0.34,0.37,0.28,cp
0.51,0.46,0.48,0.5,0.58,0.31,0.41,cp
0.38,0.4,0.48,0.5,0.63,0.25,0.35,cp
0.24,0.57,0.48,0.5,0.63,0.34,0.43,cp
0.38,0.26,0.48,0.5,0.54,0.16,0.28,cp
0.33,0.47,0.48,0.5,0.53,0.18,0.29,cp
0.24,0.34,0.48,0.5,0.38,0.3,0.4,cp
0.26,0.5,0.48,0.5,0.44,0.32,0.41,cp
0.44,0.49,0.48,0.5,0.39,0.38,0.4,cp
0.43,0.32,0.48,0.5,0.33,0.45,0.52,cp
0.49,0.43,0.48,0.5,0.49,0.3,0.4,cp
0.47,0.28,0.48,0.5,0.56,0.2,0.25,cp
0.32,0.33,0.48,0.5,0.6,0.06,0.2,cp
0.34,0.35,0.48,0.5,0.51,0.49,0.56,cp
0.35,0.34,0.48,0.5,0.46,0.3,0.27,cp
0.38,0.3,0.48,0.5,0.43,0.29,0.39,cp
0.38,0.44,0.48,0.5,0.43,0.2,0.31,cp
0.41,0.51,0.48,0.5,0.58,0.2,0.31,cp
0.34,0.42,0.48,0.5,0.41,0.34,0.43,cp
0.51,0.49,0.48,0.5,0.53,0.14,0.26,cp
0.25,0.51,0.48,0.5,0.37,0.42,0.5,cp
0.29,0.28,0.48,0.5,0.5,0.42,0.5,cp
0.25,0.26,0.48,0.5,0.39,0.32,0.42,cp
0.24,0.41,0.48,0.5,0.49,0.23,0.34,cp
0.17,0.39,0.48,0.5,0.53,0.3,0.39,cp
0.04,0.31,0.48,0.5,0.41,0.29,0.39,cp
0.61,0.36,0.48,0.5,0.49,0.35,0.44,cp
0.34,0.51,0.48,0.5,0.44,0.37,0.46,cp
0.28,0.33,0.48,0.5,0.45,0.22,0.33,cp
0.4,0.46,0.48,0.5,0.42,0.35,0.44,cp
0.23,0.34,0.48,0.5,0.43,0.26,0.37,cp
0.37,0.44,0.48,0.5,0.42,0.39,0.47,cp
0,0.38,0.48,0.5,0.42,0.48,0.55,cp
0.39,0.31,0.48,0.5,0.38,0.34,0.43,cp
0.3,0.44,0.48,0.5,0.49,0.22,0.33,cp
0.27,0.3,0.48,0.5,0.71,0.28,0.39,cp
0.17,0.52,0.48,0.5,0.49,0.37,0.46,cp
0.36,0.42,0.48,0.5,0.53,0.32,0.41,cp
0.3,0.37,0.48,0.5,0.43,0.18,0.3,cp
0.26,0.4,0.48,0.5,0.36,0.26,0.37,cp
0.4,0.41,0.48,0.5,0.55,0.22,0.33,cp
0.22,0.34,0.48,0.5,0.42,0.29,0.39,cp
0.44,0.35,0.48,0.5,0.44,0.52,0.59,cp
0.27,0.42,0.48,0.5,0.37,0.38,0.43,cp
0.16,0.43,0.48,0.5,0.54,0.27,0.37,cp
0.06,0.61,0.48,0.5,0.49,0.92,0.37,im
0.44,0.52,0.48,0.5,0.43,0.47,0.54,im
0.63,0.47,0.48,0.5,0.51,0.82,0.84,im
0.23,0.48,0.48,0.5,0.59,0.88,0.89,im
0.34,0.49,0.48,0.5,0.58,0.85,0.8,im
0.43,0.4,0.48,0.5,0.58,0.75,0.78,im
0.46,0.61,0.48,0.5,0.48,0.86,0.87,im
0.27,0.35,0.48,0.5,0.51,0.77,0.79,im
0.52,0.39,0.48,0.5,0.65,0.71,0.73,im
0.29,0.47,0.48,0.5,0.71,0.65,0.69,im
0.55,0.47,0.48,0.5,0.57,0.78,0.8,im
0.12,0.67,0.48,0.5,0.74,0.58,0.63,im
0.4,0.5,0.48,0.5,0.65,0.82,0.84,im
0.73,0.36,0.48,0.5,0.53,0.91,0.92,im
0.84,0.44,0.48,0.5,0.48,0.71,0.74,im
0.48,0.45,0.48,0.5,0.6,0.78,0.8,im
0.54,0.49,0.48,0.5,0.4,0.87,0.88,im
0.48,0.41,0.48,0.5,0.51,0.9,0.88,im
0.5,0.66,0.48,0.5,0.31,0.92,0.92,im
0.72,0.46,0.48,0.5,0.51,0.66,0.7,im
0.47,0.55,0.48,0.5,0.58,0.71,0.75,im
0.33,0.56,0.48,0.5,0.33,0.78,0.8,im
0.64,0.58,0.48,0.5,0.48,0.78,0.73,im
0.54,0.57,0.48,0.5,0.56,0.81,0.83,im
0.47,0.59,0.48,0.5,0.52,0.76,0.79,im
0.63,0.5,0.48,0.5,0.59,0.85,0.86,im
0.49,0.42,0.48,0.5,0.53,0.79,0.81,im
0.31,0.5,0.48,0.5,0.57,0.84,0.85,im
0.74,0.44,0.48,0.5,0.55,0.88,0.89,im
0.33,0.45,0.48,0.5,0.45,0.88,0.89,im
0.45,0.4,0.48,0.5,0.61,0.74,0.77,im
0.71,0.4,0.48,0.5,0.71,0.7,0.74,im
0.5,0.37,0.48,0.5,0.66,0.64,0.69,im
0.66,0.53,0.48,0.5,0.59,0.66,0.66,im
0.6,0.61,0.48,0.5,0.54,0.67,0.71,im
0.83,0.37,0.48,0.5,0.61,0.71,0.74,im
0.34,0.51,0.48,0.5,0.67,0.9,0.9,im
0.63,0.54,0.48,0.5,0.65,0.79,0.81,im
0.7,0.4,0.48,0.5,0.56,0.86,0.83,im
0.6,0.5,1,0.5,0.54,0.77,0.8,im
0.16,0.51,0.48,0.5,0.33,0.39,0.48,im
0.74,0.7,0.48,0.5,0.66,0.65,0.69,im
0.2,0.46,0.48,0.5,0.57,0.78,0.81,im
0.89,0.55,0.48,0.5,0.51,0.72,0.76,im
0.7,0.46,0.48,0.5,0.56,0.78,0.73,im
0.12,0.43,0.48,0.5,0.63,0.7,0.74,im
0.61,0.52,0.48,0.5,0.54,0.67,0.52,im
0.33,0.37,0.48,0.5,0.46,0.65,0.69,im
0.63,0.65,0.48,0.5,0.66,0.67,0.71,im
0.41,0.51,0.48,0.5,0.53,0.75,0.78,im
0.34,0.67,0.48,0.5,0.52,0.76,0.79,im
0.58,0.34,0.48,0.5,0.56,0.87,0.81,im
0.59,0.56,0.48,0.5,0.55,0.8,0.82,im
0.51,0.4,0.48,0.5,0.57,0.62,0.67,im
0.5,0.57,0.48,0.5,0.71,0.61,0.66,im
0.6,0.46,0.48,0.5,0.45,0.81,0.83,im
0.37,0.47,0.48,0.5,0.39,0.76,0.79,im
0.58,0.55,0.48,0.5,0.57,0.7,0.74,im
0.36,0.47,0.48,0.5,0.51,0.69,0.72,im
0.39,0.41,0.48,0.5,0.52,0.72,0.75,im
0.35,0.51,0.48,0.5,0.61,0.71,0.74,im
0.31,0.44,0.48,0.5,0.5,0.79,0.82,im
0.61,0.66,0.48,0.5,0.46,0.87,0.88,im
0.48,0.49,0.48,0.5,0.52,0.77,0.71,im
0.11,0.5,0.48,0.5,0.58,0.72,0.68,im
0.31,0.36,0.48,0.5,0.58,0.94,0.94,im
0.68,0.51,0.48,0.5,0.71,0.75,0.78,im
0.69,0.39,0.48,0.5,0.57,0.76,0.79,im
0.52,0.54,0.48,0.5,0.62,0.76,0.79,im
0.46,0.59,0.48,0.5,0.36,0.76,0.23,im
0.36,0.45,0.48,0.5,0.38,0.79,0.17,im
0,0.51,0.48,0.5,0.35,0.67,0.44,im
0.1,0.49,0.48,0.5,0.41,0.67,0.21,im
0.3,0.51,0.48,0.5,0.42,0.61,0.34,im
0.61,0.47,0.48,0.5,0,0.8,0.32,im
0.63,0.75,0.48,0.5,0.64,0.73,0.66,im
0.71,0.52,0.48,0.5,0.64,1,0.99,im
0.85,0.53,0.48,0.5,0.53,0.52,0.35,imS
0.63,0.49,0.48,0.5,0.54,0.76,0.79,imS
0.75,0.55,1,1,0.4,0.47,0.3,imL
0.7,0.39,1,0.5,0.51,0.82,0.84,imL
0.72,0.42,0.48,0.5,0.65,0.77,0.79,imU
0.79,0.41,0.48,0.5,0.66,0.81,0.83,imU
0.83,0.48,0.48,0.5,0.65,0.76,0.79,imU
0.69,0.43,0.48,0.5,0.59,0.74,0.77,imU
0.79,0.36,0.48,0.5,0.46,0.82,0.7,imU
0.78,0.33,0.48,0.5,0.57,0.77,0.79,imU
0.75,0.37,0.48,0.5,0.64,0.7,0.74,imU
0.59,0.29,0.48,0.5,0.64,0.75,0.77,imU
0.67,0.37,0.48,0.5,0.54,0.64,0.68,imU
0.66,0.48,0.48,0.5,0.54,0.7,0.74,imU
0.64,0.46,0.48,0.5,0.48,0.73,0.76,imU
0.76,0.71,0.48,0.5,0.5,0.71,0.75,imU
0.84,0.49,0.48,0.5,0.55,0.78,0.74,imU
0.77,0.55,0.48,0.5,0.51,0.78,0.74,imU
0.81,0.44,0.48,0.5,0.42,0.67,0.68,imU
0.58,0.6,0.48,0.5,0.59,0.73,0.76,imU
0.63,0.42,0.48,0.5,0.48,0.77,0.8,imU
0.62,0.42,0.48,0.5,0.58,0.79,0.81,imU
0.86,0.39,0.48,0.5,0.59,0.89,0.9,imU
0.81,0.53,0.48,0.5,0.57,0.87,0.88,imU
0.87,0.49,0.48,0.5,0.61,0.76,0.79,imU
0.47,0.46,0.48,0.5,0.62,0.74,0.77,imU
0.76,0.41,0.48,0.5,0.5,0.59,0.62,imU
0.7,0.53,0.48,0.5,0.7,0.86,0.87,imU
0.64,0.45,0.48,0.5,0.67,0.61,0.66,imU
0.81,0.52,0.48,0.5,0.57,0.78,0.8,imU
0.73,0.26,0.48,0.5,0.57,0.75,0.78,imU
0.49,0.61,1,0.5,0.56,0.71,0.74,imU
0.88,0.42,0.48,0.5,0.52,0.73,0.75,imU
0.84,0.54,0.48,0.5,0.75,0.92,0.7,imU
0.63,0.51,0.48,0.5,0.64,0.72,0.76,imU
0.86,0.55,0.48,0.5,0.63,0.81,0.83,imU
0.79,0.54,0.48,0.5,0.5,0.66,0.68,imU
0.57,0.38,0.48,0.5,0.06,0.49,0.33,imU
0.78,0.44,0.48,0.5,0.45,0.73,0.68,imU
0.78,0.68,0.48,0.5,0.83,0.4,0.29,om
0.63,0.69,0.48,0.5,0.65,0.41,0.28,om
0.67,0.88,0.48,0.5,0.73,0.5,0.25,om
0.61,0.75,0.48,0.5,0.51,0.33,0.33,om
0.67,0.84,0.48,0.5,0.74,0.54,0.37,om
0.74,0.9,0.48,0.5,0.57,0.53,0.29,om
0.73,0.84,0.48,0.5,0.86,0.58,0.29,om
0.75,0.76,0.48,0.5,0.83,0.57,0.3,om
0.77,0.57,0.48,0.5,0.88,0.53,0.2,om
0.74,0.78,0.48,0.5,0.75,0.54,0.15,om
0.68,0.76,0.48,0.5,0.84,0.45,0.27,om
0.56,0.68,0.48,0.5,0.77,0.36,0.45,om
0.65,0.51,0.48,0.5,0.66,0.54,0.33,om
0.52,0.81,0.48,0.5,0.72,0.38,0.38,om
0.64,0.57,0.48,0.5,0.7,0.33,0.26,om
0.6,0.76,1,0.5,0.77,0.59,0.52,om
0.69,0.59,0.48,0.5,0.77,0.39,0.21,om
0.63,0.49,0.48,0.5,0.79,0.45,0.28,om
0.71,0.71,0.48,0.5,0.68,0.43,0.36,om
0.68,0.63,0.48,0.5,0.73,0.4,0.3,om
0.77,0.57,1,0.5,0.37,0.54,0.01,omL
0.66,0.49,1,0.5,0.54,0.56,0.36,omL
0.71,0.46,1,0.5,0.52,0.59,0.3,omL
0.67,0.55,1,0.5,0.66,0.58,0.16,omL
0.68,0.49,1,0.5,0.62,0.55,0.28,omL
0.74,0.49,0.48,0.5,0.42,0.54,0.36,pp
0.7,0.61,0.48,0.5,0.56,0.52,0.43,pp
0.66,0.86,0.48,0.5,0.34,0.41,0.36,pp
0.73,0.78,0.48,0.5,0.58,0.51,0.31,pp
0.65,0.57,0.48,0.5,0.47,0.47,0.51,pp
0.72,0.86,0.48,0.5,0.17,0.55,0.21,pp
0.67,0.7,0.48,0.5,0.46,0.45,0.33,pp
0.67,0.81,0.48,0.5,0.54,0.49,0.23,pp
0.67,0.61,0.48,0.5,0.51,0.37,0.38,pp
0.63,1,0.48,0.5,0.35,0.51,0.49,pp
0.57,0.59,0.48,0.5,0.39,0.47,0.33,pp
0.71,0.71,0.48,0.5,0.4,0.54,0.39,pp
0.66,0.74,0.48,0.5,0.31,0.38,0.43,pp
0.67,0.81,0.48,0.5,0.25,0.42,0.25,pp
0.64,0.72,0.48,0.5,0.49,0.42,0.19,pp
0.68,0.82,0.48,0.5,0.38,0.65,0.56,pp
0.32,0.39,0.48,0.5,0.53,0.28,0.38,pp
0.7,0.64,0.48,0.5,0.47,0.51,0.47,pp
0.63,0.57,0.48,0.5,0.49,0.7,0.2,pp
0.74,0.82,0.48,0.5,0.49,0.49,0.41,pp
0.63,0.86,0.48,0.5,0.39,0.47,0.34,pp
0.63,0.83,0.48,0.5,0.4,0.39,0.19,pp
0.63,0.71,0.48,0.5,0.6,0.4,0.39,pp
0.71,0.86,0.48,0.5,0.4,0.54,0.32,pp
0.68,0.78,0.48,0.5,0.43,0.44,0.42,pp
0.64,0.84,0.48,0.5,0.37,0.45,0.4,pp
0.74,0.47,0.48,0.5,0.5,0.57,0.42,pp
0.75,0.84,0.48,0.5,0.35,0.52,0.33,pp
0.63,0.65,0.48,0.5,0.39,0.44,0.35,pp
0.69,0.67,0.48,0.5,0.3,0.39,0.24,pp
0.7,0.71,0.48,0.5,0.42,0.84,0.85,pp
0.69,0.8,0.48,0.5,0.46,0.57,0.26,pp
0.64,0.66,0.48,0.5,0.41,0.39,0.2,pp
0.63,0.8,0.48,0.5,0.46,0.31,0.29,pp
0.66,0.71,0.48,0.5,0.41,0.5,0.35,pp
0.69,0.59,0.48,0.5,0.46,0.44,0.52,pp
0.68,0.67,0.48,0.5,0.49,0.4,0.34,pp
0.64,0.78,0.48,0.5,0.5,0.36,0.38,pp
0.62,0.78,0.48,0.5,0.47,0.49,0.54,pp
0.76,0.73,0.48,0.5,0.44,0.39,0.39,pp
0.64,0.81,0.48,0.5,0.37,0.39,0.44,pp
0.29,0.39,0.48,0.5,0.52,0.4,0.48,pp
0.62,0.83,0.48,0.5,0.46,0.36,0.4,pp
0.56,0.54,0.48,0.5,0.43,0.37,0.3,pp
0.69,0.66,0.48,0.5,0.41,0.5,0.25,pp
0.69,0.65,0.48,0.5,0.63,0.48,0.41,pp
0.43,0.59,0.48,0.5,0.52,0.49,0.56,pp
0.74,0.56,0.48,0.5,0.47,0.68,0.3,pp
0.71,0.57,0.48,0.5,0.48,0.35,0.32,pp
0.61,0.6,0.48,0.5,0.44,0.39,0.38,pp
0.59,0.61,0.48,0.5,0.42,0.42,0.37,pp
0.74,0.74,0.48,0.5,0.31,0.53,0.52,pp

332
tests/data/glass.arff Executable file
View File

@@ -0,0 +1,332 @@
% 1. Title: Glass Identification Database
%
% 2. Sources:
% (a) Creator: B. German
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% (b) Donor: Vina Spiehler, Ph.D., DABFT
% Diagnostic Products Corporation
% (213) 776-0180 (ext 3014)
% (c) Date: September, 1987
%
% 3. Past Usage:
% -- Rule Induction in Forensic Science
% -- Ian W. Evett and Ernest J. Spiehler
% -- Central Research Establishment
% Home Office Forensic Science Service
% Aldermaston, Reading, Berkshire RG7 4PN
% -- Unknown technical note number (sorry, not listed here)
% -- General Results: nearest neighbor held its own with respect to the
% rule-based system
%
% 4. Relevant Information:
% Vina conducted a comparison test of her rule-based system, BEAGLE, the
% nearest-neighbor algorithm, and discriminant analysis. BEAGLE is
% a product available through VRS Consulting, Inc.; 4676 Admiralty Way,
% Suite 206; Marina del Rey, CA 90292 (213) 827-7890 and FAX: -3189.
% In determining whether the glass was a type of "float" glass or not,
% the following results were obtained (# incorrect answers):
%
% Type of Sample Beagle NN DA
% Windows that were float processed (87) 10 12 21
% Windows that were not: (76) 19 16 22
%
% The study of classification of types of glass was motivated by
% criminological investigation. At the scene of the crime, the glass left
% can be used as evidence...if it is correctly identified!
%
% 5. Number of Instances: 214
%
% 6. Number of Attributes: 10 (including an Id#) plus the class attribute
% -- all attributes are continuously valued
%
% 7. Attribute Information:
% 1. Id number: 1 to 214
% 2. RI: refractive index
% 3. Na: Sodium (unit measurement: weight percent in corresponding oxide, as
% are attributes 4-10)
% 4. Mg: Magnesium
% 5. Al: Aluminum
% 6. Si: Silicon
% 7. K: Potassium
% 8. Ca: Calcium
% 9. Ba: Barium
% 10. Fe: Iron
% 11. Type of glass: (class attribute)
% -- 1 building_windows_float_processed
% -- 2 building_windows_non_float_processed
% -- 3 vehicle_windows_float_processed
% -- 4 vehicle_windows_non_float_processed (none in this database)
% -- 5 containers
% -- 6 tableware
% -- 7 headlamps
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Attribute: Min Max Mean SD Correlation with class
% 2. RI: 1.5112 1.5339 1.5184 0.0030 -0.1642
% 3. Na: 10.73 17.38 13.4079 0.8166 0.5030
% 4. Mg: 0 4.49 2.6845 1.4424 -0.7447
% 5. Al: 0.29 3.5 1.4449 0.4993 0.5988
% 6. Si: 69.81 75.41 72.6509 0.7745 0.1515
% 7. K: 0 6.21 0.4971 0.6522 -0.0100
% 8. Ca: 5.43 16.19 8.9570 1.4232 0.0007
% 9. Ba: 0 3.15 0.1750 0.4972 0.5751
% 10. Fe: 0 0.51 0.0570 0.0974 -0.1879
%
% 9. Class Distribution: (out of 214 total instances)
% -- 163 Window glass (building windows and vehicle windows)
% -- 87 float processed
% -- 70 building windows
% -- 17 vehicle windows
% -- 76 non-float processed
% -- 76 building windows
% -- 0 vehicle windows
% -- 51 Non-window glass
% -- 13 containers
% -- 9 tableware
% -- 29 headlamps
%
%
%
%
%
%
%
% Relabeled values in attribute 'Type'
% From: '1' To: 'build wind float'
% From: '2' To: 'build wind non-float'
% From: '3' To: 'vehic wind float'
% From: '4' To: 'vehic wind non-float'
% From: '5' To: containers
% From: '6' To: tableware
% From: '7' To: headlamps
%
@relation Glass
@attribute 'RI' real
@attribute 'Na' real
@attribute 'Mg' real
@attribute 'Al' real
@attribute 'Si' real
@attribute 'K' real
@attribute 'Ca' real
@attribute 'Ba' real
@attribute 'Fe' real
@attribute 'Type' {'build wind float', 'build wind non-float', 'vehic wind float', 'vehic wind non-float', containers, tableware, headlamps}
@data
1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0,0,'build wind float'
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0,0,'vehic wind float'
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0,0,'build wind float'
1.51299,14.4,1.74,1.54,74.55,0,7.59,0,0,tableware
1.53393,12.3,0,1,70.16,0.12,16.19,0,0.24,'build wind non-float'
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,'build wind non-float'
1.51779,13.64,3.65,0.65,73,0.06,8.93,0,0,'vehic wind float'
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0,0,'build wind float'
1.51545,14.14,0,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0,0.28,'build wind non-float'
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0,0,'build wind non-float'
1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0,0.24,'build wind non-float'
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0,0,'build wind float'
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0,0,'vehic wind float'
1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0,0.17,'vehic wind float'
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0,0,'build wind non-float'
1.51719,14.75,0,2,73.02,0,8.53,1.59,0.08,headlamps
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0,0,'build wind non-float'
1.51994,13.27,0,1.76,73.03,0.47,11.32,0,0,containers
1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0,'build wind non-float'
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0,0.17,'build wind float'
1.52475,11.45,0,1.88,72.19,0.81,13.24,0,0.34,'build wind non-float'
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0,0.22,'build wind non-float'
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0,0.11,'build wind float'
1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,containers
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0,0,'build wind non-float'
1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0,0,'build wind non-float'
1.51683,14.56,0,1.98,73.29,0,8.52,1.57,0.07,headlamps
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0,0,'build wind non-float'
1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0,0,'vehic wind float'
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0,0,'build wind non-float'
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0,0,'vehic wind float'
1.51115,17.38,0,0.34,75.41,0,6.65,0,0,tableware
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0,0,'build wind non-float'
1.51755,13,3.6,1.36,72.99,0.57,8.4,0,0.11,'build wind float'
1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0,0.24,'build wind float'
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0,0.26,'build wind float'
1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0,0,'build wind non-float'
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,'build wind non-float'
1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0,0,'build wind non-float'
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0,0,'build wind float'
1.51806,13,3.8,1.08,73.07,0.56,8.38,0,0.12,'build wind non-float'
1.51627,13,3.58,1.54,72.83,0.61,8.04,0,0,'build wind non-float'
1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0,0,'build wind non-float'
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,'vehic wind float'
1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0,0,'build wind float'
1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0,containers
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0,0,'build wind float'
1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0,0,'build wind float'
1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0,0,'build wind non-float'
1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0,0.06,'build wind float'
1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,'build wind non-float'
1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0,0,'build wind float'
1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0,0,'build wind non-float'
1.5164,14.37,0,2.74,72.85,0,9.45,0.54,0,headlamps
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0,0.07,'build wind float'
1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0,0,headlamps
1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0,0,'build wind float'
1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0,0,'vehic wind float'
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0,0,'build wind non-float'
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0,0.32,'build wind non-float'
1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0,0,'build wind float'
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0,0,'build wind float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0,0.16,'build wind float'
1.51556,13.87,0,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0,0.11,'build wind float'
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0,0.37,'vehic wind float'
1.53125,10.73,0,2.1,69.81,0.58,13.3,3.15,0.28,'build wind non-float'
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0,0.17,'build wind float'
1.51829,14.46,2.24,1.62,72.38,0,9.26,0,0,tableware
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0,0.14,'build wind non-float'
1.51888,14.99,0.78,1.74,72.5,0,9.95,0,0,tableware
1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0,0.1,'build wind non-float'
1.523,13.31,3.58,0.82,71.99,0.12,10.17,0,0.03,'build wind float'
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0,0,'build wind non-float'
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0,0,'build wind float'
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0,0.31,'build wind float'
1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0,0,'vehic wind float'
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0,0,'build wind float'
1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0,0,'build wind float'
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0,0,'build wind float'
1.52127,14.32,3.9,0.83,71.5,0,9.49,0,0,'vehic wind float'
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0,0,'build wind float'
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0,0,containers
1.518,13.71,3.93,1.54,71.81,0.54,8.21,0,0.15,'build wind non-float'
1.52777,12.64,0,0.67,72.02,0.06,14.4,0,0,'build wind non-float'
1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0,0.19,'build wind float'
1.51764,12.98,3.54,1.21,73,0.65,8.53,0,0,'build wind float'
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0,0,'build wind non-float'
1.51645,14.94,0,1.87,73.11,0,8.67,1.38,0,headlamps
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0,0.3,'build wind float'
1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0,0.16,'build wind float'
1.51937,13.79,2.41,1.19,72.76,0,9.77,0,0,tableware
1.51514,14.85,0,2.42,73.72,0,8.39,0.56,0,headlamps
1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0,0.07,'build wind float'
1.51732,14.95,0,1.8,72.99,0,8.61,1.55,0,headlamps
1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0,0.18,'build wind non-float'
1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0,0,'build wind non-float'
1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0,0,'build wind non-float'
1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0,0,'build wind float'
1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0,0.29,'build wind non-float'
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0,headlamps
1.51685,14.92,0,1.99,73.06,0,8.4,1.59,0,headlamps
1.51658,14.8,0,1.99,73.11,0,8.28,1.71,0,headlamps
1.51316,13.02,0,3.04,70.48,6.21,6.96,0,0,containers
1.51709,13,3.47,1.79,72.72,0.66,8.18,0,0,'build wind non-float'
1.51727,14.7,0,2.34,73.28,0,8.95,0.66,0,headlamps
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0,0,'build wind float'
1.51969,12.64,0,1.65,73.75,0.38,11.53,0,0,containers
1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0,0.2,'build wind non-float'
1.51617,14.95,0,2.27,73.3,0,8.71,0.67,0,headlamps
1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0,0,'build wind float'
1.51651,14.38,0,1.94,73.61,0,8.48,1.57,0,headlamps
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0,0,'vehic wind float'
1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0,0,headlamps
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,'build wind non-float'
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0,headlamps
1.51818,13.72,0,0.56,74.45,0,10.99,0,0,'build wind non-float'
1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0,0.24,'build wind float'
1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0,0.24,'build wind non-float'
1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0,0.24,'build wind float'
1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0,0,'build wind non-float'
1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0,0,'build wind non-float'
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0,0.28,containers
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0,0.17,'build wind float'
1.51653,11.95,0,1.19,75.18,2.7,8.93,0,0,headlamps
1.51623,14.14,0,2.88,72.61,0.08,9.18,1.06,0,headlamps
1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0,0,'build wind float'
1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0,0,'build wind float'
1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0,0,'build wind non-float'
1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0,0,'build wind non-float'
1.52065,14.36,0,2.02,73.42,0,8.44,1.64,0,headlamps
1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0,0.14,'build wind float'
1.52369,13.44,0,1.58,72.22,0.32,12.24,0,0,containers
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0,0,'build wind float'
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0,0,'build wind float'
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0,0,'build wind non-float'
1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0,0,'build wind float'
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0,0.09,'build wind non-float'
1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0,0,'build wind float'
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0,'build wind float'
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0,'build wind float'
1.51666,12.86,0,1.83,73.88,0.97,10.17,0,0,containers
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0,0,'build wind non-float'
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0,0.12,'build wind non-float'
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0,0,'build wind non-float'
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0,0.17,'build wind non-float'
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0,0.12,'build wind non-float'
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0,0,'build wind non-float'
1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0,headlamps
1.52227,14.17,3.81,0.78,71.35,0,9.69,0,0,'build wind float'
1.52614,13.7,0,1.36,71.24,0.19,13.44,0,0.1,'build wind non-float'
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0,0,'build wind non-float'
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0,0,'vehic wind float'
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0,0,'build wind float'
1.51508,15.15,0,2.25,73.5,0,8.34,0.63,0,headlamps
1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0,0,containers
1.51966,14.77,3.75,0.29,72.02,0.03,9,0,0,'build wind float'
1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0,0,'build wind non-float'
1.52664,11.23,0,0.77,73.21,0,14.68,0,0,'build wind non-float'
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0,0.11,'build wind float'
1.51602,14.85,0,2.38,73.28,0,8.76,0.64,0.09,headlamps
1.51321,13,0,3.02,70.7,6.21,6.93,0,0,containers
1.52739,11.02,0,0.75,73.08,0,14.96,0,0,'build wind non-float'
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0,0,'build wind float'
1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0,0,'build wind float'
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0,0.35,'build wind non-float'
1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0,0,'build wind non-float'
1.51609,15.01,0,2.51,73.05,0.05,8.83,0.53,0,headlamps
1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0,0,'build wind non-float'
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0,0.19,'build wind non-float'
1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0,0.1,'build wind float'
1.51831,14.39,0,1.82,72.86,1.41,6.47,2.88,0,headlamps
1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0,0,'build wind float'
1.51613,13.88,1.78,1.79,73.1,0,8.67,0.76,0,headlamps
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0,0,'build wind float'
1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0,0,'build wind float'
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0,0,containers
1.51969,14.56,0,0.56,73.48,0,11.22,0,0,tableware
1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0,0,'build wind non-float'
1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0,0.1,'build wind non-float'
1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0,0.09,'vehic wind float'
1.52222,14.43,0,1,72.67,0.1,11.52,0,0.08,'build wind non-float'
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0,0,'build wind float'
1.51711,14.23,0,2.08,73.36,0,8.62,1.67,0,headlamps
1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0,0,'build wind float'
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0,0,'build wind float'
1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0,0.1,'vehic wind float'
1.52043,13.38,0,1.4,72.25,0.33,12.5,0,0,containers
1.519,13.49,3.48,1.35,71.95,0.55,9,0,0,'build wind float'
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0,0.09,'build wind float'
1.51905,14,2.39,1.56,72.37,0,9.57,0,0,tableware
1.51531,14.38,0,2.66,73.1,0.04,9.08,0.64,0,headlamps
1.51916,14.15,0,2.09,72.74,0,10.88,0,0,tableware
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0,0.15,'build wind non-float'
1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0,0,'build wind non-float'
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0,0,'build wind non-float'
1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0,0.09,'build wind non-float'
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0,0.21,'build wind non-float'
1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0,0,'build wind non-float'
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0,0.16,'build wind float'
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0,0,'vehic wind float'
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,'build wind float'
1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0,0,'build wind non-float'
1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0,0,'build wind float'
1.51623,14.2,0,2.79,73.46,0.04,9.04,0.4,0.09,headlamps
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0,0,'build wind float'
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0,0,'build wind float'
1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0,0,'vehic wind float'
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0,0,'build wind non-float'
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0,0.14,'build wind non-float'
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0,0,'build wind non-float'
1.51852,14.09,2.19,1.66,72.67,0,9.32,0,0,tableware

225
tests/data/iris.arff Executable file
View File

@@ -0,0 +1,225 @@
% 1. Title: Iris Plants Database
%
% 2. Sources:
% (a) Creator: R.A. Fisher
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
% (c) Date: July, 1988
%
% 3. Past Usage:
% - Publications: too many to mention!!! Here are a few.
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
% Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
% to Mathematical Statistics" (John Wiley, NY, 1950).
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
% Structure and Classification Rule for Recognition in Partially Exposed
% Environments". IEEE Transactions on Pattern Analysis and Machine
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
% -- Results:
% -- very low misclassification rates (0% for the setosa class)
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
% Transactions on Information Theory, May 1972, 431-433.
% -- Results:
% -- very low misclassification rates again
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al.'s AUTOCLASS II
% conceptual clustering system finds 3 classes in the data.
%
% 4. Relevant Information:
% --- This is perhaps the best known database to be found in the pattern
% recognition literature. Fisher's paper is a classic in the field
% and is referenced frequently to this day. (See Duda & Hart, for
% example.) The data set contains 3 classes of 50 instances each,
% where each class refers to a type of iris plant. One class is
% linearly separable from the other 2; the latter are NOT linearly
% separable from each other.
% --- Predicted attribute: class of iris plant.
% --- This is an exceedingly simple domain.
%
% 5. Number of Instances: 150 (50 in each of three classes)
%
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
%
% 7. Attribute Information:
% 1. sepal length in cm
% 2. sepal width in cm
% 3. petal length in cm
% 4. petal width in cm
% 5. class:
% -- Iris Setosa
% -- Iris Versicolor
% -- Iris Virginica
%
% 8. Missing Attribute Values: None
%
% Summary Statistics:
% Min Max Mean SD Class Correlation
% sepal length: 4.3 7.9 5.84 0.83 0.7826
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
%
% 9. Class Distribution: 33.3% for each of 3 classes.
@RELATION iris
@ATTRIBUTE sepallength REAL
@ATTRIBUTE sepalwidth REAL
@ATTRIBUTE petallength REAL
@ATTRIBUTE petalwidth REAL
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
@DATA
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
%
%
%

10177
tests/data/kdd_JapaneseVowels.arff Executable file

File diff suppressed because it is too large Load Diff

20191
tests/data/letter.arff Executable file

File diff suppressed because it is too large Load Diff

399
tests/data/liver-disorders.arff Executable file
View File

@@ -0,0 +1,399 @@
% 1. Title: BUPA liver disorders
%
% 2. Source information:
% -- Creators: BUPA Medical Research Ltd.
% -- Donor: Richard S. Forsyth
% 8 Grosvenor Avenue
% Mapperley Park
% Nottingham NG3 5DX
% 0602-621676
% -- Date: 5/15/1990
%
% 3. Past usage:
% -- None known other than what is shown in the PC/BEAGLE User's Guide
% (written by Richard S. Forsyth).
%
% 4. Relevant information:
% -- The first 5 variables are all blood tests which are thought
% to be sensitive to liver disorders that might arise from
% excessive alcohol consumption. Each line in the bupa.data file
% constitutes the record of a single male individual.
% -- It appears that drinks>5 is some sort of a selector on this database.
% See the PC/BEAGLE User's Guide for more information.
%
% 5. Number of instances: 345
%
% 6. Number of attributes: 7 overall
%
% 7. Attribute information:
% 1. mcv mean corpuscular volume
% 2. alkphos alkaline phosphatase
% 3. sgpt alanine aminotransferase
% 4. sgot aspartate aminotransferase
% 5. gammagt gamma-glutamyl transpeptidase
% 6. drinks number of half-pint equivalents of alcoholic beverages
% drunk per day
% 7. selector field used to split data into two sets
%
% 8. Missing values: none
% Information about the dataset
% CLASSTYPE: nominal
% CLASSINDEX: last
%
@relation liver-disorders
@attribute mcv INTEGER
@attribute alkphos INTEGER
@attribute sgpt INTEGER
@attribute sgot INTEGER
@attribute gammagt INTEGER
@attribute drinks REAL
@attribute selector {1,2}
@data
85,92,45,27,31,0.0,1
85,64,59,32,23,0.0,2
86,54,33,16,54,0.0,2
91,78,34,24,36,0.0,2
87,70,12,28,10,0.0,2
98,55,13,17,17,0.0,2
88,62,20,17,9,0.5,1
88,67,21,11,11,0.5,1
92,54,22,20,7,0.5,1
90,60,25,19,5,0.5,1
89,52,13,24,15,0.5,1
82,62,17,17,15,0.5,1
90,64,61,32,13,0.5,1
86,77,25,19,18,0.5,1
96,67,29,20,11,0.5,1
91,78,20,31,18,0.5,1
89,67,23,16,10,0.5,1
89,79,17,17,16,0.5,1
91,107,20,20,56,0.5,1
94,116,11,33,11,0.5,1
92,59,35,13,19,0.5,1
93,23,35,20,20,0.5,1
90,60,23,27,5,0.5,1
96,68,18,19,19,0.5,1
84,80,47,33,97,0.5,1
92,70,24,13,26,0.5,1
90,47,28,15,18,0.5,1
88,66,20,21,10,0.5,1
91,102,17,13,19,0.5,1
87,41,31,19,16,0.5,1
86,79,28,16,17,0.5,1
91,57,31,23,42,0.5,1
93,77,32,18,29,0.5,1
88,96,28,21,40,0.5,1
94,65,22,18,11,0.5,1
91,72,155,68,82,0.5,2
85,54,47,33,22,0.5,2
79,39,14,19,9,0.5,2
85,85,25,26,30,0.5,2
89,63,24,20,38,0.5,2
84,92,68,37,44,0.5,2
89,68,26,39,42,0.5,2
89,101,18,25,13,0.5,2
86,84,18,14,16,0.5,2
85,65,25,14,18,0.5,2
88,61,19,21,13,0.5,2
92,56,14,16,10,0.5,2
95,50,29,25,50,0.5,2
91,75,24,22,11,0.5,2
83,40,29,25,38,0.5,2
89,74,19,23,16,0.5,2
85,64,24,22,11,0.5,2
92,57,64,36,90,0.5,2
94,48,11,23,43,0.5,2
87,52,21,19,30,0.5,2
85,65,23,29,15,0.5,2
84,82,21,21,19,0.5,2
88,49,20,22,19,0.5,2
96,67,26,26,36,0.5,2
90,63,24,24,24,0.5,2
90,45,33,34,27,0.5,2
90,72,14,15,18,0.5,2
91,55,4,8,13,0.5,2
91,52,15,22,11,0.5,2
87,71,32,19,27,1.0,1
89,77,26,20,19,1.0,1
89,67,5,17,14,1.0,2
85,51,26,24,23,1.0,2
103,75,19,30,13,1.0,2
90,63,16,21,14,1.0,2
90,63,29,23,57,2.0,1
90,67,35,19,35,2.0,1
87,66,27,22,9,2.0,1
90,73,34,21,22,2.0,1
86,54,20,21,16,2.0,1
90,80,19,14,42,2.0,1
87,90,43,28,156,2.0,2
96,72,28,19,30,2.0,2
91,55,9,25,16,2.0,2
95,78,27,25,30,2.0,2
92,101,34,30,64,2.0,2
89,51,41,22,48,2.0,2
91,99,42,33,16,2.0,2
94,58,21,18,26,2.0,2
92,60,30,27,297,2.0,2
94,58,21,18,26,2.0,2
88,47,33,26,29,2.0,2
92,65,17,25,9,2.0,2
92,79,22,20,11,3.0,1
84,83,20,25,7,3.0,1
88,68,27,21,26,3.0,1
86,48,20,20,6,3.0,1
99,69,45,32,30,3.0,1
88,66,23,12,15,3.0,1
89,62,42,30,20,3.0,1
90,51,23,17,27,3.0,1
81,61,32,37,53,3.0,2
89,89,23,18,104,3.0,2
89,65,26,18,36,3.0,2
92,75,26,26,24,3.0,2
85,59,25,20,25,3.0,2
92,61,18,13,81,3.0,2
89,63,22,27,10,4.0,1
90,84,18,23,13,4.0,1
88,95,25,19,14,4.0,1
89,35,27,29,17,4.0,1
91,80,37,23,27,4.0,1
91,109,33,15,18,4.0,1
91,65,17,5,7,4.0,1
88,107,29,20,50,4.0,2
87,76,22,55,9,4.0,2
87,86,28,23,21,4.0,2
87,42,26,23,17,4.0,2
88,80,24,25,17,4.0,2
90,96,34,49,169,4.0,2
86,67,11,15,8,4.0,2
92,40,19,20,21,4.0,2
85,60,17,21,14,4.0,2
89,90,15,17,25,4.0,2
91,57,15,16,16,4.0,2
96,55,48,39,42,4.0,2
79,101,17,27,23,4.0,2
90,134,14,20,14,4.0,2
89,76,14,21,24,4.0,2
88,93,29,27,31,4.0,2
90,67,10,16,16,4.0,2
92,73,24,21,48,4.0,2
91,55,28,28,82,4.0,2
83,45,19,21,13,4.0,2
90,74,19,14,22,4.0,2
92,66,21,16,33,5.0,1
93,63,26,18,18,5.0,1
86,78,47,39,107,5.0,2
97,44,113,45,150,5.0,2
87,59,15,19,12,5.0,2
86,44,21,11,15,5.0,2
87,64,16,20,24,5.0,2
92,57,21,23,22,5.0,2
90,70,25,23,112,5.0,2
99,59,17,19,11,5.0,2
92,80,10,26,20,6.0,1
95,60,26,22,28,6.0,1
91,63,25,26,15,6.0,1
92,62,37,21,36,6.0,1
95,50,13,14,15,6.0,1
90,76,37,19,50,6.0,1
96,70,70,26,36,6.0,1
95,62,64,42,76,6.0,1
92,62,20,23,20,6.0,1
91,63,25,26,15,6.0,1
82,56,67,38,92,6.0,2
92,82,27,24,37,6.0,2
90,63,12,26,21,6.0,2
88,37,9,15,16,6.0,2
100,60,29,23,76,6.0,2
98,43,35,23,69,6.0,2
91,74,87,50,67,6.0,2
92,87,57,25,44,6.0,2
93,99,36,34,48,6.0,2
90,72,17,19,19,6.0,2
97,93,21,20,68,6.0,2
93,50,18,25,17,6.0,2
90,57,20,26,33,6.0,2
92,76,31,28,41,6.0,2
88,55,19,17,14,6.0,2
89,63,24,29,29,6.0,2
92,79,70,32,84,7.0,1
92,93,58,35,120,7.0,1
93,84,58,47,62,7.0,2
97,71,29,22,52,8.0,1
84,99,33,19,26,8.0,1
96,44,42,23,73,8.0,1
90,62,22,21,21,8.0,1
92,94,18,17,6,8.0,1
90,67,77,39,114,8.0,1
97,71,29,22,52,8.0,1
91,69,25,25,66,8.0,2
93,59,17,20,14,8.0,2
92,95,85,48,200,8.0,2
90,50,26,22,53,8.0,2
91,62,59,47,60,8.0,2
92,93,22,28,123,9.0,1
92,77,86,41,31,10.0,1
86,66,22,24,26,10.0,2
98,57,31,34,73,10.0,2
95,80,50,64,55,10.0,2
92,108,53,33,94,12.0,2
97,92,22,28,49,12.0,2
93,77,39,37,108,16.0,1
94,83,81,34,201,20.0,1
87,75,25,21,14,0.0,1
88,56,23,18,12,0.0,1
84,97,41,20,32,0.0,2
94,91,27,20,15,0.5,1
97,62,17,13,5,0.5,1
92,85,25,20,12,0.5,1
82,48,27,15,12,0.5,1
88,74,31,25,15,0.5,1
95,77,30,14,21,0.5,1
88,94,26,18,8,0.5,1
91,70,19,19,22,0.5,1
83,54,27,15,12,0.5,1
91,105,40,26,56,0.5,1
86,79,37,28,14,0.5,1
91,96,35,22,135,0.5,1
89,82,23,14,35,0.5,1
90,73,24,23,11,0.5,1
90,87,19,25,19,0.5,1
89,82,33,32,18,0.5,1
85,79,17,8,9,0.5,1
85,119,30,26,17,0.5,1
78,69,24,18,31,0.5,1
88,107,34,21,27,0.5,1
89,115,17,27,7,0.5,1
92,67,23,15,12,0.5,1
89,101,27,34,14,0.5,1
91,84,11,12,10,0.5,1
94,101,41,20,53,0.5,2
88,46,29,22,18,0.5,2
88,122,35,29,42,0.5,2
84,88,28,25,35,0.5,2
90,79,18,15,24,0.5,2
87,69,22,26,11,0.5,2
65,63,19,20,14,0.5,2
90,64,12,17,14,0.5,2
85,58,18,24,16,0.5,2
88,81,41,27,36,0.5,2
86,78,52,29,62,0.5,2
82,74,38,28,48,0.5,2
86,58,36,27,59,0.5,2
94,56,30,18,27,0.5,2
87,57,30,30,22,0.5,2
98,74,148,75,159,0.5,2
94,75,20,25,38,0.5,2
83,68,17,20,71,0.5,2
93,56,25,21,33,0.5,2
101,65,18,21,22,0.5,2
92,65,25,20,31,0.5,2
92,58,14,16,13,0.5,2
86,58,16,23,23,0.5,2
85,62,15,13,22,0.5,2
86,57,13,20,13,0.5,2
86,54,26,30,13,0.5,2
81,41,33,27,34,1.0,1
91,67,32,26,13,1.0,1
91,80,21,19,14,1.0,1
92,60,23,15,19,1.0,1
91,60,32,14,8,1.0,1
93,65,28,22,10,1.0,1
90,63,45,24,85,1.0,2
87,92,21,22,37,1.0,2
83,78,31,19,115,1.0,2
95,62,24,23,14,1.0,2
93,59,41,30,48,1.0,2
84,82,43,32,38,2.0,1
87,71,33,20,22,2.0,1
86,44,24,15,18,2.0,1
86,66,28,24,21,2.0,1
88,58,31,17,17,2.0,1
90,61,28,29,31,2.0,1
88,69,70,24,64,2.0,1
93,87,18,17,26,2.0,1
98,58,33,21,28,2.0,1
91,44,18,18,23,2.0,2
87,75,37,19,70,2.0,2
94,91,30,26,25,2.0,2
88,85,14,15,10,2.0,2
89,109,26,25,27,2.0,2
87,59,37,27,34,2.0,2
93,58,20,23,18,2.0,2
88,57,9,15,16,2.0,2
94,65,38,27,17,3.0,1
91,71,12,22,11,3.0,1
90,55,20,20,16,3.0,1
91,64,21,17,26,3.0,2
88,47,35,26,33,3.0,2
82,72,31,20,84,3.0,2
85,58,83,49,51,3.0,2
91,54,25,22,35,4.0,1
98,50,27,25,53,4.0,2
86,62,29,21,26,4.0,2
89,48,32,22,14,4.0,2
82,68,20,22,9,4.0,2
83,70,17,19,23,4.0,2
96,70,21,26,21,4.0,2
94,117,77,56,52,4.0,2
93,45,11,14,21,4.0,2
93,49,27,21,29,4.0,2
84,73,46,32,39,4.0,2
91,63,17,17,46,4.0,2
90,57,31,18,37,4.0,2
87,45,19,13,16,4.0,2
91,68,14,20,19,4.0,2
86,55,29,35,108,4.0,2
91,86,52,47,52,4.0,2
88,46,15,33,55,4.0,2
85,52,22,23,34,4.0,2
89,72,33,27,55,4.0,2
95,59,23,18,19,4.0,2
94,43,154,82,121,4.0,2
96,56,38,26,23,5.0,2
90,52,10,17,12,5.0,2
94,45,20,16,12,5.0,2
99,42,14,21,49,5.0,2
93,102,47,23,37,5.0,2
94,71,25,26,31,5.0,2
92,73,33,34,115,5.0,2
87,54,41,29,23,6.0,1
92,67,15,14,14,6.0,1
98,101,31,26,32,6.0,1
92,53,51,33,92,6.0,1
97,94,43,43,82,6.0,1
93,43,11,16,54,6.0,1
93,68,24,18,19,6.0,1
95,36,38,19,15,6.0,1
99,86,58,42,203,6.0,1
98,66,103,57,114,6.0,1
92,80,10,26,20,6.0,1
96,74,27,25,43,6.0,2
95,93,21,27,47,6.0,2
86,109,16,22,28,6.0,2
91,46,30,24,39,7.0,2
102,82,34,78,203,7.0,2
85,50,12,18,14,7.0,2
91,57,33,23,12,8.0,1
91,52,76,32,24,8.0,1
93,70,46,30,33,8.0,1
87,55,36,19,25,8.0,1
98,123,28,24,31,8.0,1
82,55,18,23,44,8.0,2
95,73,20,25,225,8.0,2
97,80,17,20,53,8.0,2
100,83,25,24,28,8.0,2
88,91,56,35,126,9.0,2
91,138,45,21,48,10.0,1
92,41,37,22,37,10.0,1
86,123,20,25,23,10.0,2
91,93,35,34,37,10.0,2
87,87,15,23,11,10.0,2
87,56,52,43,55,10.0,2
99,75,26,24,41,12.0,1
96,69,53,43,203,12.0,2
98,77,55,35,89,15.0,1
91,68,27,26,14,16.0,1
98,99,57,45,65,20.0,1

2306
tests/data/mfeat-factors.arff Executable file

File diff suppressed because it is too large Load Diff