From 331381930a23312feb5b0be72868e906921c44a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 8 Nov 2023 10:35:38 +0100 Subject: [PATCH] Implement hyperparameters --- .gitmodules | 3 + CMakeLists.txt | 28 +- Makefile | 2 +- cmake/modules/AddGitSubmodule.cmake | 12 + cmake/modules/CodeCoverage.cmake | 742 ++++++++++++++++++++++++++++ cmake/modules/StaticAnalyzers.cmake | 22 + lib/Files/ArffFiles.cc | 76 ++- lib/Files/ArffFiles.h | 36 +- lib/json | 1 + src/CMakeLists.txt | 3 +- src/Classifier.h | 13 + src/PyClassifier.cc | 22 +- src/PyClassifier.h | 10 +- src/STree.cc | 7 + src/STree.h | 2 + src/example.cpp | 257 ---------- src/main.cc | 2 + 17 files changed, 913 insertions(+), 325 deletions(-) create mode 100644 .gitmodules create mode 100644 cmake/modules/AddGitSubmodule.cmake create mode 100644 cmake/modules/CodeCoverage.cmake create mode 100644 cmake/modules/StaticAnalyzers.cmake create mode 160000 lib/json create mode 100644 src/Classifier.h delete mode 100644 src/example.cpp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ccfd8fe --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "lib/json"] + path = lib/json + url = https://github.com/nlohmann/json.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b3b5a8..8c9142b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,29 @@ -cmake_minimum_required(VERSION 3.5) -project(PyWrap) +cmake_minimum_required(VERSION 3.20) +project(PyWrap + VERSION 0.1.0 + DESCRIPTION "Wrap Python classifiers." + HOMEPAGE_URL "https://github.com/rmontanana/pywrap" + LANGUAGES CXX +) + +# Global CMake variables +# ---------------------- set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") +# CMakes modules +# -------------- +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) +include(AddGitSubmodule) + +# Libraries +# --------- find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED) find_package(Torch REQUIRED) find_package(Boost REQUIRED COMPONENTS python3 numpy3) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") - # Temporary patch while find_package(Torch) is not fixed file( GLOB @@ -19,5 +33,11 @@ file( ) message(STATUS "TORCH Libraries: ${LIBTORCH_PYTHON}") +# External libraries - dependencies of BayesNet +# --------------------------------------------- +add_git_submodule("lib/json") add_subdirectory(lib/Files) + +# Include directories +# ------------------- add_subdirectory(src) \ No newline at end of file diff --git a/Makefile b/Makefile index 76c9b31..4e4c080 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ SHELL := /bin/bash f_release = build_release f_debug = build_debug -app_targets = main example +app_targets = main test_targets = unit_tests_bayesnet unit_tests_platform n_procs = -j 16 diff --git a/cmake/modules/AddGitSubmodule.cmake b/cmake/modules/AddGitSubmodule.cmake new file mode 100644 index 0000000..7855fce --- /dev/null +++ b/cmake/modules/AddGitSubmodule.cmake @@ -0,0 +1,12 @@ + +function(add_git_submodule dir) + find_package(Git REQUIRED) + + if(NOT EXISTS ${dir}/CMakeLists.txt) + message(STATUS "🚨 Adding git submodule => ${dir}") + execute_process(COMMAND ${GIT_EXECUTABLE} + submodule update --init --recursive -- ${dir} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + endif() + add_subdirectory(${dir}) +endfunction(add_git_submodule) diff --git a/cmake/modules/CodeCoverage.cmake b/cmake/modules/CodeCoverage.cmake new file mode 100644 index 0000000..d4a039f --- /dev/null +++ b/cmake/modules/CodeCoverage.cmake @@ -0,0 +1,742 @@ +# Copyright (c) 2012 - 2017, Lars Bilke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CHANGES: +# +# 2012-01-31, Lars Bilke +# - Enable Code Coverage +# +# 2013-09-17, Joakim Söderberg +# - Added support for Clang. +# - Some additional usage instructions. +# +# 2016-02-03, Lars Bilke +# - Refactored functions to use named parameters +# +# 2017-06-02, Lars Bilke +# - Merged with modified version from github.com/ufz/ogs +# +# 2019-05-06, Anatolii Kurotych +# - Remove unnecessary --coverage flag +# +# 2019-12-13, FeRD (Frank Dana) +# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor +# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments. +# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY +# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list +# - Set lcov basedir with -b argument +# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be +# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().) +# - Delete output dir, .info file on 'make clean' +# - Remove Python detection, since version mismatches will break gcovr +# - Minor cleanup (lowercase function names, update examples...) +# +# 2019-12-19, FeRD (Frank Dana) +# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets +# +# 2020-01-19, Bob Apthorpe +# - Added gfortran support +# +# 2020-02-17, FeRD (Frank Dana) +# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters +# in EXCLUDEs, and remove manual escaping from gcovr targets +# +# 2021-01-19, Robin Mueller +# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run +# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional +# flags to the gcovr command +# +# 2020-05-04, Mihchael Davis +# - Add -fprofile-abs-path to make gcno files contain absolute paths +# - Fix BASE_DIRECTORY not working when defined +# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines +# +# 2021-05-10, Martin Stump +# - Check if the generator is multi-config before warning about non-Debug builds +# +# 2022-02-22, Marko Wehle +# - Change gcovr output from -o for --xml and --html output respectively. +# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt". +# +# 2022-09-28, Sebastian Mueller +# - fix append_coverage_compiler_flags_to_target to correctly add flags +# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent) +# +# USAGE: +# +# 1. Copy this file into your cmake modules path. +# +# 2. Add the following line to your CMakeLists.txt (best inside an if-condition +# using a CMake option() to enable it just optionally): +# include(CodeCoverage) +# +# 3. Append necessary compiler flags for all supported source files: +# append_coverage_compiler_flags() +# Or for specific target: +# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME) +# +# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og +# +# 4. If you need to exclude additional directories from the report, specify them +# using full paths in the COVERAGE_EXCLUDES variable before calling +# setup_target_for_coverage_*(). +# Example: +# set(COVERAGE_EXCLUDES +# '${PROJECT_SOURCE_DIR}/src/dir1/*' +# '/path/to/my/src/dir2/*') +# Or, use the EXCLUDE argument to setup_target_for_coverage_*(). +# Example: +# setup_target_for_coverage_lcov( +# NAME coverage +# EXECUTABLE testrunner +# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*") +# +# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set +# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR) +# Example: +# set(COVERAGE_EXCLUDES "dir1/*") +# setup_target_for_coverage_gcovr_html( +# NAME coverage +# EXECUTABLE testrunner +# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src" +# EXCLUDE "dir2/*") +# +# 5. Use the functions described below to create a custom make target which +# runs your test executable and produces a code coverage report. +# +# 6. Build a Debug build: +# cmake -DCMAKE_BUILD_TYPE=Debug .. +# make +# make my_coverage_target +# + +include(CMakeParseArguments) + +option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) + +# Check prereqs +find_program( GCOV_PATH gcov ) +find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl) +find_program( FASTCOV_PATH NAMES fastcov fastcov.py ) +find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat ) +find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test) +find_program( CPPFILT_PATH NAMES c++filt ) + +if(NOT GCOV_PATH) + message(FATAL_ERROR "gcov not found! Aborting...") +endif() # NOT GCOV_PATH + +# Check supported compiler (Clang, GNU and Flang) +get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(LANG ${LANGUAGES}) + if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang") + if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3) + message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...") + endif() + elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" + AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") + message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + endif() +endforeach() + +set(COVERAGE_COMPILER_FLAGS "-g --coverage" + CACHE INTERNAL "") +if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)") + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path) + if(HAVE_fprofile_abs_path) + set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") + endif() +endif() + +set(CMAKE_Fortran_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the Fortran compiler during coverage builds." + FORCE ) +set(CMAKE_CXX_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C++ compiler during coverage builds." + FORCE ) +set(CMAKE_C_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C compiler during coverage builds." + FORCE ) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used for linking binaries during coverage builds." + FORCE ) +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used by the shared libraries linker during coverage builds." + FORCE ) +mark_as_advanced( + CMAKE_Fortran_FLAGS_COVERAGE + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_C_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) + +get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)) + message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading") +endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG) + +if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + link_libraries(gcov) +endif() + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_lcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# ) +function(setup_target_for_coverage_lcov) + + set(options NO_DEMANGLE SONARQUBE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT LCOV_PATH) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() # NOT LCOV_PATH + + if(NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() # NOT GENHTML_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(LCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND LCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES LCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Setting up commands which will be run to generate coverage data. + # Cleanup lcov + set(LCOV_CLEAN_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory . + -b ${BASEDIR} --zerocounters + ) + # Create baseline to make sure untouched files show up in the report + set(LCOV_BASELINE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b + ${BASEDIR} -o ${Coverage_NAME}.base + ) + # Run tests + set(LCOV_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Capturing lcov counters and generating report + set(LCOV_CAPTURE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . -b + ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture + ) + # add baseline counters + set(LCOV_BASELINE_COUNT_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base + -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total + ) + # filter collected data to final coverage report + set(LCOV_FILTER_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove + ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info + ) + # Generate HTML output + set(LCOV_GEN_HTML_CMD + ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o + ${Coverage_NAME} ${Coverage_NAME}.info + ) + if(${Coverage_SONARQUBE}) + # Generate SonarQube output + set(GCOVR_XML_CMD + ${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + set(GCOVR_XML_CMD_COMMAND + COMMAND ${GCOVR_XML_CMD} + ) + set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml) + set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.") + endif() + + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + message(STATUS "Command to clean up lcov: ") + string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}") + message(STATUS "${LCOV_CLEAN_CMD_SPACED}") + + message(STATUS "Command to create baseline: ") + string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}") + message(STATUS "${LCOV_BASELINE_CMD_SPACED}") + + message(STATUS "Command to run the tests: ") + string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}") + message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to capture counters and generate report: ") + string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}") + message(STATUS "${LCOV_CAPTURE_CMD_SPACED}") + + message(STATUS "Command to add baseline counters: ") + string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}") + message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}") + + message(STATUS "Command to filter collected data: ") + string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}") + message(STATUS "${LCOV_FILTER_CMD_SPACED}") + + message(STATUS "Command to generate lcov HTML output: ") + string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}") + message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}") + + if(${Coverage_SONARQUBE}) + message(STATUS "Command to generate SonarQube XML output: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + COMMAND ${LCOV_CLEAN_CMD} + COMMAND ${LCOV_BASELINE_CMD} + COMMAND ${LCOV_EXEC_TESTS_CMD} + COMMAND ${LCOV_CAPTURE_CMD} + COMMAND ${LCOV_BASELINE_COUNT_CMD} + COMMAND ${LCOV_FILTER_CMD} + COMMAND ${LCOV_GEN_HTML_CMD} + ${GCOVR_XML_CMD_COMMAND} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.base + ${Coverage_NAME}.capture + ${Coverage_NAME}.total + ${Coverage_NAME}.info + ${GCOVR_XML_CMD_BYPRODUCTS} + ${Coverage_NAME}/index.html + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report." + ) + + # Show where to find the lcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info." + ${GCOVR_XML_CMD_COMMENT} + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_lcov + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_xml( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_xml) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_XML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Running gcovr + set(GCOVR_XML_CMD + ${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to generate gcovr XML coverage data: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_XML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_XML_CMD} + + BYPRODUCTS ${Coverage_NAME}.xml + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce Cobertura code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml." + ) +endfunction() # setup_target_for_coverage_gcovr_xml + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_html( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_html) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_HTML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Create folder + set(GCOVR_HTML_FOLDER_CMD + ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME} + ) + # Running gcovr + set(GCOVR_HTML_CMD + ${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to create a folder: ") + string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}") + message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}") + + message(STATUS "Command to generate gcovr HTML coverage data: ") + string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}") + message(STATUS "${GCOVR_HTML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_HTML_FOLDER_CMD} + COMMAND ${GCOVR_HTML_CMD} + + BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce HTML code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_gcovr_html + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_fastcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude. +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# SKIP_HTML # Don't create html report +# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths +# ) +function(setup_target_for_coverage_fastcov) + + set(options NO_DEMANGLE SKIP_HTML) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT FASTCOV_PATH) + message(FATAL_ERROR "fastcov not found! Aborting...") + endif() + + if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (Patterns, not paths, for fastcov) + set(FASTCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES}) + list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES FASTCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Set up commands which will be run to generate coverage data + set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}) + + set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --process-gcno + --output ${Coverage_NAME}.json + --exclude ${FASTCOV_EXCLUDES} + ) + + set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH} + -C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info + ) + + if(Coverage_SKIP_HTML) + set(FASTCOV_HTML_CMD ";") + else() + set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} + -o ${Coverage_NAME} ${Coverage_NAME}.info + ) + endif() + + set(FASTCOV_POST_CMD ";") + if(Coverage_POST_CMD) + set(FASTCOV_POST_CMD ${Coverage_POST_CMD}) + endif() + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):") + + message(" Running tests:") + string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}") + message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}") + + message(" Capturing fastcov counters and generating report:") + string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}") + message(" ${FASTCOV_CAPTURE_CMD_SPACED}") + + message(" Converting fastcov .json to lcov .info:") + string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}") + message(" ${FASTCOV_CONVERT_CMD_SPACED}") + + if(NOT Coverage_SKIP_HTML) + message(" Generating HTML report: ") + string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}") + message(" ${FASTCOV_HTML_CMD_SPACED}") + endif() + if(Coverage_POST_CMD) + message(" Running post command: ") + string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}") + message(" ${FASTCOV_POST_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + + # Cleanup fastcov + COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --zerocounters + + COMMAND ${FASTCOV_EXEC_TESTS_CMD} + COMMAND ${FASTCOV_CAPTURE_CMD} + COMMAND ${FASTCOV_CONVERT_CMD} + COMMAND ${FASTCOV_HTML_CMD} + COMMAND ${FASTCOV_POST_CMD} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.info + ${Coverage_NAME}.json + ${Coverage_NAME}/index.html # report directory + + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report." + ) + + set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.") + if(NOT Coverage_SKIP_HTML) + string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.") + endif() + # Show where to find the fastcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG} + ) + +endfunction() # setup_target_for_coverage_fastcov + +function(append_coverage_compiler_flags) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}") +endfunction() # append_coverage_compiler_flags + +# Setup coverage for specific library +function(append_coverage_compiler_flags_to_target name) + separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}") + target_compile_options(${name} PRIVATE ${_flag_list}) + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_link_libraries(${name} PRIVATE gcov) + endif() +endfunction() diff --git a/cmake/modules/StaticAnalyzers.cmake b/cmake/modules/StaticAnalyzers.cmake new file mode 100644 index 0000000..8a15aed --- /dev/null +++ b/cmake/modules/StaticAnalyzers.cmake @@ -0,0 +1,22 @@ +if(ENABLE_CLANG_TIDY) + find_program(CLANG_TIDY_COMMAND NAMES clang-tidy) + + if(NOT CLANG_TIDY_COMMAND) + message(WARNING "🔴 CMake_RUN_CLANG_TIDY is ON but clang-tidy is not found!") + set(CMAKE_CXX_CLANG_TIDY "" CACHE STRING "" FORCE) + else() + + message(STATUS "🟢 CMake_RUN_CLANG_TIDY is ON") + set(CLANGTIDY_EXTRA_ARGS + "-extra-arg=-Wno-unknown-warning-option" + ) + set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND};-p=${CMAKE_BINARY_DIR};${CLANGTIDY_EXTRA_ARGS}" CACHE STRING "" FORCE) + + add_custom_target(clang-tidy + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${CMAKE_PROJECT_NAME} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target clang-tidy + COMMENT "Running clang-tidy..." + ) + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + endif() +endif(ENABLE_CLANG_TIDY) diff --git a/lib/Files/ArffFiles.cc b/lib/Files/ArffFiles.cc index 4039b0b..99f29bd 100644 --- a/lib/Files/ArffFiles.cc +++ b/lib/Files/ArffFiles.cc @@ -4,11 +4,9 @@ #include #include -using namespace std; - ArffFiles::ArffFiles() = default; -vector ArffFiles::getLines() const +std::vector ArffFiles::getLines() const { return lines; } @@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const return lines.size(); } -vector> ArffFiles::getAttributes() const +std::vector> ArffFiles::getAttributes() const { return attributes; } -string ArffFiles::getClassName() const +std::string ArffFiles::getClassName() const { return className; } -string ArffFiles::getClassType() const +std::string ArffFiles::getClassType() const { return classType; } -vector>& ArffFiles::getX() +std::vector>& ArffFiles::getX() { return X; } -vector& ArffFiles::getY() +std::vector& ArffFiles::getY() { return y; } -void ArffFiles::loadCommon(string fileName) +void ArffFiles::loadCommon(std::string fileName) { - ifstream file(fileName); + std::ifstream file(fileName); if (!file.is_open()) { - throw invalid_argument("Unable to open file"); + throw std::invalid_argument("Unable to open file"); } - string line; - string keyword; - string attribute; - string type; - string type_w; + std::string line; + std::string keyword; + std::string attribute; + std::string type; + std::string type_w; while (getline(file, line)) { if (line.empty() || line[0] == '%' || line == "\r" || line == " ") { continue; } - if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) { - stringstream ss(line); + if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) { + std::stringstream ss(line); ss >> keyword >> attribute; type = ""; while (ss >> type_w) @@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName) } file.close(); if (attributes.empty()) - throw invalid_argument("No attributes found"); + throw std::invalid_argument("No attributes found"); } -void ArffFiles::load(const string& fileName, bool classLast) +void ArffFiles::load(const std::string& fileName, bool classLast) { int labelIndex; loadCommon(fileName); if (classLast) { - className = get<0>(attributes.back()); - classType = get<1>(attributes.back()); + className = std::get<0>(attributes.back()); + classType = std::get<1>(attributes.back()); attributes.pop_back(); labelIndex = static_cast(attributes.size()); } else { - className = get<0>(attributes.front()); - classType = get<1>(attributes.front()); + className = std::get<0>(attributes.front()); + classType = std::get<1>(attributes.front()); attributes.erase(attributes.begin()); labelIndex = 0; } generateDataset(labelIndex); } -void ArffFiles::load(const string& fileName, const string& name) +void ArffFiles::load(const std::string& fileName, const std::string& name) { int labelIndex; loadCommon(fileName); bool found = false; for (int i = 0; i < attributes.size(); ++i) { if (attributes[i].first == name) { - className = get<0>(attributes[i]); - classType = get<1>(attributes[i]); + className = std::get<0>(attributes[i]); + classType = std::get<1>(attributes[i]); attributes.erase(attributes.begin() + i); labelIndex = i; found = true; @@ -110,19 +108,19 @@ void ArffFiles::load(const string& fileName, const string& name) } } if (!found) { - throw invalid_argument("Class name not found"); + throw std::invalid_argument("Class name not found"); } generateDataset(labelIndex); } void ArffFiles::generateDataset(int labelIndex) { - X = vector>(attributes.size(), vector(lines.size())); - auto yy = vector(lines.size(), ""); - auto removeLines = vector(); // Lines with missing values + X = std::vector>(attributes.size(), std::vector(lines.size())); + auto yy = std::vector(lines.size(), ""); + auto removeLines = std::vector(); // Lines with missing values for (size_t i = 0; i < lines.size(); i++) { - stringstream ss(lines[i]); - string value; + std::stringstream ss(lines[i]); + std::string value; int pos = 0; int xIndex = 0; while (getline(ss, value, ',')) { @@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex) y = factorize(yy); } -string ArffFiles::trim(const string& source) +std::string ArffFiles::trim(const std::string& source) { - string s(source); + std::string s(source); s.erase(0, s.find_first_not_of(" '\n\r\t")); s.erase(s.find_last_not_of(" '\n\r\t") + 1); return s; } -vector ArffFiles::factorize(const vector& labels_t) +std::vector ArffFiles::factorize(const std::vector& labels_t) { - vector yy; + std::vector yy; yy.reserve(labels_t.size()); - map labelMap; + std::map labelMap; int i = 0; - for (const string& label : labels_t) { + for (const std::string& label : labels_t) { if (labelMap.find(label) == labelMap.end()) { labelMap[label] = i++; } diff --git a/lib/Files/ArffFiles.h b/lib/Files/ArffFiles.h index 5cacb27..25e5a8c 100644 --- a/lib/Files/ArffFiles.h +++ b/lib/Files/ArffFiles.h @@ -4,31 +4,29 @@ #include #include -using namespace std; - class ArffFiles { private: - vector lines; - vector> attributes; - string className; - string classType; - vector> X; - vector y; + std::vector lines; + std::vector> attributes; + std::string className; + std::string classType; + std::vector> X; + std::vector y; void generateDataset(int); - void loadCommon(string); + void loadCommon(std::string); public: ArffFiles(); - void load(const string&, bool = true); - void load(const string&, const string&); - vector getLines() const; + void load(const std::string&, bool = true); + void load(const std::string&, const std::string&); + std::vector getLines() const; unsigned long int getSize() const; - string getClassName() const; - string getClassType() const; - static string trim(const string&); - vector>& getX(); - vector& getY(); - vector> getAttributes() const; - static vector factorize(const vector& labels_t); + std::string getClassName() const; + std::string getClassType() const; + static std::string trim(const std::string&); + std::vector>& getX(); + std::vector& getY(); + std::vector> getAttributes() const; + static std::vector factorize(const std::vector& labels_t); }; #endif \ No newline at end of file diff --git a/lib/json b/lib/json new file mode 160000 index 0000000..6eab7a2 --- /dev/null +++ b/lib/json @@ -0,0 +1 @@ +Subproject commit 6eab7a2b187b10b2494e39c1961750bfd1bda500 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9fd677d..a8dab52 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,9 +1,8 @@ include_directories(${PyWrap_SOURCE_DIR}/lib/Files) +include_directories(${PyWrap_SOURCE_DIR}/lib/json/include) include_directories(${Python3_INCLUDE_DIRS}) include_directories(${TORCH_INCLUDE_DIRS}) add_executable(main main.cc STree.cc SVC.cc RandomForest.cc PyClassifier.cc PyWrap.cc) -add_executable(example example.cpp PyWrap.cc) target_link_libraries(main ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles) -target_link_libraries(example ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles) diff --git a/src/Classifier.h b/src/Classifier.h new file mode 100644 index 0000000..cefff5e --- /dev/null +++ b/src/Classifier.h @@ -0,0 +1,13 @@ +#ifndef CLASSIFER_H +#define CLASSIFER_H +#include + +namespace pywrap { + class Classifier { + public: + Classifier() = default; + virtual ~Classifier() = default; + virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; + }; +} /* namespace pywrap */ +#endif /* CLASSIFER_H */ \ No newline at end of file diff --git a/src/PyClassifier.cc b/src/PyClassifier.cc index d108b99..a323927 100644 --- a/src/PyClassifier.cc +++ b/src/PyClassifier.cc @@ -1,9 +1,10 @@ #include "PyClassifier.h" +#include namespace pywrap { namespace bp = boost::python; namespace np = boost::python::numpy; - PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className) + PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className), fitted(false) { pyWrap = PyWrap::GetInstance(); pyWrap->importClass(module, className); @@ -36,10 +37,14 @@ namespace pywrap { } PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) { + if (!fitted && hyperparameters.size() > 0) { + std::cout << "Setting hyperparameters" << std::endl; + } auto [Xn, yn] = tensors2numpy(X, y); CPyObject Xp = bp::incref(bp::object(Xn).ptr()); CPyObject yp = bp::incref(bp::object(yn).ptr()); pyWrap->fit(module, this->className, Xp, yp); + fitted = true; return *this; } torch::Tensor PyClassifier::predict(torch::Tensor& X) @@ -69,4 +74,19 @@ namespace pywrap { auto result = pyWrap->score(module, className, Xp, yp); return result; } + void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters) + { + // Check if hyperparameters are valid, default is no hyperparameters + const std::vector validKeys = { }; + checkHyperparameters(validKeys, hyperparameters); + this->hyperparameters = hyperparameters; + } + void PyClassifier::checkHyperparameters(const std::vector& validKeys, const nlohmann::json& hyperparameters) + { + for (const auto& item : hyperparameters.items()) { + if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) { + throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid"); + } + } + } } /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifier.h b/src/PyClassifier.h index 35f76a5..012a085 100644 --- a/src/PyClassifier.h +++ b/src/PyClassifier.h @@ -2,15 +2,17 @@ #define PYCLASSIFER_H #include "boost/python/detail/wrap_python.hpp" #include +#include #include #include #include #include #include #include "PyWrap.h" +#include "Classifier.h" namespace pywrap { - class PyClassifier { + class PyClassifier : public Classifier { public: PyClassifier(const std::string& module, const std::string& className); virtual ~PyClassifier(); @@ -19,11 +21,15 @@ namespace pywrap { double score(torch::Tensor& X, torch::Tensor& y); std::string version(); std::string callMethodString(const std::string& method); + void setHyperparameters(const nlohmann::json& hyperparameters) override; + protected: + void checkHyperparameters(const std::vector& validKeys, const nlohmann::json& hyperparameters); + nlohmann::json hyperparameters; private: PyWrap* pyWrap; std::string module; std::string className; + bool fitted; }; - } /* namespace pywrap */ #endif /* PYCLASSIFER_H */ \ No newline at end of file diff --git a/src/STree.cc b/src/STree.cc index 15fa1b4..01b3f19 100644 --- a/src/STree.cc +++ b/src/STree.cc @@ -5,4 +5,11 @@ namespace pywrap { { return callMethodString("graph"); } + void STree::setHyperparameters(const nlohmann::json& hyperparameters) + { + // Check if hyperparameters are valid + const std::vector validKeys = { "C", "n_jobs", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" }; + checkHyperparameters(validKeys, hyperparameters); + this->hyperparameters = hyperparameters; + } } /* namespace pywrap */ \ No newline at end of file diff --git a/src/STree.h b/src/STree.h index 4319ef2..a803e71 100644 --- a/src/STree.h +++ b/src/STree.h @@ -1,5 +1,6 @@ #ifndef STREE_H #define STREE_H +#include "nlohmann/json.hpp" #include "PyClassifier.h" namespace pywrap { @@ -8,6 +9,7 @@ namespace pywrap { STree() : PyClassifier("stree", "Stree") {}; ~STree() = default; std::string graph(); + void setHyperparameters(const nlohmann::json& hyperparameters) override; }; } /* namespace pywrap */ #endif /* STREE_H */ \ No newline at end of file diff --git a/src/example.cpp b/src/example.cpp deleted file mode 100644 index e60b8af..0000000 --- a/src/example.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include "boost/python/detail/wrap_python.hpp" -#include -#include -#include -#include -#include -#include "ArffFiles.h" -#include "PyHelper.hpp" -#include "PyWrap.h" - - -void errorAbort(const std::string& message) -{ - std::cerr << message << std::endl; - PyErr_Print(); - exit(1); -} -void print_array(pywrap::np::ndarray& array) -{ - std::cout << "Array: " << std::endl; - std::cout << pywrap::p::extract(pywrap::p::str(array)) << std::endl; -} -// np::ndarray to_numpy_matrix(torch::Tensor& input_data, np::dtype numpy_dtype) -// { -// p::tuple shape = p::make_tuple(input_data.size(0), input_data.size(1)); -// auto tensor_dtype = input_data.dtype(); -// p::tuple stride = p::make_tuple(sizeof(tensor_dtype) * input_data.size(1), sizeof(tensor_dtype)); -// auto dito = input_data.transpose(1, 0); -// np::ndarray result = np::from_data(dito.data_ptr(), numpy_dtype, shape, stride, p::object()); -// return result; -// } -// np::ndarray to_numpy_vector(torch::Tensor& input_data, np::dtype numpy_dtype) -// { -// p::tuple shape = p::make_tuple(input_data.size(0)); -// auto tensor_dtype = input_data.dtype(); -// p::tuple stride = p::make_tuple(sizeof(tensor_dtype), sizeof(tensor_dtype)); -// np::ndarray result = np::from_data(input_data.data_ptr(), numpy_dtype, shape, stride, p::object()); -// return result; -// } - -class Paths { -public: - static string datasets() - { - return "../discretizbench/datasets/"; - } -}; - -tuple, string, map>> loadDataset(const string& name, bool class_last) -{ - auto handler = ArffFiles(); - handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); - // Get Dataset X, y - vector> X = handler.getX(); - vector y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; }); - torch::Tensor Xd; - auto states = map>(); - Xd = torch::zeros({ static_cast(X.size()), static_cast(X[0].size()) }, torch::kFloat32); - for (int i = 0; i < features.size(); ++i) { - Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32)); - } - return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; -} - -using namespace pywrap; -np::ndarray tensor2numpy(torch::Tensor& X) -{ - int m = X.size(0); - int n = X.size(1); - auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin(), p::make_tuple(m, n), p::make_tuple(sizeof(X.dtype()) * 2 * n, sizeof(X.dtype()) * 2), p::object()); - Xn = Xn.transpose(); - return Xn; -} -pair tensors2numpy(torch::Tensor& X, torch::Tensor& y) -{ - int n = X.size(1); - auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin(), p::make_tuple(n), p::make_tuple(sizeof(y.dtype()) * 2), p::object()); - return { tensor2numpy(X), yn }; -} -pair getData(const string& dataset) -{ - auto [X, y, featuresx, classNamex, statesx] = loadDataset(dataset, true); - auto [Xn, yn] = tensors2numpy(X, y); - auto Xn_shapes = Xn.get_shape(); - auto yn_shapes = yn.get_shape(); - cout << "Xn_shapes: " << Xn_shapes[0] << ", " << Xn_shapes[1] << endl; - cout << "yn_shapes: " << yn_shapes[0] << endl; - cout << "X shapes: " << X.sizes() << endl; - cout << "y shapes: " << y.sizes() << endl; - assert(Xn_shapes[0] == X.sizes()[0]); - assert(Xn_shapes[1] == X.sizes()[1]); - assert(yn_shapes[0] == y.sizes()[0]); - - return { Xn, yn }; -} -int main(int argc, char** argv) -{ - cout << "* Begin." << endl; - { - PyWrap* wrapper = PyWrap::GetInstance(); - string dataset = "iris"; - // Convert Tensor to numpy array - // auto [Xn, yn] = tensors2numpy(X, y); - // cout << "Numpy array data: " << endl; - // print_array(Xn); - // cout << "Numpy array labels: " << endl; - // print_array(yn); - // Import module - string moduleName = "stree"; - string className = "Stree"; - // Import - { - cout << "--Import Phase--" << endl; - wrapper->importClass(moduleName, className); - cout << "--Import Phase end--" << endl; - } - // Version - { - cout << "--Version Phase--" << endl; - auto version = wrapper->version(moduleName, className); - cout << "Version: " << version << endl; - cout << "--Version Phase end--" << endl; - } - // Fit - { - cout << "--Fit Phase--" << endl; - auto [Xn, yn] = getData(dataset); - auto Xn_shapes = Xn.get_shape(); - auto yn_shapes = yn.get_shape(); - CPyObject Xp = boost::python::incref(boost::python::object(Xn).ptr()); - CPyObject yp = boost::python::incref(boost::python::object(yn).ptr()); - //print_array(yn); - // Call fit - cout << "Calling fit" << endl; - wrapper->fit(moduleName, className, Xp, yp); - cout << "--Fit Phase end--" << endl; - } - // Score - { - cout << "--Score Phase--" << endl; - auto [Xn, yn] = getData(dataset); - auto Xn_shapes = Xn.get_shape(); - auto yn_shapes = yn.get_shape(); - CPyObject Xp = boost::python::incref(boost::python::object(Xn).ptr()); - CPyObject yp = boost::python::incref(boost::python::object(yn).ptr()); - //print_array(yn); - // Call score - cout << "Calling score" << endl; - auto result = wrapper->score(moduleName, className, Xp, yp); - cout << "Score: " << result << endl; - cout << "--Score Phase end--" << endl; - } - // Call score - // { - // np::initialize(); - // cout << "--Score Phase--" << endl; - // auto [X, y, featuresx, classNamex, statesx] = loadDataset(dataset, true); - // auto [Xn, yn] = tensors2numpy(X, y); - // auto Xn_shapes = Xn.get_shape(); - // auto yn_shapes = yn.get_shape(); - // cout << "Xn_shapes: " << Xn_shapes[0] << ", " << Xn_shapes[1] << endl; - // cout << "yn_shapes: " << yn_shapes[0] << endl; - // cout << "X shapes: " << X.sizes() << endl; - // cout << "y shapes: " << y.sizes() << endl; - // assert(Xn_shapes[0] == X.sizes()[0]); - // assert(Xn_shapes[1] == X.sizes()[1]); - // assert(yn_shapes[0] == y.sizes()[0]); - // CPyObject Xp = Xn.ptr(); - // CPyObject yp = yn.ptr(); - // print_array(yn); - // cout << "Calling score" << endl; - // auto instance = wrapper->getClass(moduleName, className); - // CPyObject result; - // if (!(result = PyObject_CallMethod(instance, "score", "OO", Xp.getObject(), yp.getObject()))) - // errorAbort("Couldn't call method score"); - // auto score = PyFloat_AsDouble(result); - // //auto score = wrapper->score(moduleName, className, Xp, yp); - // cout << "Score: " << score << endl; - // cout << "--Score Phase end--" << endl; - // } - // Clean module - { - cout << "--Clean Phase--" << endl; - wrapper->clean(moduleName, className); - cout << "--Clean Phase end--" << endl; - } - } - cout << "* End." << endl; -} -// int main(int argc, char** argv) -// { -// auto [data_tensor, y_label, featuresx, classNamex, statesx] = loadDataset("iris", true); -// // CPyInstance pInstance; -// // auto wrapper = PyWrap(); -// PyWrap* wrapper = PyWrap::GetInstance(); -// // PyWrap* wrapper = PyWrap::GetInstance(); -// int m = data_tensor.size(0); -// int n = data_tensor.size(1); -// auto data_numpy = np::from_data(data_tensor.data_ptr(), np::dtype::get_builtin(), p::make_tuple(m, n), p::make_tuple(sizeof(data_tensor.dtype()) * 2 * n, sizeof(data_tensor.dtype()) * 2), p::object()); -// data_numpy = data_numpy.transpose(); -// auto y_numpy = np::from_data(y_label.data_ptr(), np::dtype::get_builtin(), p::make_tuple(n), p::make_tuple(sizeof(y_label.dtype()) * 2), p::object()); -// cout << "Numpy array data: " << endl; -// print_array(data_numpy); -// cout << "Numpy array labels: " << endl; -// print_array(y_numpy); -// cout << "primero" << endl; -// CPyObject p = data_numpy.ptr(); -// CPyObject yp = y_numpy.ptr(); -// string moduleName = "sklearn.svm"; -// string className = "SVC"; -// string method = "_repr_html_"; -// // CPyObject module = PyImport_ImportModule(moduleName.c_str()); -// // if (PyErr_Occurred()) { -// // errorAbort("Could't import module " + moduleName); -// // } -// // CPyObject classObject = PyObject_GetAttrString(module, className.c_str()); -// // if (PyErr_Occurred()) { -// // errorAbort("Couldn't find class " + className); -// // } -// // CPyObject instance = PyObject_CallObject(classObject, NULL); -// // if (PyErr_Occurred()) { -// // errorAbort("Couldn't create instance of class " + className); -// // } -// // wrapper.moduleClassMap.insert({ { moduleName, className }, { module, classObject, instance } }); -// wrapper->importClass(moduleName, className); -// PyObject* instance = wrapper->getClass(moduleName, className); -// CPyObject result; -// if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL))) -// errorAbort("Couldn't call method " + method); -// std::string value = PyUnicode_AsUTF8(result); -// cout << "Version: " << value << endl; -// cout << "Calling fit" << endl; -// p.AddRef(); -// yp.AddRef(); -// method = "fit"; -// wrapper->fit(moduleName, className, p, yp); -// // PyObject* instance2 = wrapper->getClass(moduleName, className); -// // if (!(result = PyObject_CallMethodObjArgs(instance2, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL))) -// // errorAbort("Couldn't call method fit"); -// // method = "fit"; -// // if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL))) -// // errorAbort("Couldn't call method fit"); -// cout << "Calling score" << endl; -// // method = "score"; -// // if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL))) -// // errorAbort("Couldn't call method score"); -// // float score = PyFloat_AsDouble(result); -// auto score = wrapper->score(moduleName, className, p, yp); -// cout << "Score: " << score << endl; -// wrapper->clean(moduleName, className); -// return 0; -// } \ No newline at end of file diff --git a/src/main.cc b/src/main.cc index a7e1f81..3f96720 100644 --- a/src/main.cc +++ b/src/main.cc @@ -52,6 +52,8 @@ int main(int argc, char* argv[]) cout << "X: " << X.sizes() << endl; cout << "y: " << y.sizes() << endl; auto clf = pywrap::STree(); + auto hyperparameters = nlohmann::json({ "max_depth": 3, "C" : 0.7 }); + clf.setHyperparameters(hyperparameters); cout << "STree Version: " << clf.version() << endl; auto svc = pywrap::SVC(); svc.fit(X, y, features, className, states);