Compare commits

...

11 Commits

28 changed files with 727 additions and 1100 deletions

1
.gitignore vendored
View File

@@ -38,3 +38,4 @@ cmake-build*/**
.idea
puml/**
.vscode/settings.json
CMakeUserPresets.json

10
.gitmodules vendored
View File

@@ -1,10 +0,0 @@
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/catch2"]
path = lib/catch2
url = https://github.com/catchorg/Catch2.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

101
CLAUDE.md Normal file
View File

@@ -0,0 +1,101 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
PyClassifiers is a C++ library that provides wrappers for Python machine learning classifiers. It enables C++ applications to use Python-based ML algorithms (scikit-learn, XGBoost, custom implementations) through a unified interface.
## Essential Commands
### Build System
```bash
# Setup build configurations
make debug # Configure debug build with testing and coverage
make release # Configure release build
# Build targets
make buildd # Build debug version
make buildr # Build release version
# Testing
make test # Run all unit tests
make test opt="-s" # Run tests with verbose output
make test opt="-c='Test Name'" # Run specific test section
# Coverage
make coverage # Run tests and generate coverage report
# Installation
sudo make install # Install library to system (requires release build)
# Utilities
make clean # Clean test artifacts
make help # Show all available targets
```
### Dependencies
- Requires Conan package manager (`pip install conan`)
- Miniconda installation required for Python classifiers
- Boost library (preferably system package: `sudo dnf install boost-devel`)
## Architecture
### Core Components
**PyWrap** (`pyclfs/PyWrap.h`): Singleton managing Python interpreter lifecycle and thread-safe Python/C++ communication.
**PyClassifier** (`pyclfs/PyClassifier.h`): Abstract base class inheriting from `bayesnet::BaseClassifier`. All Python classifier wrappers extend this class.
**Individual Classifiers**: Each classifier (STree, ODTE, SVC, RandomForest, XGBoost, AdaBoostPy) wraps specific Python modules with consistent C++ interface.
### Data Flow
- Uses PyTorch tensors for efficient C++/Python data exchange
- JSON-based hyperparameter configuration
- Automatic memory management for Python objects
## Key Directories
- `pyclfs/` - Core library source code
- `tests/` - Catch2 unit tests with ARFF test datasets
- `build_debug/` - Debug build artifacts
- `build_release/` - Release build artifacts
- `cmake/modules/` - Custom CMake modules
## Development Patterns
### Adding New Classifiers
1. Inherit from `PyClassifier` base class
2. Implement required virtual methods: `fit()`, `predict()`, `predict_proba()`
3. Use `PyWrap::getInstance()` for Python interpreter access
4. Handle hyperparameters via JSON configuration
5. Add corresponding unit tests in `tests/TestPythonClassifiers.cc`
### Python Integration
- All Python interactions go through PyWrap singleton
- Use RAII pattern for Python object management
- Convert data using PyTorch tensors (discrete/continuous data support)
- Handle Python exceptions and convert to C++ exceptions
### Testing
- Catch2 framework with parameterized tests using GENERATE()
- Test data in ARFF format located in `tests/data/`
- Performance benchmarks validate expected accuracy scores
- Coverage reports generated with gcovr
## Important Files
- `pyclfs/PyWrap.h` - Python interpreter management
- `pyclfs/PyClassifier.h` - Base classifier interface
- `CMakeLists.txt` - Main build configuration
- `Makefile` - Build automation and common tasks
- `conanfile.py` - Package dependencies
- `tests/TestPythonClassifiers.cc` - Main test suite
## Technical Requirements
- C++17 standard compliance
- Python 3.11+ required
- Boost library with Python and NumPy support
- PyTorch for tensor operations
- Thread-safe design for concurrent usage

View File

@@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 3.20)
project(PyClassifiers
VERSION 1.0.3
DESCRIPTION "Python Classifiers Wrapper."
@@ -6,15 +7,7 @@ project(PyClassifiers
LANGUAGES CXX
)
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
find_package(Torch REQUIRED)
if (POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif ()
cmake_policy(SET CMP0135 NEW)
# Global CMake variables
# ----------------------
@@ -22,14 +15,23 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Release mode")
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
# Options
# -------
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
option(INSTALL_GTEST "Enable installation of googletest." OFF)
# External libraries
# ------------------
find_package(Torch REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(bayesnet CONFIG REQUIRED)
# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
@@ -45,42 +47,24 @@ endif()
find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
if (ENABLE_CLANG_TIDY)
include(StaticAnalyzers) # clang-tidy
endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of PyClassifiers
# --------------------------------------------------
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include)
message(STATUS "BayesNet=${BayesNet}")
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
# Subdirectories
# --------------
# Add the library
# ---------------
add_subdirectory(pyclfs)
# Testing
# -------
if (ENABLE_TESTING)
MESSAGE("Testing enabled")
add_git_submodule(lib/catch2)
add_git_submodule(lib/mdlp)
add_subdirectory(lib/Files)
find_package(Catch2 CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
endif()
## Configure test data path
cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
configure_file(tests/config/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
enable_testing()
include(CTest)
add_subdirectory(tests)
endif (ENABLE_TESTING)
@@ -90,6 +74,6 @@ endif (ENABLE_TESTING)
install(TARGETS PyClassifiers
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
CONFIGURATIONS Release)
install(DIRECTORY pyclfs/ DESTINATION include/pyclassifiers FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h" PATTERN "*.hpp")
install(FILES ${Bayesnet_INCLUDE_DIRS}/bayesnet/config.h DESTINATION include/pyclassifiers CONFIGURATIONS Release)
)
install(DIRECTORY pyclfs/ DESTINATION include/pyclassifiers FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")

View File

@@ -3,6 +3,14 @@ f_debug = build_debug
app_targets = PyClassifiers
test_targets = unit_tests_pyclassifiers
# Set the number of parallel jobs to the number of available processors minus 7
CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
|| nproc --all 2>/dev/null \
|| sysctl -n hw.ncpu)
# --- Your desired job count: CPUs 7, but never less than 1 --------------
JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
define ClearTests
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
@@ -16,6 +24,25 @@ define ClearTests
fi ;
endef
define build_target
@echo ">>> Building the project for $(1)..."
@if [ -d $(2) ]; then rm -fr $(2); fi
@conan install . --build=missing -of $(2) -s build_type=$(1)
@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
@echo ">>> Will build using $(JOBS) parallel jobs"
echo ">>> Done"
endef
define compile_target
@echo ">>> Compiling for $(1)..."
if [ "$(3)" != "" ]; then \
target="-t$(3)"; \
else \
target=""; \
fi
@cmake --build $(2) --config $(1) --parallel $(JOBS) $(target)
@echo ">>> Done"
endef
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
@@ -32,10 +59,10 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) --parallel
@$(call compile_target,"Debug","$(f_debug)")
buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) --parallel
@$(call compile_target,"Release","$(f_release)")
clean: ## Clean the tests info
@echo ">>> Cleaning Debug PyClassifiers tests...";
@@ -48,19 +75,11 @@ install: ## Install library
@cmake --install $(f_release) --prefix $(prefix)
@echo ">>> Done";
debug: ## Build a debug version of the project
@echo ">>> Building Debug PyClassifiers...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
debug: ## Build a debug version of the project with Conan
@$(call build_target,"Debug","$(f_debug)", "ENABLE_TESTING=ON")
release: ## Build a Release version of the project
@echo ">>> Building Release PyClassifiers...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
release: ## Build a Release version of the project with Conan
@$(call build_target,"Release","$(f_release)", "ENABLE_TESTING=OFF")
opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@@ -81,6 +100,24 @@ coverage: ## Run tests and generate coverage report (build/index.html)
@gcovr $(f_debug)/tests
@echo ">>> Done";
# Conan package manager targets
# =============================
conan-create: ## Create Conan package
@echo ">>> Creating Conan package..."
@echo ">>> Creating Release build..."
@conan create . --build=missing -tf "" -s:a build_type=Release
@echo ">>> Creating Debug build..."
@conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_testing=False"
@echo ">>> Done"
conan-clean: ## Clean Conan cache and build folders
@echo ">>> Cleaning Conan cache and build folders..."
@conan remove "*" --confirm
@conan cache clean
@if test -d "$(f_release)" ; then rm -rf "$(f_release)"; fi
@if test -d "$(f_debug)" ; then rm -rf "$(f_debug)"; fi
@echo ">>> Done"
help: ## Show help message
@IFS=$$'\n' ; \

View File

@@ -52,6 +52,16 @@ Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is
## Installation
### Prerequisites
Install Conan package manager:
```bash
pip install conan
```
### Build and Install
```bash
make release
make buildr

View File

@@ -0,0 +1,290 @@
# PyClassifiers Technical Analysis Report
## Executive Summary
PyClassifiers is a sophisticated C++ wrapper library for Python machine learning classifiers that demonstrates strong architectural design but contains several critical issues that need immediate attention. The codebase successfully bridges C++ and Python ML ecosystems but has significant vulnerabilities in memory management, thread safety, and security.
## Strengths
### ✅ Architecture & Design
- **Well-structured inheritance hierarchy** with consistent `PyClassifier` base class
- **Effective singleton pattern** for Python interpreter management
- **Clean abstraction layer** hiding Python complexity from C++ users
- **Modular design** allowing easy addition of new classifiers
- **PyTorch tensor integration** for efficient data exchange
### ✅ Functionality
- **Comprehensive ML classifier support** (scikit-learn, XGBoost, custom implementations)
- **Unified C++ interface** for diverse Python libraries
- **JSON-based hyperparameter configuration**
- **Cross-platform compatibility** through vcpkg and CMake
### ✅ Development Workflow
- **Automated build system** with debug/release configurations
- **Integrated testing** with Catch2 framework
- **Code coverage tracking** with gcovr
- **Package management** through vcpkg
## Critical Issues & Weaknesses
### 🚨 High Priority Issues
#### Memory Management Vulnerabilities
- **Location**: `pyclfs/PyHelper.hpp:38-63`, `pyclfs/PyClassifier.cc:20,28`
- **Issue**: Manual Python reference counting prone to leaks and double-free errors
- **Risk**: Memory corruption, application crashes
- **Example**: Incorrect tensor stride calculations could cause buffer overflows
#### Thread Safety Violations
- **Location**: `pyclfs/PyWrap.cc:92-96`, throughout Python operations
- **Issue**: Race conditions in singleton access, unprotected global state
- **Risk**: Data corruption, deadlocks in multi-threaded environments
- **Example**: `getClass()` method accesses `moduleClassMap` without mutex protection
#### Security Vulnerabilities
- **Location**: `pyclfs/PyWrap.cc:88`, build system
- **Issue**: Library calls `exit(1)` on errors, no input validation
- **Risk**: Denial of service, potential code injection
- **Example**: Unvalidated Python objects passed directly to interpreter
### 🔧 Medium Priority Issues
#### Build System Problems
- **Location**: `CMakeLists.txt:69-70`, `vcpkg.json:35`
- **Issue**: Fragile external dependencies, typos in configuration
- **Risk**: Build failures, supply chain vulnerabilities
- **Example**: Dependency on personal GitHub registry creates security risk
#### Error Handling Deficiencies
- **Location**: Throughout codebase, especially `pyclfs/PyWrap.cc`
- **Issue**: Inconsistent error handling, missing exception safety
- **Risk**: Unhandled exceptions, resource leaks
- **Example**: `errorAbort()` terminates process instead of throwing exceptions
#### Testing Inadequacies
- **Location**: `tests/` directory
- **Issue**: Limited test coverage, missing edge cases
- **Risk**: Undetected bugs, regression failures
- **Example**: No tests for error conditions or multi-threading
## Specific Code Issues
### Missing Declaration
```cpp
// File: pyclfs/PyClassifier.h - MISSING
protected:
std::vector<std::string> validHyperparameters; // This line missing
```
### Header Guard Mismatch
```cpp
// File: pyclfs/AdaBoostPy.h:15
#ifndef ADABOOSTPY_H
#define ADABOOSTPY_H
// ...
#endif /* ADABOOST_H */ // ❌ Should be ADABOOSTPY_H
```
### Unsafe Type Casting
```cpp
// File: pyclfs/PyClassifier.cc:97
long* data = reinterpret_cast<long*>(prediction.get_data()); // ❌ Unsafe
```
### Configuration Typo
```yaml
# File: vcpkg.json:35
"argpase" # ❌ Should be "argparse"
```
## Enhancement Proposals
### Immediate Actions (Critical)
1. **Fix memory management** - Implement RAII wrappers for Python objects
2. **Secure thread safety** - Add proper mutex protection for all shared state
3. **Replace exit() calls** - Use proper exception handling instead of process termination
4. **Fix configuration typos** - Correct vcpkg.json and header guards
5. **Add input validation** - Validate all data before Python operations
### Short-term Improvements (1-2 weeks)
6. **Enhance error handling** - Implement comprehensive exception hierarchy
7. **Improve test coverage** - Add edge cases, error conditions, and multi-threading tests
8. **Security hardening** - Add compiler security flags and static analysis
9. **Refactor build system** - Remove fragile dependencies and hardcoded paths
10. **Add performance testing** - Implement benchmarking and regression testing
### Long-term Enhancements (1-3 months)
11. **Implement async operations** - Add support for non-blocking ML operations
12. **Add model serialization** - Enable saving/loading trained models
13. **Expand classifier support** - Add more Python ML libraries
14. **Create comprehensive documentation** - API docs, examples, best practices
15. **Add monitoring/logging** - Implement structured logging and metrics
### Architectural Improvements
16. **Abstract Python details** - Hide Boost.Python implementation details
17. **Add configuration management** - Centralized configuration system
18. **Implement plugin architecture** - Dynamic classifier loading
19. **Add batch processing** - Efficient handling of large datasets
20. **Create C API** - Enable usage from other languages
## Validation Checklist
### Before Production Use
- [ ] Fix all memory management issues
- [ ] Implement proper thread safety
- [ ] Replace all exit() calls with exceptions
- [ ] Add comprehensive input validation
- [ ] Fix build system dependencies
- [ ] Achieve >90% test coverage
- [ ] Pass security static analysis
- [ ] Complete performance benchmarking
- [ ] Document all APIs
- [ ] Validate on multiple platforms
### Ongoing Maintenance
- [ ] Regular security audits
- [ ] Dependency vulnerability scanning
- [ ] Performance regression testing
- [ ] Memory leak detection
- [ ] Thread safety validation
- [ ] API compatibility testing
## Detailed Analysis Findings
### Core Architecture Analysis
The PyClassifiers library demonstrates a well-thought-out architecture with clear separation of concerns:
- **PyWrap**: Singleton managing Python interpreter lifecycle
- **PyClassifier**: Abstract base class providing unified interface
- **Individual Classifiers**: Concrete implementations for specific ML algorithms
However, several architectural decisions create vulnerabilities:
1. **Singleton Pattern Issues**: The PyWrap singleton lacks proper thread safety and creates global state dependencies
2. **Manual Resource Management**: Python object lifecycle management is error-prone
3. **Tight Coupling**: Direct Boost.Python dependencies throughout the codebase
### Python/C++ Integration Issues
The integration between Python and C++ reveals several critical problems:
#### Reference Counting Errors
```cpp
// In PyWrap.cc:245 - potential double increment
Py_INCREF(result);
return result; // Caller must free this object
```
#### Tensor Conversion Bugs
```cpp
// In PyClassifier.cc:20 - incorrect stride calculation
auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(),
bp::make_tuple(m, n),
bp::make_tuple(sizeof(X.dtype()) * 2 * n, sizeof(X.dtype()) * 2),
bp::object());
```
#### Inconsistent Error Handling
```cpp
// In PyWrap.cc:83-88 - terminates process instead of throwing
void PyWrap::errorAbort(const std::string& message) {
std::cerr << message << std::endl;
PyErr_Print();
RemoveInstance();
exit(1); // ❌ Should throw exception instead
}
```
### Memory Management Deep Dive
The memory management analysis reveals several critical issues:
#### Python Object Lifecycle
- Manual reference counting in `CPyObject` class
- Potential memory leaks in tensor conversion functions
- Inconsistent cleanup in destructor chains
#### Resource Cleanup
- `PyWrap::clean()` method has proper cleanup but is not exception-safe
- Missing RAII patterns for Python objects
- Global state cleanup issues in singleton destruction
### Thread Safety Analysis
Multiple thread safety violations were identified:
#### Unprotected Global State
```cpp
// In PyWrap.cc - unprotected access
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;
```
#### Race Conditions
- `GetInstance()` method uses mutex but other methods don't
- Python interpreter operations lack proper synchronization
- Global variables accessed without protection
### Security Assessment
Several security vulnerabilities were found:
#### Input Validation
- No validation of tensor dimensions or data types
- Python objects passed directly without sanitization
- Missing bounds checking in array operations
#### Process Termination
- Library calls `exit(1)` which can be exploited for DoS
- No graceful error recovery mechanisms
- Process-wide Python interpreter state
### Testing Infrastructure Analysis
The testing infrastructure has significant gaps:
#### Coverage Gaps
- Only 4 small test datasets
- No error condition testing
- Missing multi-threading tests
- No performance regression testing
#### Test Quality Issues
- Hardcoded expected values without explanation
- No test isolation between test cases
- Limited API coverage
### Build System Assessment
The build system has several issues:
#### Dependency Management
- Fragile external dependencies with relative paths
- Personal GitHub registry creates supply chain risk
- Missing security-focused build flags
#### Configuration Problems
- Typos in vcpkg configuration
- Inconsistent CMake policies
- Missing platform-specific configurations
## Recommendations Priority Matrix
| Priority | Issue | Impact | Effort | Timeline |
|----------|-------|---------|--------|----------|
| Critical | Memory Management | High | Medium | 1 week |
| Critical | Thread Safety | High | Medium | 1 week |
| Critical | Security Vulnerabilities | High | Low | 3 days |
| High | Error Handling | Medium | Low | 1 week |
| High | Build System | Medium | Medium | 1 week |
| Medium | Testing Coverage | Medium | High | 2 weeks |
| Medium | Documentation | Low | High | 2 weeks |
| Low | Performance | Low | High | 1 month |
## Conclusion
The PyClassifiers library demonstrates solid architectural thinking and successfully provides a useful bridge between C++ and Python ML ecosystems. However, critical issues in memory management, thread safety, and security must be addressed before production use. The recommended fixes are achievable with focused effort and will significantly improve the library's robustness and security posture.
The library has strong potential for wider adoption once these fundamental issues are resolved. The modular architecture provides a good foundation for future enhancements and the integration with modern tools like PyTorch and vcpkg shows forward-thinking design decisions.
Immediate focus should be on the critical issues identified, followed by systematic improvement of testing infrastructure and documentation to ensure long-term maintainability and reliability.

View File

@@ -1,12 +0,0 @@
function(add_git_submodule dir)
find_package(Git REQUIRED)
if(NOT EXISTS ${dir}/CMakeLists.txt)
message(STATUS "🚨 Adding git submodule => ${dir}")
execute_process(COMMAND ${GIT_EXECUTABLE}
submodule update --init --recursive -- ${dir}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
endif()
add_subdirectory(${dir})
endfunction(add_git_submodule)

View File

@@ -1,742 +0,0 @@
# Copyright (c) 2012 - 2017, Lars Bilke
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# CHANGES:
#
# 2012-01-31, Lars Bilke
# - Enable Code Coverage
#
# 2013-09-17, Joakim Söderberg
# - Added support for Clang.
# - Some additional usage instructions.
#
# 2016-02-03, Lars Bilke
# - Refactored functions to use named parameters
#
# 2017-06-02, Lars Bilke
# - Merged with modified version from github.com/ufz/ogs
#
# 2019-05-06, Anatolii Kurotych
# - Remove unnecessary --coverage flag
#
# 2019-12-13, FeRD (Frank Dana)
# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor
# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments.
# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY
# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list
# - Set lcov basedir with -b argument
# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be
# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().)
# - Delete output dir, .info file on 'make clean'
# - Remove Python detection, since version mismatches will break gcovr
# - Minor cleanup (lowercase function names, update examples...)
#
# 2019-12-19, FeRD (Frank Dana)
# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets
#
# 2020-01-19, Bob Apthorpe
# - Added gfortran support
#
# 2020-02-17, FeRD (Frank Dana)
# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters
# in EXCLUDEs, and remove manual escaping from gcovr targets
#
# 2021-01-19, Robin Mueller
# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run
# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional
# flags to the gcovr command
#
# 2020-05-04, Mihchael Davis
# - Add -fprofile-abs-path to make gcno files contain absolute paths
# - Fix BASE_DIRECTORY not working when defined
# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines
#
# 2021-05-10, Martin Stump
# - Check if the generator is multi-config before warning about non-Debug builds
#
# 2022-02-22, Marko Wehle
# - Change gcovr output from -o <filename> for --xml <filename> and --html <filename> output respectively.
# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt".
#
# 2022-09-28, Sebastian Mueller
# - fix append_coverage_compiler_flags_to_target to correctly add flags
# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent)
#
# USAGE:
#
# 1. Copy this file into your cmake modules path.
#
# 2. Add the following line to your CMakeLists.txt (best inside an if-condition
# using a CMake option() to enable it just optionally):
# include(CodeCoverage)
#
# 3. Append necessary compiler flags for all supported source files:
# append_coverage_compiler_flags()
# Or for specific target:
# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME)
#
# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og
#
# 4. If you need to exclude additional directories from the report, specify them
# using full paths in the COVERAGE_EXCLUDES variable before calling
# setup_target_for_coverage_*().
# Example:
# set(COVERAGE_EXCLUDES
# '${PROJECT_SOURCE_DIR}/src/dir1/*'
# '/path/to/my/src/dir2/*')
# Or, use the EXCLUDE argument to setup_target_for_coverage_*().
# Example:
# setup_target_for_coverage_lcov(
# NAME coverage
# EXECUTABLE testrunner
# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*")
#
# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set
# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR)
# Example:
# set(COVERAGE_EXCLUDES "dir1/*")
# setup_target_for_coverage_gcovr_html(
# NAME coverage
# EXECUTABLE testrunner
# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src"
# EXCLUDE "dir2/*")
#
# 5. Use the functions described below to create a custom make target which
# runs your test executable and produces a code coverage report.
#
# 6. Build a Debug build:
# cmake -DCMAKE_BUILD_TYPE=Debug ..
# make
# make my_coverage_target
#
include(CMakeParseArguments)
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
# Check prereqs
find_program( GCOV_PATH gcov )
find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl)
find_program( FASTCOV_PATH NAMES fastcov fastcov.py )
find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat )
find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test)
find_program( CPPFILT_PATH NAMES c++filt )
if(NOT GCOV_PATH)
message(FATAL_ERROR "gcov not found! Aborting...")
endif() # NOT GCOV_PATH
# Check supported compiler (Clang, GNU and Flang)
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach(LANG ${LANGUAGES})
if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3)
message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...")
endif()
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
endif()
endforeach()
set(COVERAGE_COMPILER_FLAGS "-g --coverage"
CACHE INTERNAL "")
if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)")
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path)
if(HAVE_fprofile_abs_path)
set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path")
endif()
endif()
set(CMAKE_Fortran_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the Fortran compiler during coverage builds."
FORCE )
set(CMAKE_CXX_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the C++ compiler during coverage builds."
FORCE )
set(CMAKE_C_FLAGS_COVERAGE
${COVERAGE_COMPILER_FLAGS}
CACHE STRING "Flags used by the C compiler during coverage builds."
FORCE )
set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
""
CACHE STRING "Flags used for linking binaries during coverage builds."
FORCE )
set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
""
CACHE STRING "Flags used by the shared libraries linker during coverage builds."
FORCE )
mark_as_advanced(
CMAKE_Fortran_FLAGS_COVERAGE
CMAKE_CXX_FLAGS_COVERAGE
CMAKE_C_FLAGS_COVERAGE
CMAKE_EXE_LINKER_FLAGS_COVERAGE
CMAKE_SHARED_LINKER_FLAGS_COVERAGE )
get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG))
message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading")
endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
link_libraries(gcov)
endif()
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_lcov(
# NAME testrunner_coverage # New target name
# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES testrunner # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# NO_DEMANGLE # Don't demangle C++ symbols
# # even if c++filt is found
# )
function(setup_target_for_coverage_lcov)
set(options NO_DEMANGLE SONARQUBE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS)
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT LCOV_PATH)
message(FATAL_ERROR "lcov not found! Aborting...")
endif() # NOT LCOV_PATH
if(NOT GENHTML_PATH)
message(FATAL_ERROR "genhtml not found! Aborting...")
endif() # NOT GENHTML_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
set(LCOV_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND LCOV_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES LCOV_EXCLUDES)
# Conditional arguments
if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
set(GENHTML_EXTRA_ARGS "--demangle-cpp")
endif()
# Setting up commands which will be run to generate coverage data.
# Cleanup lcov
set(LCOV_CLEAN_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory .
-b ${BASEDIR} --zerocounters
)
# Create baseline to make sure untouched files show up in the report
set(LCOV_BASELINE_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b
${BASEDIR} -o ${Coverage_NAME}.base
)
# Run tests
set(LCOV_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Capturing lcov counters and generating report
set(LCOV_CAPTURE_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . -b
${BASEDIR} --capture --output-file ${Coverage_NAME}.capture
)
# add baseline counters
set(LCOV_BASELINE_COUNT_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base
-a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total
)
# filter collected data to final coverage report
set(LCOV_FILTER_CMD
${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove
${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info
)
# Generate HTML output
set(LCOV_GEN_HTML_CMD
${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o
${Coverage_NAME} ${Coverage_NAME}.info
)
if(${Coverage_SONARQUBE})
# Generate SonarQube output
set(GCOVR_XML_CMD
${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
set(GCOVR_XML_CMD_COMMAND
COMMAND ${GCOVR_XML_CMD}
)
set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml)
set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.")
endif()
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to clean up lcov: ")
string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}")
message(STATUS "${LCOV_CLEAN_CMD_SPACED}")
message(STATUS "Command to create baseline: ")
string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}")
message(STATUS "${LCOV_BASELINE_CMD_SPACED}")
message(STATUS "Command to run the tests: ")
string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}")
message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to capture counters and generate report: ")
string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}")
message(STATUS "${LCOV_CAPTURE_CMD_SPACED}")
message(STATUS "Command to add baseline counters: ")
string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}")
message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}")
message(STATUS "Command to filter collected data: ")
string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}")
message(STATUS "${LCOV_FILTER_CMD_SPACED}")
message(STATUS "Command to generate lcov HTML output: ")
string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}")
message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}")
if(${Coverage_SONARQUBE})
message(STATUS "Command to generate SonarQube XML output: ")
string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
message(STATUS "${GCOVR_XML_CMD_SPACED}")
endif()
endif()
# Setup target
add_custom_target(${Coverage_NAME}
COMMAND ${LCOV_CLEAN_CMD}
COMMAND ${LCOV_BASELINE_CMD}
COMMAND ${LCOV_EXEC_TESTS_CMD}
COMMAND ${LCOV_CAPTURE_CMD}
COMMAND ${LCOV_BASELINE_COUNT_CMD}
COMMAND ${LCOV_FILTER_CMD}
COMMAND ${LCOV_GEN_HTML_CMD}
${GCOVR_XML_CMD_COMMAND}
# Set output files as GENERATED (will be removed on 'make clean')
BYPRODUCTS
${Coverage_NAME}.base
${Coverage_NAME}.capture
${Coverage_NAME}.total
${Coverage_NAME}.info
${GCOVR_XML_CMD_BYPRODUCTS}
${Coverage_NAME}/index.html
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report."
)
# Show where to find the lcov info report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info."
${GCOVR_XML_CMD_COMMENT}
)
# Show info where to find the report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
)
endfunction() # setup_target_for_coverage_lcov
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_gcovr_xml(
# NAME ctest_coverage # New target name
# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES executable_target # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# )
# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
# GCVOR command.
function(setup_target_for_coverage_gcovr_xml)
set(options NONE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT GCOVR_PATH)
message(FATAL_ERROR "gcovr not found! Aborting...")
endif() # NOT GCOVR_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
set(GCOVR_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
# Combine excludes to several -e arguments
set(GCOVR_EXCLUDE_ARGS "")
foreach(EXCLUDE ${GCOVR_EXCLUDES})
list(APPEND GCOVR_EXCLUDE_ARGS "-e")
list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
endforeach()
# Set up commands which will be run to generate coverage data
# Run tests
set(GCOVR_XML_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Running gcovr
set(GCOVR_XML_CMD
${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to run tests: ")
string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}")
message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to generate gcovr XML coverage data: ")
string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
message(STATUS "${GCOVR_XML_CMD_SPACED}")
endif()
add_custom_target(${Coverage_NAME}
COMMAND ${GCOVR_XML_EXEC_TESTS_CMD}
COMMAND ${GCOVR_XML_CMD}
BYPRODUCTS ${Coverage_NAME}.xml
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Running gcovr to produce Cobertura code coverage report."
)
# Show info where to find the report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml."
)
endfunction() # setup_target_for_coverage_gcovr_xml
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_gcovr_html(
# NAME ctest_coverage # New target name
# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES executable_target # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative
# # to BASE_DIRECTORY, with CMake 3.4+)
# )
# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
# GCVOR command.
function(setup_target_for_coverage_gcovr_html)
set(options NONE)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT GCOVR_PATH)
message(FATAL_ERROR "gcovr not found! Aborting...")
endif() # NOT GCOVR_PATH
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(DEFINED Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (CMake 3.4+: Also compute absolute paths)
set(GCOVR_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
if(CMAKE_VERSION VERSION_GREATER 3.4)
get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
endif()
list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
# Combine excludes to several -e arguments
set(GCOVR_EXCLUDE_ARGS "")
foreach(EXCLUDE ${GCOVR_EXCLUDES})
list(APPEND GCOVR_EXCLUDE_ARGS "-e")
list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
endforeach()
# Set up commands which will be run to generate coverage data
# Run tests
set(GCOVR_HTML_EXEC_TESTS_CMD
${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
)
# Create folder
set(GCOVR_HTML_FOLDER_CMD
${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME}
)
# Running gcovr
set(GCOVR_HTML_CMD
${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
)
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Executed command report")
message(STATUS "Command to run tests: ")
string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}")
message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}")
message(STATUS "Command to create a folder: ")
string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}")
message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}")
message(STATUS "Command to generate gcovr HTML coverage data: ")
string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}")
message(STATUS "${GCOVR_HTML_CMD_SPACED}")
endif()
add_custom_target(${Coverage_NAME}
COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD}
COMMAND ${GCOVR_HTML_FOLDER_CMD}
COMMAND ${GCOVR_HTML_CMD}
BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Running gcovr to produce HTML code coverage report."
)
# Show info where to find the report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ;
COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
)
endfunction() # setup_target_for_coverage_gcovr_html
# Defines a target for running and collection code coverage information
# Builds dependencies, runs the given executable and outputs reports.
# NOTE! The executable should always have a ZERO as exit code otherwise
# the coverage generation will not complete.
#
# setup_target_for_coverage_fastcov(
# NAME testrunner_coverage # New target name
# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
# DEPENDENCIES testrunner # Dependencies to build first
# BASE_DIRECTORY "../" # Base directory for report
# # (defaults to PROJECT_SOURCE_DIR)
# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude.
# NO_DEMANGLE # Don't demangle C++ symbols
# # even if c++filt is found
# SKIP_HTML # Don't create html report
# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths
# )
function(setup_target_for_coverage_fastcov)
set(options NO_DEMANGLE SKIP_HTML)
set(oneValueArgs BASE_DIRECTORY NAME)
set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD)
cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(NOT FASTCOV_PATH)
message(FATAL_ERROR "fastcov not found! Aborting...")
endif()
if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH)
message(FATAL_ERROR "genhtml not found! Aborting...")
endif()
# Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
if(Coverage_BASE_DIRECTORY)
get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
else()
set(BASEDIR ${PROJECT_SOURCE_DIR})
endif()
# Collect excludes (Patterns, not paths, for fastcov)
set(FASTCOV_EXCLUDES "")
foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES})
list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}")
endforeach()
list(REMOVE_DUPLICATES FASTCOV_EXCLUDES)
# Conditional arguments
if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
set(GENHTML_EXTRA_ARGS "--demangle-cpp")
endif()
# Set up commands which will be run to generate coverage data
set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS})
set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
--search-directory ${BASEDIR}
--process-gcno
--output ${Coverage_NAME}.json
--exclude ${FASTCOV_EXCLUDES}
)
set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH}
-C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info
)
if(Coverage_SKIP_HTML)
set(FASTCOV_HTML_CMD ";")
else()
set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS}
-o ${Coverage_NAME} ${Coverage_NAME}.info
)
endif()
set(FASTCOV_POST_CMD ";")
if(Coverage_POST_CMD)
set(FASTCOV_POST_CMD ${Coverage_POST_CMD})
endif()
if(CODE_COVERAGE_VERBOSE)
message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):")
message(" Running tests:")
string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}")
message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}")
message(" Capturing fastcov counters and generating report:")
string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}")
message(" ${FASTCOV_CAPTURE_CMD_SPACED}")
message(" Converting fastcov .json to lcov .info:")
string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}")
message(" ${FASTCOV_CONVERT_CMD_SPACED}")
if(NOT Coverage_SKIP_HTML)
message(" Generating HTML report: ")
string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}")
message(" ${FASTCOV_HTML_CMD_SPACED}")
endif()
if(Coverage_POST_CMD)
message(" Running post command: ")
string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}")
message(" ${FASTCOV_POST_CMD_SPACED}")
endif()
endif()
# Setup target
add_custom_target(${Coverage_NAME}
# Cleanup fastcov
COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
--search-directory ${BASEDIR}
--zerocounters
COMMAND ${FASTCOV_EXEC_TESTS_CMD}
COMMAND ${FASTCOV_CAPTURE_CMD}
COMMAND ${FASTCOV_CONVERT_CMD}
COMMAND ${FASTCOV_HTML_CMD}
COMMAND ${FASTCOV_POST_CMD}
# Set output files as GENERATED (will be removed on 'make clean')
BYPRODUCTS
${Coverage_NAME}.info
${Coverage_NAME}.json
${Coverage_NAME}/index.html # report directory
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${Coverage_DEPENDENCIES}
VERBATIM # Protect arguments to commands
COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report."
)
set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.")
if(NOT Coverage_SKIP_HTML)
string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.")
endif()
# Show where to find the fastcov info report
add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG}
)
endfunction() # setup_target_for_coverage_fastcov
function(append_coverage_compiler_flags)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}")
endfunction() # append_coverage_compiler_flags
# Setup coverage for specific library
function(append_coverage_compiler_flags_to_target name)
separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}")
target_compile_options(${name} PRIVATE ${_flag_list})
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
target_link_libraries(${name} PRIVATE gcov)
endif()
endfunction()

View File

@@ -1,22 +0,0 @@
if(ENABLE_CLANG_TIDY)
find_program(CLANG_TIDY_COMMAND NAMES clang-tidy)
if(NOT CLANG_TIDY_COMMAND)
message(WARNING "🔴 CMake_RUN_CLANG_TIDY is ON but clang-tidy is not found!")
set(CMAKE_CXX_CLANG_TIDY "" CACHE STRING "" FORCE)
else()
message(STATUS "🟢 CMake_RUN_CLANG_TIDY is ON")
set(CLANGTIDY_EXTRA_ARGS
"-extra-arg=-Wno-unknown-warning-option"
)
set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND};-p=${CMAKE_BINARY_DIR};${CLANGTIDY_EXTRA_ARGS}" CACHE STRING "" FORCE)
add_custom_target(clang-tidy
COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${CMAKE_PROJECT_NAME}
COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target clang-tidy
COMMENT "Running clang-tidy..."
)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
endif()
endif(ENABLE_CLANG_TIDY)

82
conanfile.py Normal file
View File

@@ -0,0 +1,82 @@
import os, re, pathlib
from conan import ConanFile
from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps
from conan.tools.files import copy
class PlatformConan(ConanFile):
name = "pyclassifiers"
version = "1.0.3"
# Binary configuration
settings = "os", "compiler", "build_type", "arch"
options = {
"enable_testing": [True, False],
}
default_options = {
"enable_testing": False,
}
# Sources are located in the same place as this recipe, copy them to the recipe
exports_sources = "CMakeLists.txt", "pyclfs/*", "tests/*", "LICENSE"
def set_version(self) -> None:
cmake = pathlib.Path(self.recipe_folder) / "CMakeLists.txt"
text = cmake.read_text(encoding="utf-8")
match = re.search(
r"""project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)""",
text, re.IGNORECASE | re.VERBOSE
)
if match:
self.version = match.group(1)
else:
raise Exception("Version not found in CMakeLists.txt")
self.version = match.group(1)
def requirements(self):
self.requires("libtorch/2.7.0")
self.requires("nlohmann_json/3.11.3")
self.requires("folding/1.1.1")
self.requires("fimdlp/2.1.0")
self.requires("bayesnet/1.2.0")
def build_requirements(self):
self.tool_requires("cmake/[>=3.30]")
self.test_requires("catch2/3.8.1")
self.test_requires("arff-files/1.2.0")
def layout(self):
cmake_layout(self)
def generate(self):
deps = CMakeDeps(self)
deps.generate()
tc = CMakeToolchain(self)
tc.generate()
def build(self):
cmake = CMake(self)
cmake.configure()
cmake.build()
if self.options.enable_testing:
# Run tests only if we're building with testing enabled
self.run("ctest --output-on-failure", cwd=self.build_folder)
def package(self):
copy(self, "LICENSE", src=self.source_folder, dst=os.path.join(self.package_folder, "licenses"))
cmake = CMake(self)
cmake.install()
def package_info(self):
self.cpp_info.libs = ["pyclassifiers"]
self.cpp_info.includedirs = ["include"]
self.cpp_info.set_property("cmake_find_mode", "both")
self.cpp_info.set_property("cmake_target_name", "pyclassifiers::pyclassifiers")
# Add compiler flags that might be needed
if self.settings.os == "Linux":
self.cpp_info.system_libs = ["pthread"]

View File

@@ -1,168 +0,0 @@
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>
ArffFiles::ArffFiles() = default;
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
unsigned long int ArffFiles::getSize() const
{
return lines.size();
}
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
std::string ArffFiles::getClassName() const
{
return className;
}
std::string ArffFiles::getClassType() const
{
return classType;
}
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
std::vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::loadCommon(std::string fileName)
{
std::ifstream file(fileName);
if (!file.is_open()) {
throw std::invalid_argument("Unable to open file");
}
std::string line;
std::string keyword;
std::string attribute;
std::string type;
std::string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
std::stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
type += type_w + " ";
attributes.emplace_back(trim(attribute), trim(type));
continue;
}
if (line[0] == '@') {
continue;
}
lines.push_back(line);
}
file.close();
if (attributes.empty())
throw std::invalid_argument("No attributes found");
}
void ArffFiles::load(const std::string& fileName, bool classLast)
{
int labelIndex;
loadCommon(fileName);
if (classLast) {
className = std::get<0>(attributes.back());
classType = std::get<1>(attributes.back());
attributes.pop_back();
labelIndex = static_cast<int>(attributes.size());
} else {
className = std::get<0>(attributes.front());
classType = std::get<1>(attributes.front());
attributes.erase(attributes.begin());
labelIndex = 0;
}
generateDataset(labelIndex);
}
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
int labelIndex;
loadCommon(fileName);
bool found = false;
for (int i = 0; i < attributes.size(); ++i) {
if (attributes[i].first == name) {
className = std::get<0>(attributes[i]);
classType = std::get<1>(attributes[i]);
attributes.erase(attributes.begin() + i);
labelIndex = i;
found = true;
break;
}
}
if (!found) {
throw std::invalid_argument("Class name not found");
}
generateDataset(labelIndex);
}
void ArffFiles::generateDataset(int labelIndex)
{
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
auto yy = std::vector<std::string>(lines.size(), "");
auto removeLines = std::vector<int>(); // Lines with missing values
for (size_t i = 0; i < lines.size(); i++) {
std::stringstream ss(lines[i]);
std::string value;
int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) {
if (pos++ == labelIndex) {
yy[i] = value;
} else {
if (value == "?") {
X[xIndex++][i] = -1;
removeLines.push_back(i);
} else
X[xIndex++][i] = stof(value);
}
}
}
for (auto i : removeLines) {
yy.erase(yy.begin() + i);
for (auto& x : X) {
x.erase(x.begin() + i);
}
}
y = factorize(yy);
}
std::string ArffFiles::trim(const std::string& source)
{
std::string s(source);
s.erase(0, s.find_first_not_of(" '\n\r\t"));
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
return s;
}
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
std::vector<int> yy;
yy.reserve(labels_t.size());
std::map<std::string, int> labelMap;
int i = 0;
for (const std::string& label : labels_t) {
if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++;
}
yy.push_back(labelMap[label]);
}
return yy;
}

View File

@@ -1,32 +0,0 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
class ArffFiles {
private:
std::vector<std::string> lines;
std::vector<std::pair<std::string, std::string>> attributes;
std::string className;
std::string classType;
std::vector<std::vector<float>> X;
std::vector<int> y;
void generateDataset(int);
void loadCommon(std::string);
public:
ArffFiles();
void load(const std::string&, bool = true);
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
unsigned long int getSize() const;
std::string getClassName() const;
std::string getClassType() const;
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

View File

@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)

Submodule lib/catch2 deleted from 506276c592

Submodule lib/json deleted from 378e091795

Submodule lib/mdlp deleted from 7d62d6af4a

20
pyclfs/AdaBoostPy.cc Normal file
View File

@@ -0,0 +1,20 @@
#include "AdaBoostPy.h"
namespace pywrap {
AdaBoostPy::AdaBoostPy() : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
{
validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
}
int AdaBoostPy::getNumberOfEdges() const
{
return callMethodSumOfItems("get_n_leaves");
}
int AdaBoostPy::getNumberOfStates() const
{
return callMethodSumOfItems("get_depth");
}
int AdaBoostPy::getNumberOfNodes() const
{
return callMethodSumOfItems("node_count");
}
} /* namespace pywrap */

15
pyclfs/AdaBoostPy.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef ADABOOSTPY_H
#define ADABOOSTPY_H
#include "PyClassifier.h"
namespace pywrap {
class AdaBoostPy : public PyClassifier {
public:
AdaBoostPy();
~AdaBoostPy() = default;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
int getNumberOfNodes() const override;
};
} /* namespace pywrap */
#endif /* ADABOOST_H */

View File

@@ -1,8 +1,10 @@
include_directories(
${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Bayesnet_INCLUDE_DIRS}
)
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc)
target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc)
target_link_libraries(PyClassifiers PRIVATE
nlohmann_json::nlohmann_json torch::torch
Boost::boost Boost::python Boost::numpy
bayesnet::bayesnet
)

View File

@@ -93,11 +93,19 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
}
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
if (xgboost) {
long* data = reinterpret_cast<long*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
}
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{
@@ -118,11 +126,19 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
}
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
if (xgboost) {
float* data = reinterpret_cast<float*>(prediction.get_data());
std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
}
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
@@ -135,4 +151,4 @@ namespace pywrap {
{
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */
} /* namespace pywrap */

View File

@@ -49,6 +49,7 @@ namespace pywrap {
nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
std::vector<std::string> notes;
bool xgboost = false;
private:
PyWrap* pyWrap;
std::string module;

View File

@@ -5,5 +5,6 @@ namespace pywrap {
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{
validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
xgboost = true;
}
} /* namespace pywrap */

View File

@@ -2,15 +2,16 @@ if(ENABLE_TESTING)
set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
include_directories(
${PyClassifiers_SOURCE_DIR}
${PyClassifiers_SOURCE_DIR}/lib/Files
${PyClassifiers_SOURCE_DIR}/lib/mdlp
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
/usr/local/include
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
endif(ENABLE_TESTING)
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC
torch::torch ${Python3_LIBRARIES} ${LIBTORCH_PYTHON}
Boost::boost Boost::python Boost::numpy fimdlp::fimdlp
Catch2::Catch2WithMain nlohmann_json::nlohmann_json
bayesnet::bayesnet
)
endif(ENABLE_TESTING)

View File

@@ -10,14 +10,16 @@
#include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h"
#include "pyclfs/AdaBoostPy.h"
#include "pyclfs/ODTE.h"
#include "TestUtils.h"
#include <iostream>
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{
map <pair<std::string, std::string>, float> scores = {
// Diabetes
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
// Ecoli
{{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
// Glass
@@ -33,10 +35,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{"RandomForest", new pywrap::RandomForest()}
};
map<std::string, std::string> versions = {
{"ODTE", "1.0.0"},
{"STree", "1.3.2"},
{"SVC", "1.5.1"},
{"RandomForest", "1.5.1"}
{"ODTE", "1.0.0-1"},
{"STree", "1.4.0"},
{"SVC", "1.5.2"},
{"RandomForest", "1.5.2"}
};
auto clf = models[name];
@@ -58,6 +60,15 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
REQUIRE(clf->getVersion() == versions[name]);
}
}
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", false);
auto clf = pywrap::AdaBoostPy();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
}
TEST_CASE("Classifiers features", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", false);
@@ -116,23 +127,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
}
// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", true);
auto clf = pywrap::XGBoost();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// nlohmann::json hyperparameters = { "n_jobs=1" };
// clf.setHyperparameters(hyperparameters);
auto predict_proba = clf.predict_proba(raw.Xt);
auto predict = clf.predict(raw.Xt);
// std::cout << "Predict proba: " << predict_proba << std::endl;
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
// assert(predict.size(0) == predict_proba.size(0));
for (int row = 0; row < predict_proba.size(0); row++) {
// auto sum = 0.0;
// std::cout << "Row " << std::setw(3) << row << ": ";
// for (int col = 0; col < predict_proba.size(1); col++) {
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
// sum += predict_proba[row][col].item<double>();
// }
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
}
}

View File

@@ -1,11 +1,11 @@
#include "TestUtils.h"
#include "bayesnet/config.h"
#include "SourceData.h"
class Paths {
public:
static std::string datasets()
{
return { data_path.begin(), data_path.end() };
return pywrap::SourceData("Test").getPath();
}
};

View File

@@ -5,8 +5,8 @@
#include <vector>
#include <map>
#include <tuple>
#include "ArffFiles.h"
#include "CPPFImdlp.h"
#include "ArffFiles.hpp"
#include "fimdlp/CPPFImdlp.h"
bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
@@ -40,4 +40,4 @@ public:
double epsilon = 1e-5;
};
#endif //TEST_UTILS_H
#endif //TEST_UTILS_H

View File

@@ -0,0 +1,38 @@
#ifndef SOURCEDATA_H
#define SOURCEDATA_H
namespace pywrap {
enum fileType_t { CSV, ARFF, RDATA };
class SourceData {
public:
SourceData(std::string source)
{
if (source == "Surcov") {
path = "datasets/";
fileType = CSV;
} else if (source == "Arff") {
path = "datasets/";
fileType = ARFF;
} else if (source == "Tanveer") {
path = "data/";
fileType = RDATA;
} else if (source == "Test") {
path = "@TEST_DATA_PATH@/";
fileType = ARFF;
} else {
throw std::invalid_argument("Unknown source.");
}
}
std::string getPath()
{
return path;
}
fileType_t getFileType()
{
return fileType;
}
private:
std::string path;
fileType_t fileType;
};
}
#endif