Begin adding conan dependency manager

This commit is contained in:
2025-06-28 01:27:22 +02:00
parent e068bf0a54
commit 059fd33b4e
8 changed files with 183 additions and 22 deletions

77
CLAUDE.md Normal file
View File

@@ -0,0 +1,77 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
This is a C++ implementation of the MDLP (Minimum Description Length Principle) discretization algorithm based on Fayyad & Irani's paper. The library provides discretization methods for continuous-valued attributes in classification learning.
## Build System
The project uses CMake with a Makefile wrapper for common tasks:
### Common Commands
- `make build` - Build release version with sample program
- `make test` - Run full test suite with coverage report
- `make install` - Install the library
### Build Configurations
- **Release**: Built in `build_release/` directory
- **Debug**: Built in `build_debug/` directory (used for running the tests and generating the coverage report)
### Dependencies
- PyTorch (libtorch) - Required dependency
- GoogleTest - Fetched automatically for testing
- Coverage tools: lcov, genhtml
## Code Architecture
### Core Components
1. **Discretizer** (`src/Discretizer.h/cpp`) - Abstract base class for all discretizers
2. **CPPFImdlp** (`src/CPPFImdlp.h/cpp`) - Main MDLP algorithm implementation
3. **BinDisc** (`src/BinDisc.h/cpp`) - K-bins discretization (quantile/uniform strategies)
4. **Metrics** (`src/Metrics.h/cpp`) - Entropy and information gain calculations
### Key Data Types
- `samples_t` - Input data samples
- `labels_t` - Classification labels
- `indices_t` - Index arrays for sorting/processing
- `precision_t` - Floating-point precision type
### Algorithm Flow
1. Data is sorted using labels as tie-breakers for identical values
2. MDLP recursively finds optimal cut points using entropy-based criteria
3. Cut points are validated to ensure meaningful splits
4. Transform method maps continuous values to discrete bins
## Testing
Tests are built with GoogleTest and include:
- `Metrics_unittest` - Entropy/information gain tests
- `FImdlp_unittest` - Core MDLP algorithm tests
- `BinDisc_unittest` - K-bins discretization tests
- `Discretizer_unittest` - Base class functionality tests
### Running Tests
```bash
make test # Runs all tests and generates coverage report
cd build_debug/tests && ctest # Run tests directly
```
Coverage reports are generated at `build_debug/tests/coverage/index.html`.
## Sample Usage
The sample program demonstrates basic usage:
```bash
build_release/sample/sample -f iris -m 2
```
## Development Notes
- The library uses PyTorch tensors for efficient numerical operations
- Code follows C++17 standards
- Test coverage is maintained at 100%
- The implementation handles edge cases like duplicate values and small intervals
- Conan package manager support is available via `conanfile.py`

View File

@@ -9,7 +9,7 @@ project(fimdlp
set(CMAKE_CXX_STANDARD 17)
cmake_policy(SET CMP0135 NEW)
find_package(Torch CONFIG REQUIRED)
find_package(Torch REQUIRED)
# Options
# -------
@@ -41,13 +41,12 @@ if (ENABLE_SAMPLE)
endif()
include_directories(
${TORCH_INCLUDE_DIRS}
${fimdlp_SOURCE_DIR}/src
${CMAKE_BINARY_DIR}/configured_files/include
)
add_library(fimdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp)
target_link_libraries(fimdlp "${TORCH_LIBRARIES}")
target_link_libraries(fimdlp torch::torch)
# Installation
# ------------

9
CMakeUserPresets.json Normal file
View File

@@ -0,0 +1,9 @@
{
"version": 4,
"vendor": {
"conan": {}
},
"include": [
"build/Release/generators/CMakePresets.json"
]
}

View File

@@ -1,5 +1,8 @@
import re
from conan import ConanFile, CMake
import os
from conan import ConanFile
from conan.tools.cmake import CMake, CMakeToolchain, cmake_layout, CMakeDeps
from conan.tools.files import save, load
class FimdlpConan(ConanFile):
name = "fimdlp"
@@ -10,25 +13,29 @@ class FimdlpConan(ConanFile):
description = "Discretization algorithm based on the paper by Fayyad & Irani."
topics = ("discretization", "classification", "machine learning")
settings = "os", "compiler", "build_type", "arch"
generators = "cmake"
exports_sources = "src/*", "CMakeLists.txt", "README.md"
exports_sources = "src/*", "CMakeLists.txt", "README.md", "config/*", "fimdlpConfig.cmake.in"
def init(self):
def set_version(self):
# Read the CMakeLists.txt file to get the version
# This is a simple example; you might want to use a more robust method
# to parse the CMakeLists.txt file.
# For example, you could use a regex to extract the version number.
with open("CMakeLists.txt", "r") as f:
lines = f.readlines()
for line in lines:
if "VERSION" in line:
# Extract the version number using regex
match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", line)
if match:
self.version = match.group(1)
try:
content = load(self, "CMakeLists.txt")
match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", content)
if match:
self.version = match.group(1)
except Exception:
self.version = "2.0.1" # fallback version
def requirements(self):
self.requires("libtorch/2.7.0") # Adjust version as necessary
self.requires("libtorch/2.7.0")
def layout(self):
cmake_layout(self)
def generate(self):
deps = CMakeDeps(self)
deps.generate()
tc = CMakeToolchain(self)
tc.generate()
def build(self):
cmake = CMake(self)
@@ -36,10 +43,13 @@ class FimdlpConan(ConanFile):
cmake.build()
def package(self):
# self.copy("*.h", dst="include", src="src/include")
# self.copy("*fimdlp*", dst="lib", keep_path=False)
cmake = CMake(self)
cmake.install()
def package_info(self):
self.cpp_info.libs = ["fimdlp"]
self.cpp_info.libs = ["fimdlp"]
self.cpp_info.includedirs = ["include"]
self.cpp_info.libdirs = ["lib"]
self.cpp_info.set_property("cmake_find_mode", "both")
self.cpp_info.set_property("cmake_target_name", "fimdlp::fimdlp")
self.cpp_info.set_property("cmake_file_name", "fimdlp")

View File

@@ -0,0 +1,9 @@
cmake_minimum_required(VERSION 3.20)
project(test_fimdlp)
set(CMAKE_CXX_STANDARD 17)
find_package(fimdlp REQUIRED)
add_executable(test_fimdlp test_fimdlp.cpp)
target_link_libraries(test_fimdlp fimdlp::fimdlp)

View File

@@ -0,0 +1,9 @@
{
"version": 4,
"vendor": {
"conan": {}
},
"include": [
"build/Release/generators/CMakePresets.json"
]
}

View File

@@ -0,0 +1,9 @@
[requires]
fimdlp/2.0.1
[generators]
CMakeDeps
CMakeToolchain
[layout]
cmake_layout

View File

@@ -0,0 +1,39 @@
#include <iostream>
#include <vector>
#include <fimdlp/CPPFImdlp.h>
#include <fimdlp/BinDisc.h>
int main() {
std::cout << "Testing FIMDLP package..." << std::endl;
// Test data - simple continuous values with binary classification
mdlp::samples_t data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
mdlp::labels_t labels = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1};
std::cout << "Created test data with " << data.size() << " samples" << std::endl;
// Test MDLP discretizer
mdlp::CPPFImdlp discretizer;
discretizer.fit(data, labels);
auto cut_points = discretizer.getCutPoints();
std::cout << "MDLP found " << cut_points.size() << " cut points" << std::endl;
for (size_t i = 0; i < cut_points.size(); ++i) {
std::cout << "Cut point " << i << ": " << cut_points[i] << std::endl;
}
// Test BinDisc discretizer
mdlp::BinDisc bin_discretizer(3, mdlp::strategy_t::UNIFORM); // 3 bins, uniform strategy
bin_discretizer.fit(data, labels);
auto bin_cut_points = bin_discretizer.getCutPoints();
std::cout << "BinDisc found " << bin_cut_points.size() << " cut points" << std::endl;
for (size_t i = 0; i < bin_cut_points.size(); ++i) {
std::cout << "Bin cut point " << i << ": " << bin_cut_points[i] << std::endl;
}
std::cout << "FIMDLP package test completed successfully!" << std::endl;
return 0;
}