mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 07:25:56 +00:00
Begin adding Conan dependency manager
This commit is contained in:
77
CLAUDE.md
Normal file
77
CLAUDE.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
This is a C++ implementation of the MDLP (Minimum Description Length Principle) discretization algorithm based on Fayyad & Irani's paper. The library provides discretization methods for continuous-valued attributes in classification learning.
|
||||
|
||||
## Build System
|
||||
|
||||
The project uses CMake with a Makefile wrapper for common tasks:
|
||||
|
||||
### Common Commands
|
||||
- `make build` - Build release version with sample program
|
||||
- `make test` - Run full test suite with coverage report
|
||||
- `make install` - Install the library
|
||||
|
||||
### Build Configurations
|
||||
- **Release**: Built in `build_release/` directory
|
||||
- **Debug**: Built in `build_debug/` directory (for testing)
|
||||
|
||||
### Dependencies
|
||||
- PyTorch (libtorch) - Required dependency
|
||||
- GoogleTest - Fetched automatically for testing
|
||||
- Coverage tools: lcov, genhtml
|
||||
|
||||
## Code Architecture
|
||||
|
||||
### Core Components
|
||||
|
||||
1. **Discretizer** (`src/Discretizer.h/cpp`) - Abstract base class for all discretizers
|
||||
2. **CPPFImdlp** (`src/CPPFImdlp.h/cpp`) - Main MDLP algorithm implementation
|
||||
3. **BinDisc** (`src/BinDisc.h/cpp`) - K-bins discretization (quantile/uniform strategies)
|
||||
4. **Metrics** (`src/Metrics.h/cpp`) - Entropy and information gain calculations
|
||||
|
||||
### Key Data Types
|
||||
- `samples_t` - Input data samples
|
||||
- `labels_t` - Classification labels
|
||||
- `indices_t` - Index arrays for sorting/processing
|
||||
- `precision_t` - Floating-point precision type
|
||||
|
||||
### Algorithm Flow
|
||||
1. Data is sorted by value, with labels used as tie-breakers between identical values
|
||||
2. MDLP recursively finds optimal cut points using entropy-based criteria
|
||||
3. Cut points are validated to ensure meaningful splits
|
||||
4. Transform method maps continuous values to discrete bins
|
||||
|
||||
## Testing
|
||||
|
||||
Tests are built with GoogleTest and include:
|
||||
- `Metrics_unittest` - Entropy/information gain tests
|
||||
- `FImdlp_unittest` - Core MDLP algorithm tests
|
||||
- `BinDisc_unittest` - K-bins discretization tests
|
||||
- `Discretizer_unittest` - Base class functionality tests
|
||||
|
||||
### Running Tests
|
||||
```bash
|
||||
make test # Runs all tests and generates coverage report
|
||||
cd build_debug/tests && ctest # Run tests directly
|
||||
```
|
||||
|
||||
Coverage reports are generated at `build_debug/tests/coverage/index.html`.
|
||||
|
||||
## Sample Usage
|
||||
|
||||
The sample program demonstrates basic usage:
|
||||
```bash
|
||||
build_release/sample/sample -f iris -m 2
|
||||
```
|
||||
|
||||
## Development Notes
|
||||
|
||||
- The library uses PyTorch tensors for efficient numerical operations
|
||||
- Code follows the C++17 standard
|
||||
- Coverage is maintained at 100%
|
||||
- The implementation handles edge cases like duplicate values and small intervals
|
||||
- Conan package manager support is available via `conanfile.py`
|
@@ -9,7 +9,7 @@ project(fimdlp
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
# Options
|
||||
# -------
|
||||
@@ -41,13 +41,12 @@ if (ENABLE_SAMPLE)
|
||||
endif()
|
||||
|
||||
include_directories(
|
||||
${TORCH_INCLUDE_DIRS}
|
||||
${fimdlp_SOURCE_DIR}/src
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
add_library(fimdlp src/CPPFImdlp.cpp src/Metrics.cpp src/BinDisc.cpp src/Discretizer.cpp)
|
||||
target_link_libraries(fimdlp "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(fimdlp torch::torch)
|
||||
|
||||
# Installation
|
||||
# ------------
|
||||
|
9
CMakeUserPresets.json
Normal file
9
CMakeUserPresets.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/Release/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
48
conanfile.py
48
conanfile.py
@@ -1,5 +1,8 @@
|
||||
import re
|
||||
from conan import ConanFile, CMake
|
||||
import os
|
||||
from conan import ConanFile
|
||||
from conan.tools.cmake import CMake, CMakeToolchain, cmake_layout, CMakeDeps
|
||||
from conan.tools.files import save, load
|
||||
|
||||
class FimdlpConan(ConanFile):
|
||||
name = "fimdlp"
|
||||
@@ -10,25 +13,29 @@ class FimdlpConan(ConanFile):
|
||||
description = "Discretization algorithm based on the paper by Fayyad & Irani."
|
||||
topics = ("discretization", "classification", "machine learning")
|
||||
settings = "os", "compiler", "build_type", "arch"
|
||||
generators = "cmake"
|
||||
exports_sources = "src/*", "CMakeLists.txt", "README.md"
|
||||
exports_sources = "src/*", "CMakeLists.txt", "README.md", "config/*", "fimdlpConfig.cmake.in"
|
||||
|
||||
def init(self):
|
||||
def set_version(self):
|
||||
# Read the CMakeLists.txt file to get the version
|
||||
# This is a simple example; you might want to use a more robust method
|
||||
# to parse the CMakeLists.txt file.
|
||||
# For example, you could use a regex to extract the version number.
|
||||
with open("CMakeLists.txt", "r") as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
if "VERSION" in line:
|
||||
# Extract the version number using regex
|
||||
match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", line)
|
||||
if match:
|
||||
self.version = match.group(1)
|
||||
try:
|
||||
content = load(self, "CMakeLists.txt")
|
||||
match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", content)
|
||||
if match:
|
||||
self.version = match.group(1)
|
||||
except Exception:
|
||||
self.version = "2.0.1" # fallback version
|
||||
|
||||
def requirements(self):
|
||||
self.requires("libtorch/2.7.0") # Adjust version as necessary
|
||||
self.requires("libtorch/2.7.0")
|
||||
|
||||
def layout(self):
|
||||
cmake_layout(self)
|
||||
|
||||
def generate(self):
|
||||
deps = CMakeDeps(self)
|
||||
deps.generate()
|
||||
tc = CMakeToolchain(self)
|
||||
tc.generate()
|
||||
|
||||
def build(self):
|
||||
cmake = CMake(self)
|
||||
@@ -36,10 +43,13 @@ class FimdlpConan(ConanFile):
|
||||
cmake.build()
|
||||
|
||||
def package(self):
|
||||
# self.copy("*.h", dst="include", src="src/include")
|
||||
# self.copy("*fimdlp*", dst="lib", keep_path=False)
|
||||
cmake = CMake(self)
|
||||
cmake.install()
|
||||
|
||||
def package_info(self):
|
||||
self.cpp_info.libs = ["fimdlp"]
|
||||
self.cpp_info.libs = ["fimdlp"]
|
||||
self.cpp_info.includedirs = ["include"]
|
||||
self.cpp_info.libdirs = ["lib"]
|
||||
self.cpp_info.set_property("cmake_find_mode", "both")
|
||||
self.cpp_info.set_property("cmake_target_name", "fimdlp::fimdlp")
|
||||
self.cpp_info.set_property("cmake_file_name", "fimdlp")
|
9
test_consumer/CMakeLists.txt
Normal file
9
test_consumer/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# Minimal consumer project: builds one executable against the fimdlp package
# to check that the package can be found and linked from an external project.
cmake_minimum_required(VERSION 3.20)
|
||||
project(test_fimdlp)
|
||||
|
||||
# Compile the consumer as C++17.
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# REQUIRED makes configuration fail if the fimdlp package cannot be located.
# NOTE(review): presumably resolved through the Conan-generated CMakeDeps
# files listed in conanfile.txt - confirm the toolchain/presets are in use.
find_package(fimdlp REQUIRED)
|
||||
|
||||
add_executable(test_fimdlp test_fimdlp.cpp)
|
||||
# Link against the namespaced target fimdlp::fimdlp provided by the package.
target_link_libraries(test_fimdlp fimdlp::fimdlp)
|
9
test_consumer/CMakeUserPresets.json
Normal file
9
test_consumer/CMakeUserPresets.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"version": 4,
|
||||
"vendor": {
|
||||
"conan": {}
|
||||
},
|
||||
"include": [
|
||||
"build/Release/generators/CMakePresets.json"
|
||||
]
|
||||
}
|
9
test_consumer/conanfile.txt
Normal file
9
test_consumer/conanfile.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
[requires]
|
||||
fimdlp/2.0.1
|
||||
|
||||
[generators]
|
||||
CMakeDeps
|
||||
CMakeToolchain
|
||||
|
||||
[layout]
|
||||
cmake_layout
|
39
test_consumer/test_fimdlp.cpp
Normal file
39
test_consumer/test_fimdlp.cpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <fimdlp/BinDisc.h>
|
||||
|
||||
/// @brief Smoke test for the packaged fimdlp library.
///
/// Fits an mdlp::CPPFImdlp discretizer and a 3-bin uniform mdlp::BinDisc on a
/// small in-memory data set and prints the cut points each one reports.
/// No results are asserted: the program only demonstrates that the headers
/// are found, the library links, and the calls run.
///
/// @return 0 when execution reaches the end of the function.
int main() {
|
||||
std::cout << "Testing FIMDLP package..." << std::endl;
|
||||
|
||||
// Test data - simple continuous values with binary classification
// (10 samples; the labels below are 0/1 and are paired with data by position)
|
||||
mdlp::samples_t data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
|
||||
mdlp::labels_t labels = {0, 0, 0, 1, 1, 0, 1, 1, 1, 1};
|
||||
|
||||
std::cout << "Created test data with " << data.size() << " samples" << std::endl;
|
||||
|
||||
// Test MDLP discretizer
// Default-constructed, so whatever defaults CPPFImdlp defines are in effect.
|
||||
mdlp::CPPFImdlp discretizer;
|
||||
discretizer.fit(data, labels);
|
||||
|
||||
auto cut_points = discretizer.getCutPoints();
|
||||
std::cout << "MDLP found " << cut_points.size() << " cut points" << std::endl;
|
||||
|
||||
// Print every cut point together with its index.
for (size_t i = 0; i < cut_points.size(); ++i) {
|
||||
std::cout << "Cut point " << i << ": " << cut_points[i] << std::endl;
|
||||
}
|
||||
|
||||
// Test BinDisc discretizer
|
||||
mdlp::BinDisc bin_discretizer(3, mdlp::strategy_t::UNIFORM); // 3 bins, uniform strategy
|
||||
// Labels are passed to fit() here as well.
// NOTE(review): whether BinDisc actually uses them is not visible from this file.
bin_discretizer.fit(data, labels);
|
||||
|
||||
auto bin_cut_points = bin_discretizer.getCutPoints();
|
||||
std::cout << "BinDisc found " << bin_cut_points.size() << " cut points" << std::endl;
|
||||
|
||||
// Print every BinDisc cut point together with its index.
for (size_t i = 0; i < bin_cut_points.size(); ++i) {
|
||||
std::cout << "Bin cut point " << i << ": " << bin_cut_points[i] << std::endl;
|
||||
}
|
||||
|
||||
// Printed unconditionally once the calls above have returned; nothing is verified.
std::cout << "FIMDLP package test completed successfully!" << std::endl;
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user