From f1dae498ac9f011ae370ac0beaf9a2a9564a9262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 28 Jun 2025 18:41:33 +0200 Subject: [PATCH] Fix tests --- CHANGELOG.md | 36 +++++++++++++++++++++++++++++++-- CMakeUserPresets.json | 3 ++- Makefile | 3 ++- conanfile.py | 22 ++++++++++---------- src/BinDisc.cpp | 8 ++------ src/CPPFImdlp.cpp | 9 +++++---- tests/BinDisc_unittest.cpp | 14 +++---------- tests/Discretizer_unittest.cpp | 5 ++--- tests/FImdlp_unittest.cpp | 37 +++++++++++++++++----------------- 9 files changed, 80 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96d7957..7d2075f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,44 +5,53 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [2.1.0] - 2025-06-28 ### Added + - Conan dependency manager support - Technical analysis report ### Changed + - Updated README.md - Refactored library version and installation system - Updated config variable names ### Fixed + - Removed unneeded semicolon ## [2.0.1] - 2024-07-22 ### Added + - CMake install target and make install command - Flag to control sample building in Makefile ### Changed + - Library name changed to `fimdlp` - Updated version numbers across test files ### Fixed + - Version number consistency in tests ## [2.0.0] - 2024-07-04 ### Added + - Makefile with build & test actions for easier development - PyTorch (libtorch) integration for tensor operations ### Changed + - Major refactoring of build system - Updated build workflows and CI configuration ### Fixed + - BinDisc quantile calculation errors (#9) - Error in percentile method calculation - Integer type issues in calculations @@ -51,19 +60,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.2.1] - 2024-06-08 ### Added + - PyTorch tensor methods for discretization - Improved library build system ### Changed + - Refactored sample build process ### Fixed + - Library creation and linking issues - Multiple GitHub Actions workflow fixes ## [1.2.0] - 2024-06-05 ### Added + - **Discretizer** - Abstract base class for all discretization algorithms (#8) - **BinDisc** - K-bins discretization with quantile and uniform strategies (#7) - Transform method to discretize values using existing cut points @@ -71,11 +84,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Docker development container configuration ### Changed + - Refactored system types throughout the library - Improved sample program with better dataset handling - Enhanced build system with debug options ### Fixed + - Transform method initialization issues - ARFF file attribute name extraction - Sample program library binary separation @@ -83,17 +98,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.1.3] - 2024-06-05 ### Added + - `max_cutpoints` hyperparameter for controlling algorithm complexity - `max_depth` and `min_length` as configurable hyperparameters - Enhanced sample program with hyperparameter support - Additional datasets for testing ### Changed + - Improved constructor design and parameter handling - Enhanced test coverage and reporting - Refactored build system configuration ### Fixed + - Depth initialization in fit method - Code quality improvements and smell fixes - Exception handling in value cut point calculations @@ -101,29 +119,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.1.2] - 2023-04-01 ### Added + - Comprehensive test suite with GitHub Actions CI - SonarCloud integration for code quality analysis - Enhanced build system with automated testing ### Changed + - Improved GitHub Actions workflow configuration - Updated project structure for better maintainability ### Fixed + - Build system configuration issues - Test execution and coverage reporting ## [1.1.1] - 2023-02-22 ### Added + - Limits header for proper compilation - Enhanced build system support ### Changed + - Updated version numbering system - Improved SonarCloud configuration ### Fixed + - ValueCutPoint exception handling (removed unnecessary exception) - Build system compatibility issues - GitHub Actions token configuration @@ -131,17 +155,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.1.0] - 2023-02-21 ### Added + - Classic algorithm implementation for performance comparison - Enhanced ValueCutPoint logic with same_values detection - Glass dataset support in sample program - Debug configuration for development ### Changed + - Refactored ValueCutPoint algorithm for better accuracy - Improved candidate selection logic - Enhanced sample program with multiple datasets ### Fixed + - Sign error in valueCutPoint calculation - Final cut value computation - Duplicate dataset handling in sample @@ -149,6 +176,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.0.0] - 2022-12-21 ### Added + - Initial release of MDLP (Minimum Description Length Principle) discretization library - Core CPPFImdlp algorithm implementation based on Fayyad & Irani's paper - Entropy and information gain calculation methods @@ -158,6 +186,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ARFF file format support for datasets ### Features + - Recursive discretization using entropy-based criteria - Stable sorting with tie-breaking for identical values - Configurable algorithm parameters @@ -168,15 +197,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Release Notes ### Version 2.x + - **Breaking Changes**: Library renamed to `fimdlp` - **Major Enhancement**: PyTorch integration for improved performance - **New Features**: Comprehensive discretization framework with multiple algorithms ### Version 1.x + - **Core Algorithm**: MDLP discretization implementation - **Extensibility**: Hyperparameter support and algorithm variants - **Quality**: Comprehensive testing and CI/CD pipeline ### Version 1.0.x + - **Foundation**: Initial stable implementation -- **Algorithm**: Core MDLP discretization functionality \ No newline at end of file +- **Algorithm**: Core MDLP discretization functionality diff --git a/CMakeUserPresets.json b/CMakeUserPresets.json index 7671426..da9a347 100644 --- a/CMakeUserPresets.json +++ b/CMakeUserPresets.json @@ -5,6 +5,7 @@ }, "include": [ "build_release/build/Release/generators/CMakePresets.json", - "build_debug/build/Debug/generators/CMakePresets.json" + "build_debug/build/Debug/generators/CMakePresets.json", + "build/Release/generators/CMakePresets.json" ] } \ No newline at end of file diff --git a/Makefile b/Makefile index 00d3774..8cf166d 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ build: ## Build the project for Release @echo ">>> Building the project for Release..." @if [ -d $(f_release) ]; then rm -fr $(f_release); fi @conan install . --build=missing -of $(f_release) -s build_type=Release --profile:build=default --profile:host=default - cmake -S . -B $(f_release) -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=ON + cmake -S . -B $(f_release) -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=OFF @cmake --build $(f_release) -j 8 install: ## Install the project @@ -23,6 +23,7 @@ test: ## Build Debug version and run tests @conan install . --build=missing -of $(f_debug) -s build_type=Debug @cmake -B $(f_debug) -S . -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(f_debug)/build/Debug/generators/conan_toolchain.cmake -DENABLE_TESTING=ON -DENABLE_SAMPLE=ON @cmake --build $(f_debug) -j 8 + @cp -r tests/datasets $(f_debug)/tests/datasets @cd $(f_debug)/tests && ctest --output-on-failure -j 8 @cd $(f_debug)/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \ diff --git a/conanfile.py b/conanfile.py index df9a1a3..174bbe5 100644 --- a/conanfile.py +++ b/conanfile.py @@ -1,7 +1,8 @@ +import os +import re from conan import ConanFile from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps -from conan.tools.files import copy -import os +from conan.tools.files import load, copy class FimdlpConan(ConanFile): @@ -31,15 +32,14 @@ class FimdlpConan(ConanFile): # Sources are located in the same place as this recipe, copy them to the recipe exports_sources = "CMakeLists.txt", "src/*", "sample/*", "tests/*", "config/*", "fimdlpConfig.cmake.in" - def set_version(self): - # Read the CMakeLists.txt file to get the version - try: - content = load(self, "CMakeLists.txt") - match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", content) - if match: - self.version = match.group(1) - except Exception: - self.version = "0.0.1" # fallback version + def init(self): + content = load(self, "CMakeLists.txt") + version_pattern = re.compile(r'project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)', re.IGNORECASE | re.DOTALL) + match = version_pattern.search(content) + if match: + self.version = match.group(1) + else: + raise Exception("Version not found in CMakeLists.txt") def config_options(self): if self.settings.os == "Windows": diff --git a/src/BinDisc.cpp b/src/BinDisc.cpp index edc13a2..096fddf 100644 --- a/src/BinDisc.cpp +++ b/src/BinDisc.cpp @@ -41,12 +41,8 @@ namespace mdlp { } void BinDisc::fit(samples_t& X, labels_t& y) { - // Input validation for supervised interface - if (X.size() != y.size()) { - throw std::invalid_argument("X and y must have the same size"); - } - if (X.empty() || y.empty()) { - throw std::invalid_argument("X and y cannot be empty"); + if (X.empty()) { + throw std::invalid_argument("X cannot be empty"); } // BinDisc is inherently unsupervised, but we validate inputs for consistency diff --git a/src/CPPFImdlp.cpp b/src/CPPFImdlp.cpp index 910fe43..a073d35 100644 --- a/src/CPPFImdlp.cpp +++ b/src/CPPFImdlp.cpp @@ -29,7 +29,7 @@ namespace mdlp { if (proposed < 0.0f) { throw std::invalid_argument("proposed_cuts must be non-negative"); } - + direction = bound_dir_t::RIGHT; } @@ -39,7 +39,7 @@ namespace mdlp { if (proposed_cuts == 0) { return numeric_limits::max(); } - if (proposed_cuts < 0 || proposed_cuts > static_cast(X.size())) { + if (proposed_cuts > static_cast(X.size())) { throw invalid_argument("wrong proposed num_cuts value"); } if (proposed_cuts < 1) @@ -56,7 +56,7 @@ namespace mdlp { discretizedData.clear(); cutPoints.clear(); if (X.size() != y.size()) { - throw invalid_argument("X and y must have the same size"); + throw std::invalid_argument("X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size())); } if (X.empty() || y.empty()) { throw invalid_argument("X and y must have at least one element"); @@ -105,9 +105,10 @@ namespace mdlp { // # of duplicates before cutpoint n = safe_subtract(safe_subtract(cut, 1), idxPrev); // # of duplicates after cutpoint - m = safe_subtract(safe_subtract(idxNext, cut), 1); + m = idxNext - cut - 1; // Decide which values to use if (backWall) { + m = int(idxNext - cut - 1) < 0 ? 0 : m; // Ensure m right cut = cut + m + 1; } else { cut = safe_subtract(cut, n); diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 5ba1451..0102a45 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -153,20 +153,12 @@ namespace mdlp { TEST_F(TestBinDisc3U, EmptyUniform) { samples_t X = {}; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(0, cuts.at(0), margin); - EXPECT_NEAR(0, cuts.at(1), margin); + EXPECT_THROW(fit(X), std::invalid_argument); } TEST_F(TestBinDisc3Q, EmptyQuantile) { samples_t X = {}; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(0, cuts.at(0), margin); - EXPECT_NEAR(0, cuts.at(1), margin); + EXPECT_THROW(fit(X), std::invalid_argument); } TEST(TestBinDisc3, ExceptionNumberBins) { @@ -406,6 +398,6 @@ namespace mdlp { EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin); } } - std::cout << "* Number of experiments tested: " << num << std::endl; + // std::cout << "* Number of experiments tested: " << num << std::endl; } } diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp index 3bc8a5f..a0ed153 100644 --- a/tests/Discretizer_unittest.cpp +++ b/tests/Discretizer_unittest.cpp @@ -17,13 +17,13 @@ namespace mdlp { const float margin = 1e-4; static std::string set_data_path() { - std::string path = "datasets/"; + std::string path = "tests/datasets/"; std::ifstream file(path + "iris.arff"); if (file.is_open()) { file.close(); return path; } - return "tests/datasets/"; + return "datasets/"; } const std::string data_path = set_data_path(); const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; @@ -32,7 +32,6 @@ namespace mdlp { Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM); auto version = disc->version(); delete disc; - std::cout << "Version computed: " << version; EXPECT_EQ("2.1.0", version); } TEST(Discretizer, BinIrisUniform) diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index ecc10bd..9dacd53 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -64,7 +64,7 @@ namespace mdlp { { EXPECT_EQ(computed.size(), expected.size()); for (unsigned long i = 0; i < computed.size(); i++) { - cout << "(" << computed[i] << ", " << expected[i] << ") "; + // cout << "(" << computed[i] << ", " << expected[i] << ") "; EXPECT_NEAR(computed[i], expected[i], precision); } } @@ -76,7 +76,7 @@ namespace mdlp { X = X_; y = y_; indices = sortIndices(X, y); - cout << "* " << title << endl; + // cout << "* " << title << endl; result = valueCutPoint(0, cut, 10); EXPECT_NEAR(result.first, midPoint, precision); EXPECT_EQ(result.second, limit); @@ -95,9 +95,9 @@ namespace mdlp { test.fit(X[feature], y); EXPECT_EQ(test.get_depth(), depths[feature]); auto computed = test.getCutPoints(); - cout << "Feature " << feature << ": "; + // cout << "Feature " << feature << ": "; checkCutPoints(computed, expected[feature]); - cout << endl; + // cout << endl; } } }; @@ -113,17 +113,16 @@ namespace mdlp { { X = { 1, 2, 3 }; y = { 1, 2 }; - EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size"); + EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size())); } - TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth) + TEST_F(TestFImdlp, FitErrorMinLength) { - auto testLength = CPPFImdlp(2, 10, 0); - auto testDepth = CPPFImdlp(3, 0, 0); - X = { 1, 2, 3 }; - y = { 1, 2, 3 }; - EXPECT_THROW_WITH_MESSAGE(testLength.fit(X, y), invalid_argument, "min_length must be greater than 2"); - EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0"); + EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(2, 10, 0), invalid_argument, "min_length must be greater than 2"); + } + TEST_F(TestFImdlp, FitErrorMaxDepth) + { + EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 0, 0), invalid_argument, "max_depth must be greater than 0"); } TEST_F(TestFImdlp, JoinFit) @@ -137,14 +136,16 @@ namespace mdlp { checkCutPoints(computed, expected); } + TEST_F(TestFImdlp, FitErrorMinCutPoints) + { + EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 10, -1), invalid_argument, "proposed_cuts must be non-negative"); + } TEST_F(TestFImdlp, FitErrorMaxCutPoints) { - auto testmin = CPPFImdlp(2, 10, -1); - auto testmax = CPPFImdlp(3, 0, 200); - X = { 1, 2, 3 }; - y = { 1, 2, 3 }; - EXPECT_THROW_WITH_MESSAGE(testmin.fit(X, y), invalid_argument, "wrong proposed num_cuts value"); - EXPECT_THROW_WITH_MESSAGE(testmax.fit(X, y), invalid_argument, "wrong proposed num_cuts value"); + auto test = CPPFImdlp(3, 1, 8); + samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 }; + labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 }; + EXPECT_THROW_WITH_MESSAGE(test.fit(X_, y_), invalid_argument, "wrong proposed num_cuts value"); } TEST_F(TestFImdlp, SortIndices)