From f1dae498ac9f011ae370ac0beaf9a2a9564a9262 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@gmail.com>
Date: Sat, 28 Jun 2025 18:41:33 +0200
Subject: [PATCH] Fix tests and release 2.1.0

---
 CHANGELOG.md                   | 36 +++++++++++++++++++++++++++++++--
 CMakeUserPresets.json          |  3 ++-
 Makefile                       |  3 ++-
 conanfile.py                   | 22 ++++++++++----------
 src/BinDisc.cpp                |  8 ++------
 src/CPPFImdlp.cpp              |  9 +++++----
 tests/BinDisc_unittest.cpp     | 14 +++----------
 tests/Discretizer_unittest.cpp |  5 ++---
 tests/FImdlp_unittest.cpp      | 37 +++++++++++++++++-----------------
 9 files changed, 80 insertions(+), 57 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96d7957..7d2075f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,44 +5,53 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [2.1.0] - 2025-06-28
 
 ### Added
+
 - Conan dependency manager support
 - Technical analysis report
 
 ### Changed
+
 - Updated README.md
 - Refactored library version and installation system
 - Updated config variable names
 
 ### Fixed
+
 - Removed unneeded semicolon
 
 ## [2.0.1] - 2024-07-22
 
 ### Added
+
 - CMake install target and make install command
 - Flag to control sample building in Makefile
 
 ### Changed
+
 - Library name changed to `fimdlp`
 - Updated version numbers across test files
 
 ### Fixed
+
 - Version number consistency in tests
 
 ## [2.0.0] - 2024-07-04
 
 ### Added
+
 - Makefile with build & test actions for easier development
 - PyTorch (libtorch) integration for tensor operations
 
 ### Changed
+
 - Major refactoring of build system
 - Updated build workflows and CI configuration
 
 ### Fixed
+
 - BinDisc quantile calculation errors (#9)
 - Error in percentile method calculation
 - Integer type issues in calculations
@@ -51,19 +60,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.2.1] - 2024-06-08
 
 ### Added
+
 - PyTorch tensor methods for discretization
 - Improved library build system
 
 ### Changed
+
 - Refactored sample build process
 
 ### Fixed
+
 - Library creation and linking issues
 - Multiple GitHub Actions workflow fixes
 
 ## [1.2.0] - 2024-06-05
 
 ### Added
+
 - **Discretizer** - Abstract base class for all discretization algorithms (#8)
 - **BinDisc** - K-bins discretization with quantile and uniform strategies (#7)
 - Transform method to discretize values using existing cut points
@@ -71,11 +84,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Docker development container configuration
 
 ### Changed
+
 - Refactored system types throughout the library
 - Improved sample program with better dataset handling
 - Enhanced build system with debug options
 
 ### Fixed
+
 - Transform method initialization issues
 - ARFF file attribute name extraction
 - Sample program library binary separation
@@ -83,17 +98,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.1.3] - 2024-06-05
 
 ### Added
+
 - `max_cutpoints` hyperparameter for controlling algorithm complexity
 - `max_depth` and `min_length` as configurable hyperparameters
 - Enhanced sample program with hyperparameter support
 - Additional datasets for testing
 
 ### Changed
+
 - Improved constructor design and parameter handling
 - Enhanced test coverage and reporting
 - Refactored build system configuration
 
 ### Fixed
+
 - Depth initialization in fit method
 - Code quality improvements and smell fixes
 - Exception handling in value cut point calculations
@@ -101,29 +119,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.1.2] - 2023-04-01
 
 ### Added
+
 - Comprehensive test suite with GitHub Actions CI
 - SonarCloud integration for code quality analysis
 - Enhanced build system with automated testing
 
 ### Changed
+
 - Improved GitHub Actions workflow configuration
 - Updated project structure for better maintainability
 
 ### Fixed
+
 - Build system configuration issues
 - Test execution and coverage reporting
 
 ## [1.1.1] - 2023-02-22
 
 ### Added
+
 - Limits header for proper compilation
 - Enhanced build system support
 
 ### Changed
+
 - Updated version numbering system
 - Improved SonarCloud configuration
 
 ### Fixed
+
 - ValueCutPoint exception handling (removed unnecessary exception)
 - Build system compatibility issues
 - GitHub Actions token configuration
@@ -131,17 +155,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.1.0] - 2023-02-21
 
 ### Added
+
 - Classic algorithm implementation for performance comparison
 - Enhanced ValueCutPoint logic with same_values detection
 - Glass dataset support in sample program
 - Debug configuration for development
 
 ### Changed
+
 - Refactored ValueCutPoint algorithm for better accuracy
 - Improved candidate selection logic
 - Enhanced sample program with multiple datasets
 
 ### Fixed
+
 - Sign error in valueCutPoint calculation
 - Final cut value computation
 - Duplicate dataset handling in sample
@@ -149,6 +176,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [1.0.0.0] - 2022-12-21
 
 ### Added
+
 - Initial release of MDLP (Minimum Description Length Principle) discretization library
 - Core CPPFImdlp algorithm implementation based on Fayyad & Irani's paper
 - Entropy and information gain calculation methods
@@ -158,6 +186,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - ARFF file format support for datasets
 
 ### Features
+
 - Recursive discretization using entropy-based criteria
 - Stable sorting with tie-breaking for identical values
 - Configurable algorithm parameters
@@ -168,15 +197,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Release Notes
 
 ### Version 2.x
+
 - **Breaking Changes**: Library renamed to `fimdlp`
 - **Major Enhancement**: PyTorch integration for improved performance
 - **New Features**: Comprehensive discretization framework with multiple algorithms
 
 ### Version 1.x
+
 - **Core Algorithm**: MDLP discretization implementation
 - **Extensibility**: Hyperparameter support and algorithm variants
 - **Quality**: Comprehensive testing and CI/CD pipeline
 
 ### Version 1.0.x
+
 - **Foundation**: Initial stable implementation
-- **Algorithm**: Core MDLP discretization functionality
\ No newline at end of file
+- **Algorithm**: Core MDLP discretization functionality
diff --git a/CMakeUserPresets.json b/CMakeUserPresets.json
index 7671426..da9a347 100644
--- a/CMakeUserPresets.json
+++ b/CMakeUserPresets.json
@@ -5,6 +5,7 @@
     },
     "include": [
         "build_release/build/Release/generators/CMakePresets.json",
-        "build_debug/build/Debug/generators/CMakePresets.json"
+        "build_debug/build/Debug/generators/CMakePresets.json",
+        "build/Release/generators/CMakePresets.json"
     ]
 }
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 00d3774..8cf166d 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ build: ## Build the project for Release
 	@echo ">>> Building the project for Release..."
 	@if [ -d $(f_release) ]; then rm -fr $(f_release); fi
 	@conan install . --build=missing -of $(f_release) -s build_type=Release --profile:build=default --profile:host=default
-	cmake -S . -B $(f_release) -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=ON
+	cmake -S . -B $(f_release) -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=OFF -DENABLE_SAMPLE=OFF
 	@cmake --build $(f_release) -j 8
 
 install: ## Install the project
@@ -23,6 +23,7 @@ test: ## Build Debug version and run tests
 	@conan install . --build=missing -of $(f_debug) -s build_type=Debug
 	@cmake -B $(f_debug) -S . -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(f_debug)/build/Debug/generators/conan_toolchain.cmake -DENABLE_TESTING=ON -DENABLE_SAMPLE=ON
 	@cmake --build $(f_debug) -j 8
+	@cp -r tests/datasets $(f_debug)/tests/datasets
 	@cd $(f_debug)/tests && ctest --output-on-failure -j 8
 	@cd $(f_debug)/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \
 	$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
diff --git a/conanfile.py b/conanfile.py
index df9a1a3..174bbe5 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -1,7 +1,8 @@
+import os
+import re
 from conan import ConanFile
 from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps
-from conan.tools.files import copy
-import os
+from conan.tools.files import load, copy
 
 
 class FimdlpConan(ConanFile):
@@ -31,15 +32,14 @@ class FimdlpConan(ConanFile):
     # Sources are located in the same place as this recipe, copy them to the recipe
     exports_sources = "CMakeLists.txt", "src/*", "sample/*", "tests/*", "config/*", "fimdlpConfig.cmake.in"
 
-    def set_version(self):
-        # Read the CMakeLists.txt file to get the version
-        try:
-            content = load(self, "CMakeLists.txt")
-            match = re.search(r"VERSION\s+(\d+\.\d+\.\d+)", content)
-            if match:
-                self.version = match.group(1)
-        except Exception:
-            self.version = "0.0.1"  # fallback version
+    def init(self):
+        content = load(self, "CMakeLists.txt")
+        version_pattern = re.compile(r'project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)', re.IGNORECASE | re.DOTALL)
+        match = version_pattern.search(content)
+        if match:
+            self.version = match.group(1)
+        else:
+            raise Exception("Version not found in CMakeLists.txt")
     
     def config_options(self):
         if self.settings.os == "Windows":
diff --git a/src/BinDisc.cpp b/src/BinDisc.cpp
index edc13a2..096fddf 100644
--- a/src/BinDisc.cpp
+++ b/src/BinDisc.cpp
@@ -41,12 +41,8 @@ namespace mdlp {
     }
     void BinDisc::fit(samples_t& X, labels_t& y)
     {
-        // Input validation for supervised interface
-        if (X.size() != y.size()) {
-            throw std::invalid_argument("X and y must have the same size");
-        }
-        if (X.empty() || y.empty()) {
-            throw std::invalid_argument("X and y cannot be empty");
+        if (X.empty()) {
+            throw std::invalid_argument("X cannot be empty");
         }
 
         // BinDisc is inherently unsupervised, but we validate inputs for consistency
diff --git a/src/CPPFImdlp.cpp b/src/CPPFImdlp.cpp
index 910fe43..a073d35 100644
--- a/src/CPPFImdlp.cpp
+++ b/src/CPPFImdlp.cpp
@@ -29,7 +29,7 @@ namespace mdlp {
         if (proposed < 0.0f) {
             throw std::invalid_argument("proposed_cuts must be non-negative");
         }
-        
+
         direction = bound_dir_t::RIGHT;
     }
 
@@ -39,7 +39,7 @@ namespace mdlp {
         if (proposed_cuts == 0) {
             return numeric_limits<size_t>::max();
         }
-        if (proposed_cuts < 0 || proposed_cuts > static_cast<precision_t>(X.size())) {
+        if (proposed_cuts > static_cast<precision_t>(X.size())) {
             throw invalid_argument("wrong proposed num_cuts value");
         }
         if (proposed_cuts < 1)
@@ -56,7 +56,7 @@ namespace mdlp {
         discretizedData.clear();
         cutPoints.clear();
         if (X.size() != y.size()) {
-            throw invalid_argument("X and y must have the same size");
+            throw std::invalid_argument("X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size()));
         }
         if (X.empty() || y.empty()) {
             throw invalid_argument("X and y must have at least one element");
@@ -105,9 +105,10 @@ namespace mdlp {
         // # of duplicates before cutpoint
         n = safe_subtract(safe_subtract(cut, 1), idxPrev);
         // # of duplicates after cutpoint
-        m = safe_subtract(safe_subtract(idxNext, cut), 1);
+        m = idxNext - cut - 1;
         // Decide which values to use
         if (backWall) {
+            m = int(idxNext - cut - 1) < 0 ? 0 : m; // Clamp m to 0 when idxNext - cut - 1 would be negative
             cut = cut + m + 1;
         } else {
             cut = safe_subtract(cut, n);
diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp
index 5ba1451..0102a45 100644
--- a/tests/BinDisc_unittest.cpp
+++ b/tests/BinDisc_unittest.cpp
@@ -153,20 +153,12 @@ namespace mdlp {
     TEST_F(TestBinDisc3U, EmptyUniform)
     {
         samples_t X = {};
-        fit(X);
-        auto cuts = getCutPoints();
-        ASSERT_EQ(2, cuts.size());
-        EXPECT_NEAR(0, cuts.at(0), margin);
-        EXPECT_NEAR(0, cuts.at(1), margin);
+        EXPECT_THROW(fit(X), std::invalid_argument);
     }
     TEST_F(TestBinDisc3Q, EmptyQuantile)
     {
         samples_t X = {};
-        fit(X);
-        auto cuts = getCutPoints();
-        ASSERT_EQ(2, cuts.size());
-        EXPECT_NEAR(0, cuts.at(0), margin);
-        EXPECT_NEAR(0, cuts.at(1), margin);
+        EXPECT_THROW(fit(X), std::invalid_argument);
     }
     TEST(TestBinDisc3, ExceptionNumberBins)
     {
@@ -406,6 +398,6 @@ namespace mdlp {
                 EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin);
             }
         }
-        std::cout << "* Number of experiments tested: " << num << std::endl;
+        // std::cout << "* Number of experiments tested: " << num << std::endl;
     }
 }
diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp
index 3bc8a5f..a0ed153 100644
--- a/tests/Discretizer_unittest.cpp
+++ b/tests/Discretizer_unittest.cpp
@@ -17,13 +17,13 @@ namespace mdlp {
     const float margin = 1e-4;
     static std::string set_data_path()
     {
-        std::string path = "datasets/";
+        std::string path = "tests/datasets/";
         std::ifstream file(path + "iris.arff");
         if (file.is_open()) {
             file.close();
             return path;
         }
-        return "tests/datasets/";
+        return "datasets/";
     }
     const std::string data_path = set_data_path();
     const labels_t iris_quantile = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
@@ -32,7 +32,6 @@ namespace mdlp {
         Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
         auto version = disc->version();
         delete disc;
-        std::cout << "Version computed: " << version;
         EXPECT_EQ("2.1.0", version);
     }
     TEST(Discretizer, BinIrisUniform)
diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp
index ecc10bd..9dacd53 100644
--- a/tests/FImdlp_unittest.cpp
+++ b/tests/FImdlp_unittest.cpp
@@ -64,7 +64,7 @@ namespace mdlp {
         {
             EXPECT_EQ(computed.size(), expected.size());
             for (unsigned long i = 0; i < computed.size(); i++) {
-                cout << "(" << computed[i] << ", " << expected[i] << ") ";
+                // cout << "(" << computed[i] << ", " << expected[i] << ") ";
                 EXPECT_NEAR(computed[i], expected[i], precision);
             }
         }
@@ -76,7 +76,7 @@ namespace mdlp {
             X = X_;
             y = y_;
             indices = sortIndices(X, y);
-            cout << "* " << title << endl;
+            // cout << "* " << title << endl;
             result = valueCutPoint(0, cut, 10);
             EXPECT_NEAR(result.first, midPoint, precision);
             EXPECT_EQ(result.second, limit);
@@ -95,9 +95,9 @@ namespace mdlp {
                 test.fit(X[feature], y);
                 EXPECT_EQ(test.get_depth(), depths[feature]);
                 auto computed = test.getCutPoints();
-                cout << "Feature " << feature << ": ";
+                // cout << "Feature " << feature << ": ";
                 checkCutPoints(computed, expected[feature]);
-                cout << endl;
+                // cout << endl;
             }
         }
     };
@@ -113,17 +113,16 @@ namespace mdlp {
     {
         X = { 1, 2, 3 };
         y = { 1, 2 };
-        EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size");
+        EXPECT_THROW_WITH_MESSAGE(fit(X, y), invalid_argument, "X and y must have the same size: " + std::to_string(X.size()) + " != " + std::to_string(y.size()));
     }
 
-    TEST_F(TestFImdlp, FitErrorMinLengtMaxDepth)
+    TEST_F(TestFImdlp, FitErrorMinLength)
     {
-        auto testLength = CPPFImdlp(2, 10, 0);
-        auto testDepth = CPPFImdlp(3, 0, 0);
-        X = { 1, 2, 3 };
-        y = { 1, 2, 3 };
-        EXPECT_THROW_WITH_MESSAGE(testLength.fit(X, y), invalid_argument, "min_length must be greater than 2");
-        EXPECT_THROW_WITH_MESSAGE(testDepth.fit(X, y), invalid_argument, "max_depth must be greater than 0");
+        EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(2, 10, 0), invalid_argument, "min_length must be greater than 2");
+    }
+    TEST_F(TestFImdlp, FitErrorMaxDepth)
+    {
+        EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 0, 0), invalid_argument, "max_depth must be greater than 0");
     }
 
     TEST_F(TestFImdlp, JoinFit)
@@ -137,14 +136,16 @@ namespace mdlp {
         checkCutPoints(computed, expected);
     }
 
+    TEST_F(TestFImdlp, FitErrorMinCutPoints)
+    {
+        EXPECT_THROW_WITH_MESSAGE(CPPFImdlp(3, 10, -1), invalid_argument, "proposed_cuts must be non-negative");
+    }
     TEST_F(TestFImdlp, FitErrorMaxCutPoints)
     {
-        auto testmin = CPPFImdlp(2, 10, -1);
-        auto testmax = CPPFImdlp(3, 0, 200);
-        X = { 1, 2, 3 };
-        y = { 1, 2, 3 };
-        EXPECT_THROW_WITH_MESSAGE(testmin.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
-        EXPECT_THROW_WITH_MESSAGE(testmax.fit(X, y), invalid_argument, "wrong proposed num_cuts value");
+        auto test = CPPFImdlp(3, 1, 8);
+        samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
+        labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
+        EXPECT_THROW_WITH_MESSAGE(test.fit(X_, y_), invalid_argument, "wrong proposed num_cuts value");
     }
 
     TEST_F(TestFImdlp, SortIndices)