mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-24 20:05:58 +00:00
Compare commits
2 Commits
c86c7f9ef0
...
v2.1.2
Author | SHA1 | Date | |
---|---|---|---|
6c63bb6e84
|
|||
|
42b91d1391 |
35
.github/workflows/build.yml
vendored
35
.github/workflows/build.yml
vendored
@@ -19,26 +19,29 @@ jobs:
|
|||||||
submodules: recursive
|
submodules: recursive
|
||||||
- name: Install sonar-scanner and build-wrapper
|
- name: Install sonar-scanner and build-wrapper
|
||||||
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
uses: SonarSource/sonarcloud-github-c-cpp@v2
|
||||||
|
- name: Install Python and Conan
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get -y install python3 python3-pip
|
||||||
|
pip3 install conan
|
||||||
- name: Install lcov & gcovr
|
- name: Install lcov & gcovr
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -y install lcov
|
sudo apt-get -y install lcov
|
||||||
sudo apt-get -y install gcovr
|
sudo apt-get -y install gcovr
|
||||||
- name: Install Libtorch
|
- name: Setup Conan profileson
|
||||||
run: |
|
run: |
|
||||||
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcpu.zip
|
conan profile detect --force
|
||||||
unzip libtorch-cxx11-abi-shared-with-deps-2.3.1+cpu.zip
|
conan remote add cimmeria https://conan.rmontanana.es/artifactory/api/conan/Cimmeria
|
||||||
|
- name: Install dependencies with Conan
|
||||||
|
run: |
|
||||||
|
conan install . --build=missing -of build_debug -s build_type=Debug -o enable_testing=True
|
||||||
|
- name: Configure with CMake
|
||||||
|
run: |
|
||||||
|
cmake -S . -B build_debug -DCMAKE_TOOLCHAIN_FILE=build_debug/build/Debug/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON
|
||||||
- name: Tests & build-wrapper
|
- name: Tests & build-wrapper
|
||||||
run: |
|
run: |
|
||||||
cmake -S . -B build -Wno-dev -DCMAKE_PREFIX_PATH=$(pwd)/libtorch -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON
|
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build_debug --config Debug -j 4
|
||||||
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ --config Debug
|
cp -r tests/datasets build_debug/tests/datasets
|
||||||
cmake --build build -j 4
|
cd build_debug/tests
|
||||||
cd build
|
ctest --output-on-failure -j 4
|
||||||
ctest -C Debug --output-on-failure -j 4
|
|
||||||
gcovr -f ../src/CPPFImdlp.cpp -f ../src/Metrics.cpp -f ../src/BinDisc.cpp -f ../src/Discretizer.cpp --txt --sonarqube=coverage.xml
|
|
||||||
- name: Run sonar-scanner
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
|
||||||
run: |
|
|
||||||
sonar-scanner --define sonar.cfamily.compile-commands="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
|
|
||||||
--define sonar.coverageReportPaths=build/coverage.xml
|
|
18
CHANGELOG.md
18
CHANGELOG.md
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [2.1.2] - 2025-08-22
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- make info now gives info about the build status
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
|
||||||
|
- Mistake in entropy computation
|
||||||
|
|
||||||
|
## [2.1.1] - 2025-07-17
|
||||||
|
|
||||||
|
### Internal Changes
|
||||||
|
|
||||||
|
- Updated Libtorch to version 2.7.1
|
||||||
|
- Updated ArffFiles library to version 1.2.1
|
||||||
|
- Enhance CMake configuration for better compatibility
|
||||||
|
|
||||||
## [2.1.0] - 2025-06-28
|
## [2.1.0] - 2025-06-28
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
@@ -1,10 +1,10 @@
|
|||||||
cmake_minimum_required(VERSION 3.20)
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
|
||||||
project(fimdlp
|
project(fimdlp
|
||||||
|
VERSION 2.1.2
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
DESCRIPTION "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
|
DESCRIPTION "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
|
||||||
HOMEPAGE_URL "https://github.com/rmontanana/mdlp"
|
HOMEPAGE_URL "https://github.com/rmontanana/mdlp"
|
||||||
VERSION 2.1.0
|
|
||||||
)
|
)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
cmake_policy(SET CMP0135 NEW)
|
cmake_policy(SET CMP0135 NEW)
|
||||||
|
@@ -7,9 +7,11 @@ This directory contains the Conan package configuration for the fimdlp library.
|
|||||||
The package manages the following dependencies:
|
The package manages the following dependencies:
|
||||||
|
|
||||||
### Build Requirements
|
### Build Requirements
|
||||||
|
|
||||||
- **libtorch/2.4.1** - PyTorch C++ library for tensor operations
|
- **libtorch/2.4.1** - PyTorch C++ library for tensor operations
|
||||||
|
|
||||||
### Test Requirements (when testing enabled)
|
### Test Requirements (when testing enabled)
|
||||||
|
|
||||||
- **catch2/3.8.1** - Modern C++ testing framework
|
- **catch2/3.8.1** - Modern C++ testing framework
|
||||||
- **arff-files** - ARFF file format support (included locally in tests/lib/Files/)
|
- **arff-files** - ARFF file format support (included locally in tests/lib/Files/)
|
||||||
|
|
||||||
@@ -67,7 +69,7 @@ conan create . -o shared=True --profile:build=default --profile:host=default
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Add Cimmeria remote
|
# Add Cimmeria remote
|
||||||
conan remote add cimmeria <cimmeria-server-url>
|
conan remote add cimmeria https://conan.rmontanana.es/artifactory/api/conan/Cimmeria
|
||||||
|
|
||||||
# Login to Cimmeria
|
# Login to Cimmeria
|
||||||
conan remote login cimmeria <username>
|
conan remote login cimmeria <username>
|
||||||
@@ -150,4 +152,4 @@ conan create . -o enable_testing=True
|
|||||||
|
|
||||||
- C++17 compatible compiler
|
- C++17 compatible compiler
|
||||||
- CMake 3.20 or later
|
- CMake 3.20 or later
|
||||||
- Conan 2.0 or later
|
- Conan 2.0 or later
|
||||||
|
74
Makefile
74
Makefile
@@ -1,5 +1,5 @@
|
|||||||
SHELL := /bin/bash
|
SHELL := /bin/bash
|
||||||
.DEFAULT_GOAL := release
|
.DEFAULT_GOAL := help
|
||||||
.PHONY: debug release install test conan-create viewcoverage
|
.PHONY: debug release install test conan-create viewcoverage
|
||||||
lcov := lcov
|
lcov := lcov
|
||||||
|
|
||||||
@@ -8,21 +8,48 @@ f_release = build_release
|
|||||||
genhtml = genhtml
|
genhtml = genhtml
|
||||||
docscdir = docs
|
docscdir = docs
|
||||||
|
|
||||||
|
# Set the number of parallel jobs to the number of available processors minus 7
|
||||||
|
CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
|
||||||
|
|| nproc --all 2>/dev/null \
|
||||||
|
|| sysctl -n hw.ncpu)
|
||||||
|
JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
|
||||||
|
|
||||||
|
# Colors for output
|
||||||
|
GREEN = \033[0;32m
|
||||||
|
YELLOW = \033[1;33m
|
||||||
|
RED = \033[0;31m
|
||||||
|
NC = \033[0m # No Color
|
||||||
|
|
||||||
define build_target
|
define build_target
|
||||||
@echo ">>> Building the project for $(1)..."
|
@echo ">>> Building the project for $(1)..."
|
||||||
@if [ -d $(2) ]; then rm -fr $(2); fi
|
@if [ -d $(2) ]; then rm -fr $(2); fi
|
||||||
@conan install . --build=missing -of $(2) -s build_type=$(1)
|
@conan install . --build=missing -of $(2) -s build_type=$(1) $(4)
|
||||||
@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
|
@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
|
||||||
@cmake --build $(2) --config $(1) -j 8
|
@cmake --build $(2) --config $(1) -j 8
|
||||||
endef
|
endef
|
||||||
|
|
||||||
|
define status_file_folder
|
||||||
|
@if [ -d $(1) ]; then \
|
||||||
|
st1=" ✅ $(GREEN)"; \
|
||||||
|
else \
|
||||||
|
st1=" ❌ $(RED)"; \
|
||||||
|
fi; \
|
||||||
|
if [ -f $(1)/libfimdlp.a ]; then \
|
||||||
|
st2=" ✅ $(GREEN)"; \
|
||||||
|
else \
|
||||||
|
st2=" ❌ $(RED)"; \
|
||||||
|
fi; \
|
||||||
|
printf " $(YELLOW)$(2):$(NC) $$st1 Folder $(NC) $$st2 Library $(NC)\n"
|
||||||
|
endef
|
||||||
|
|
||||||
|
|
||||||
debug: ## Build Debug version of the library
|
debug: ## Build Debug version of the library
|
||||||
@$(call build_target,"Debug","$(f_debug)", "ENABLE_TESTING=ON")
|
@$(call build_target,"Debug","$(f_debug)", "ENABLE_TESTING=ON", "-o enable_testing=True")
|
||||||
|
|
||||||
release: ## Build Release version of the library
|
release: ## Build Release version of the library
|
||||||
@$(call build_target,"Release","$(f_release)", "ENABLE_TESTING=OFF")
|
@$(call build_target,"Release","$(f_release)", "ENABLE_TESTING=OFF", "-o enable_testing=False")
|
||||||
|
|
||||||
install: ## Install the project
|
install: ## Install the library
|
||||||
@echo ">>> Installing the project..."
|
@echo ">>> Installing the project..."
|
||||||
@cmake --build $(f_release) --target install -j 8
|
@cmake --build $(f_release) --target install -j 8
|
||||||
|
|
||||||
@@ -31,7 +58,8 @@ test: ## Build Debug version and run tests
|
|||||||
@$(MAKE) debug;
|
@$(MAKE) debug;
|
||||||
@cp -r tests/datasets $(f_debug)/tests/datasets
|
@cp -r tests/datasets $(f_debug)/tests/datasets
|
||||||
@cd $(f_debug)/tests && ctest --output-on-failure -j 8
|
@cd $(f_debug)/tests && ctest --output-on-failure -j 8
|
||||||
@cd $(f_debug)/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --output-file coverage.info >/dev/null 2>&1; \
|
@echo ">>> Generating coverage report..."
|
||||||
|
@cd $(f_debug)/tests && $(lcov) --capture --directory ../ --demangle-cpp --ignore-errors source,source --ignore-errors mismatch --ignore-errors inconsistent --output-file coverage.info >/dev/null 2>&1; \
|
||||||
$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||||
$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||||
$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||||
@@ -60,11 +88,41 @@ viewcoverage: ## View the html coverage report
|
|||||||
@xdg-open $(docscdir)/coverage/index.html || open $(docscdir)/coverage/index.html 2>/dev/null
|
@xdg-open $(docscdir)/coverage/index.html || open $(docscdir)/coverage/index.html 2>/dev/null
|
||||||
@echo ">>> Done";
|
@echo ">>> Done";
|
||||||
|
|
||||||
|
info: ## Show project information
|
||||||
|
@version=$$(grep -A1 "project(fimdlp" CMakeLists.txt | grep "VERSION" | sed 's/.*VERSION \([0-9.]*\).*/\1/'); \
|
||||||
|
printf "$(GREEN)FImdlp Library: $(YELLOW)ver. $$version$(NC)\n"
|
||||||
|
@echo ""
|
||||||
|
@printf "$(GREEN)Project folders:$(NC)\n"
|
||||||
|
$(call status_file_folder, $(f_release), "Build\ Release")
|
||||||
|
$(call status_file_folder, $(f_debug), "Build\ Debug\ \ ")
|
||||||
|
@echo ""
|
||||||
|
@printf "$(GREEN)Build commands:$(NC)\n"
|
||||||
|
@printf " $(YELLOW)make release$(NC) - Build library for release\n"
|
||||||
|
@printf " $(YELLOW)make debug$(NC) - Build library for debug\n"
|
||||||
|
@printf " $(YELLOW)make test$(NC) - Run tests\n"
|
||||||
|
@printf " $(YELLOW)Usage:$(NC) make help\n"
|
||||||
|
@echo ""
|
||||||
|
@printf " $(YELLOW)Parallel Jobs: $(GREEN)$(JOBS)$(NC)\n"
|
||||||
|
|
||||||
conan-create: ## Create the conan package
|
conan-create: ## Create the conan package
|
||||||
@echo ">>> Creating the conan package..."
|
@echo ">>> Creating the conan package..."
|
||||||
conan create . --build=missing -tf "" -s:a build_type=Release
|
conan create . --build=missing -tf "" -s:a build_type=Release
|
||||||
conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_testing=False"
|
conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_testing=False"
|
||||||
@echo ">>> Done"
|
@echo ">>> Done"
|
||||||
|
|
||||||
|
help: ## Show help message
|
||||||
|
@IFS=$$'\n' ; \
|
||||||
|
help_lines=(`grep -Fh "##" $(MAKEFILE_LIST) | grep -Fv fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||||
|
printf "%s\n\n" "Usage: make [task]"; \
|
||||||
|
printf "%-20s %s\n" "task" "help" ; \
|
||||||
|
printf "%-20s %s\n" "------" "----" ; \
|
||||||
|
for help_line in $${help_lines[@]}; do \
|
||||||
|
IFS=$$':' ; \
|
||||||
|
help_split=($$help_line) ; \
|
||||||
|
help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||||
|
help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
|
||||||
|
printf '\033[36m'; \
|
||||||
|
printf "%-20s %s" $$help_command ; \
|
||||||
|
printf '\033[0m'; \
|
||||||
|
printf "%s\n" $$help_info; \
|
||||||
|
done
|
@@ -3,7 +3,7 @@
|
|||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||||
[](html/index.html)
|
[](html/index.html)
|
||||||
[](https://deepwiki.com/rmontanana/mdlp)
|
[](https://deepwiki.com/rmontanana/mdlp)
|
||||||
[](https://doi.org/10.5281/zenodo.14245443)
|
[](https://doi.org/10.5281/zenodo.16025501)
|
||||||
|
|
||||||
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
# <img src="logo.png" alt="logo" width="50"/> mdlp
|
||||||
|
|
||||||
@@ -18,9 +18,7 @@ Other features:
|
|||||||
|
|
||||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||||
- Intervals have to have more than two examples to be evaluated (mdlp).
|
- Intervals have to have more than two examples to be evaluated (mdlp).
|
||||||
|
|
||||||
- The algorithm returns the cut points for the variable.
|
- The algorithm returns the cut points for the variable.
|
||||||
|
|
||||||
- The transform method uses the cut points returning its index in the following way:
|
- The transform method uses the cut points returning its index in the following way:
|
||||||
|
|
||||||
cut[i - 1] <= x < cut[i]
|
cut[i - 1] <= x < cut[i]
|
||||||
|
@@ -54,7 +54,7 @@ class FimdlpConan(ConanFile):
|
|||||||
self.requires("libtorch/2.7.1")
|
self.requires("libtorch/2.7.1")
|
||||||
|
|
||||||
def build_requirements(self):
|
def build_requirements(self):
|
||||||
self.requires("arff-files/1.2.0") # for tests and sample
|
self.requires("arff-files/1.2.1") # for tests and sample
|
||||||
if self.options.enable_testing:
|
if self.options.enable_testing:
|
||||||
self.test_requires("gtest/1.16.0")
|
self.test_requires("gtest/1.16.0")
|
||||||
|
|
||||||
|
@@ -98,7 +98,7 @@ namespace mdlp {
|
|||||||
const precision_t fraction =
|
const precision_t fraction =
|
||||||
(percentile / 100.0 - percentI) /
|
(percentile / 100.0 - percentI) /
|
||||||
(static_cast<precision_t>(indexLower + 1) / static_cast<precision_t>(data.size() - 1) - percentI);
|
(static_cast<precision_t>(indexLower + 1) / static_cast<precision_t>(data.size() - 1) - percentI);
|
||||||
if (const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; value != results.back() || first) // first needed as results.back() return is undefined for empty vectors
|
if (const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; first || results.empty() || value != results.back()) // Check empty before calling back()
|
||||||
results.push_back(value);
|
results.push_back(value);
|
||||||
first = false;
|
first = false;
|
||||||
}
|
}
|
||||||
|
@@ -41,6 +41,9 @@ namespace mdlp {
|
|||||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||||
inline precision_t safe_X_access(size_t idx) const
|
inline precision_t safe_X_access(size_t idx) const
|
||||||
{
|
{
|
||||||
|
if (indices.empty()) {
|
||||||
|
throw std::out_of_range("Indices array is empty");
|
||||||
|
}
|
||||||
if (idx >= indices.size()) {
|
if (idx >= indices.size()) {
|
||||||
throw std::out_of_range("Index out of bounds for indices array");
|
throw std::out_of_range("Index out of bounds for indices array");
|
||||||
}
|
}
|
||||||
@@ -52,6 +55,9 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
inline label_t safe_y_access(size_t idx) const
|
inline label_t safe_y_access(size_t idx) const
|
||||||
{
|
{
|
||||||
|
if (indices.empty()) {
|
||||||
|
throw std::out_of_range("Indices array is empty");
|
||||||
|
}
|
||||||
if (idx >= indices.size()) {
|
if (idx >= indices.size()) {
|
||||||
throw std::out_of_range("Index out of bounds for indices array");
|
throw std::out_of_range("Index out of bounds for indices array");
|
||||||
}
|
}
|
||||||
|
@@ -7,6 +7,7 @@
|
|||||||
#include "Metrics.h"
|
#include "Metrics.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
@@ -18,8 +19,13 @@ namespace mdlp {
|
|||||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||||
{
|
{
|
||||||
set<int> nClasses;
|
set<int> nClasses;
|
||||||
|
if (indices.empty() || start >= indices.size() || end > indices.size()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
for (auto i = start; i < end; ++i) {
|
for (auto i = start; i < end; ++i) {
|
||||||
nClasses.insert(y[indices[i]]);
|
if (i < indices.size() && indices[i] < y.size()) {
|
||||||
|
nClasses.insert(y[indices[i]]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return static_cast<int>(nClasses.size());
|
return static_cast<int>(nClasses.size());
|
||||||
}
|
}
|
||||||
@@ -38,7 +44,7 @@ namespace mdlp {
|
|||||||
{
|
{
|
||||||
if (end - start < 2)
|
if (end - start < 2)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
// Check cache first with read lock
|
// Check cache first with read lock
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||||
@@ -46,15 +52,37 @@ namespace mdlp {
|
|||||||
return entropyCache[{start, end}];
|
return entropyCache[{start, end}];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute entropy outside of lock
|
// Compute entropy outside of lock
|
||||||
precision_t p;
|
precision_t p;
|
||||||
precision_t ventropy = 0;
|
precision_t ventropy = 0;
|
||||||
int nElements = 0;
|
int nElements = 0;
|
||||||
labels_t counts(numClasses + 1, 0);
|
|
||||||
|
|
||||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
if (indices.empty() || start >= indices.size() || end > indices.size()) {
|
||||||
counts[y[*i]]++;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First pass: find max label to size counts array properly
|
||||||
|
size_t max_label = 0;
|
||||||
|
for (size_t i = start; i < end; ++i) {
|
||||||
|
if (i >= indices.size()) break;
|
||||||
|
size_t idx = indices[i];
|
||||||
|
if (idx >= y.size()) continue;
|
||||||
|
size_t label = y[idx];
|
||||||
|
if (label > max_label) {
|
||||||
|
max_label = label;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
labels_t counts(max_label + 1, 0);
|
||||||
|
|
||||||
|
// Second pass: count occurrences
|
||||||
|
for (size_t i = start; i < end; ++i) {
|
||||||
|
if (i >= indices.size()) break;
|
||||||
|
size_t idx = indices[i];
|
||||||
|
if (idx >= y.size()) continue;
|
||||||
|
size_t label = y[idx];
|
||||||
|
counts[label]++;
|
||||||
nElements++;
|
nElements++;
|
||||||
}
|
}
|
||||||
for (auto count : counts) {
|
for (auto count : counts) {
|
||||||
@@ -63,13 +91,13 @@ namespace mdlp {
|
|||||||
ventropy -= p * log2(p);
|
ventropy -= p * log2(p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update cache with write lock
|
// Update cache with write lock
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||||
entropyCache[{start, end}] = ventropy;
|
entropyCache[{start, end}] = ventropy;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ventropy;
|
return ventropy;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,7 +110,7 @@ namespace mdlp {
|
|||||||
return igCache[make_tuple(start, cut, end)];
|
return igCache[make_tuple(start, cut, end)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute information gain outside of lock
|
// Compute information gain outside of lock
|
||||||
precision_t iGain;
|
precision_t iGain;
|
||||||
precision_t entropyInterval;
|
precision_t entropyInterval;
|
||||||
@@ -91,7 +119,7 @@ namespace mdlp {
|
|||||||
size_t nElementsLeft = cut - start;
|
size_t nElementsLeft = cut - start;
|
||||||
size_t nElementsRight = end - cut;
|
size_t nElementsRight = end - cut;
|
||||||
size_t nElements = end - start;
|
size_t nElements = end - start;
|
||||||
|
|
||||||
entropyInterval = entropy(start, end);
|
entropyInterval = entropy(start, end);
|
||||||
entropyLeft = entropy(start, cut);
|
entropyLeft = entropy(start, cut);
|
||||||
entropyRight = entropy(cut, end);
|
entropyRight = entropy(cut, end);
|
||||||
@@ -99,13 +127,13 @@ namespace mdlp {
|
|||||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||||
static_cast<precision_t>(nElements);
|
static_cast<precision_t>(nElements);
|
||||||
|
|
||||||
// Update cache with write lock
|
// Update cache with write lock
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(cache_mutex);
|
std::lock_guard<std::mutex> lock(cache_mutex);
|
||||||
igCache[make_tuple(start, cut, end)] = iGain;
|
igCache[make_tuple(start, cut, end)] = iGain;
|
||||||
}
|
}
|
||||||
|
|
||||||
return iGain;
|
return iGain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -41,7 +41,7 @@ namespace mdlp {
|
|||||||
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
||||||
auto version = disc->version();
|
auto version = disc->version();
|
||||||
delete disc;
|
delete disc;
|
||||||
EXPECT_EQ("2.1.0", version);
|
EXPECT_EQ("2.1.2", version);
|
||||||
}
|
}
|
||||||
TEST(Discretizer, BinIrisUniform)
|
TEST(Discretizer, BinIrisUniform)
|
||||||
{
|
{
|
||||||
|
@@ -417,6 +417,28 @@ namespace mdlp {
|
|||||||
EXPECT_THROW_WITH_MESSAGE(safe_y_access(2), std::out_of_range, "Index out of bounds for y array");
|
EXPECT_THROW_WITH_MESSAGE(safe_y_access(2), std::out_of_range, "Index out of bounds for y array");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestFImdlp, SafeXAccessEmptyIndices)
|
||||||
|
{
|
||||||
|
// Test safe_X_access with empty indices array
|
||||||
|
X = { 1.0f, 2.0f, 3.0f };
|
||||||
|
y = { 1, 2, 3 };
|
||||||
|
indices = {}; // empty indices array
|
||||||
|
|
||||||
|
// This should trigger the indices.empty() exception in safe_X_access
|
||||||
|
EXPECT_THROW_WITH_MESSAGE(safe_X_access(0), std::out_of_range, "Indices array is empty");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(TestFImdlp, SafeYAccessEmptyIndices)
|
||||||
|
{
|
||||||
|
// Test safe_y_access with empty indices array
|
||||||
|
X = { 1.0f, 2.0f, 3.0f };
|
||||||
|
y = { 1, 2, 3 };
|
||||||
|
indices = {}; // empty indices array
|
||||||
|
|
||||||
|
// This should trigger the indices.empty() exception in safe_y_access
|
||||||
|
EXPECT_THROW_WITH_MESSAGE(safe_y_access(0), std::out_of_range, "Indices array is empty");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(TestFImdlp, SafeSubtractUnderflow)
|
TEST_F(TestFImdlp, SafeSubtractUnderflow)
|
||||||
{
|
{
|
||||||
// Test safe_subtract with underflow condition (b > a)
|
// Test safe_subtract with underflow condition (b > a)
|
||||||
|
@@ -37,12 +37,15 @@ namespace mdlp {
|
|||||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||||
setData(y, indices);
|
setData(y, indices);
|
||||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||||
|
y = { 0, 0, 1, 2, 3 };
|
||||||
|
ASSERT_NEAR(1.5f, entropy(0, 4), precision);
|
||||||
|
ASSERT_NEAR(1.921928f, entropy(0, 5), precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TestMetrics, EntropyDouble)
|
TEST_F(TestMetrics, EntropyDouble)
|
||||||
{
|
{
|
||||||
y = { 0, 0, 1, 2, 3 };
|
y = { 0, 0, 1, 2, 3 };
|
||||||
samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 };
|
samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.9219280948873623 };
|
||||||
for (auto idx = 0; idx < y.size(); ++idx) {
|
for (auto idx = 0; idx < y.size(); ++idx) {
|
||||||
ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
|
ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
|
||||||
}
|
}
|
||||||
@@ -56,4 +59,28 @@ namespace mdlp {
|
|||||||
setData(y, indices);
|
setData(y, indices);
|
||||||
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestMetrics, EntropyBoundsChecking)
|
||||||
|
{
|
||||||
|
// Test the conditions that cause entropy to return 0
|
||||||
|
|
||||||
|
// Test 1: Empty indices array
|
||||||
|
indices_t empty_indices = {};
|
||||||
|
labels_t test_y = { 1, 2, 3 };
|
||||||
|
setData(test_y, empty_indices);
|
||||||
|
EXPECT_EQ(0, entropy(0, 1)) << "Should return 0 when indices is empty";
|
||||||
|
|
||||||
|
// Test 2: start >= indices.size()
|
||||||
|
indices_t small_indices = { 0, 1 };
|
||||||
|
setData(test_y, small_indices);
|
||||||
|
EXPECT_EQ(0, entropy(2, 3)) << "Should return 0 when start >= indices.size()";
|
||||||
|
EXPECT_EQ(0, entropy(5, 6)) << "Should return 0 when start >> indices.size()";
|
||||||
|
|
||||||
|
// Test 3: end > indices.size()
|
||||||
|
EXPECT_EQ(0, entropy(0, 3)) << "Should return 0 when end > indices.size()";
|
||||||
|
EXPECT_EQ(0, entropy(1, 5)) << "Should return 0 when end >> indices.size()";
|
||||||
|
|
||||||
|
// Test edge case: start == indices.size()
|
||||||
|
EXPECT_EQ(0, entropy(2, 2)) << "Should return 0 when start == indices.size()";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user