mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-24 11:55:57 +00:00
Compare commits
9 Commits
v2.1.2
...
c86c7f9ef0
Author | SHA1 | Date | |
---|---|---|---|
c86c7f9ef0
|
|||
828e6a28c0
|
|||
8f2a0015d9
|
|||
8945a3f16e
|
|||
80b7d6e6f7
|
|||
b1d550f211
|
|||
8a1b68376d
|
|||
|
563a84659f | ||
1b9d924ebe
|
10
CHANGELOG.md
10
CHANGELOG.md
@@ -5,16 +5,6 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [2.1.2] - 2025-08-22
|
||||
|
||||
### Added
|
||||
|
||||
- make info now gives info about the build status
|
||||
|
||||
### Fix
|
||||
|
||||
- Mistake in entropy computation
|
||||
|
||||
## [2.1.1] - 2025-07-17
|
||||
|
||||
### Internal Changes
|
||||
|
@@ -1,10 +1,10 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(fimdlp
|
||||
VERSION 2.1.2
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "Discretization algorithm based on the paper by Fayyad & Irani Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/mdlp"
|
||||
VERSION 2.1.1
|
||||
)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
45
Makefile
45
Makefile
@@ -8,18 +8,6 @@ f_release = build_release
|
||||
genhtml = genhtml
|
||||
docscdir = docs
|
||||
|
||||
# Set the number of parallel jobs to the number of available processors minus 7
|
||||
CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
|
||||
|| nproc --all 2>/dev/null \
|
||||
|| sysctl -n hw.ncpu)
|
||||
JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
|
||||
|
||||
# Colors for output
|
||||
GREEN = \033[0;32m
|
||||
YELLOW = \033[1;33m
|
||||
RED = \033[0;31m
|
||||
NC = \033[0m # No Color
|
||||
|
||||
define build_target
|
||||
@echo ">>> Building the project for $(1)..."
|
||||
@if [ -d $(2) ]; then rm -fr $(2); fi
|
||||
@@ -28,21 +16,6 @@ define build_target
|
||||
@cmake --build $(2) --config $(1) -j 8
|
||||
endef
|
||||
|
||||
define status_file_folder
|
||||
@if [ -d $(1) ]; then \
|
||||
st1=" ✅ $(GREEN)"; \
|
||||
else \
|
||||
st1=" ❌ $(RED)"; \
|
||||
fi; \
|
||||
if [ -f $(1)/libfimdlp.a ]; then \
|
||||
st2=" ✅ $(GREEN)"; \
|
||||
else \
|
||||
st2=" ❌ $(RED)"; \
|
||||
fi; \
|
||||
printf " $(YELLOW)$(2):$(NC) $$st1 Folder $(NC) $$st2 Library $(NC)\n"
|
||||
endef
|
||||
|
||||
|
||||
debug: ## Build Debug version of the library
|
||||
@$(call build_target,"Debug","$(f_debug)", "ENABLE_TESTING=ON", "-o enable_testing=True")
|
||||
|
||||
@@ -88,22 +61,6 @@ viewcoverage: ## View the html coverage report
|
||||
@xdg-open $(docscdir)/coverage/index.html || open $(docscdir)/coverage/index.html 2>/dev/null
|
||||
@echo ">>> Done";
|
||||
|
||||
info: ## Show project information
|
||||
@version=$$(grep -A1 "project(fimdlp" CMakeLists.txt | grep "VERSION" | sed 's/.*VERSION \([0-9.]*\).*/\1/'); \
|
||||
printf "$(GREEN)FImdlp Library: $(YELLOW)ver. $$version$(NC)\n"
|
||||
@echo ""
|
||||
@printf "$(GREEN)Project folders:$(NC)\n"
|
||||
$(call status_file_folder, $(f_release), "Build\ Release")
|
||||
$(call status_file_folder, $(f_debug), "Build\ Debug\ \ ")
|
||||
@echo ""
|
||||
@printf "$(GREEN)Build commands:$(NC)\n"
|
||||
@printf " $(YELLOW)make release$(NC) - Build library for release\n"
|
||||
@printf " $(YELLOW)make debug$(NC) - Build library for debug\n"
|
||||
@printf " $(YELLOW)make test$(NC) - Run tests\n"
|
||||
@printf " $(YELLOW)Usage:$(NC) make help\n"
|
||||
@echo ""
|
||||
@printf " $(YELLOW)Parallel Jobs: $(GREEN)$(JOBS)$(NC)\n"
|
||||
|
||||
conan-create: ## Create the conan package
|
||||
@echo ">>> Creating the conan package..."
|
||||
conan create . --build=missing -tf "" -s:a build_type=Release
|
||||
@@ -112,7 +69,7 @@ conan-create: ## Create the conan package
|
||||
|
||||
help: ## Show help message
|
||||
@IFS=$$'\n' ; \
|
||||
help_lines=(`grep -Fh "##" $(MAKEFILE_LIST) | grep -Fv fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||
printf "%s\n\n" "Usage: make [task]"; \
|
||||
printf "%-20s %s\n" "task" "help" ; \
|
||||
printf "%-20s %s\n" "------" "----" ; \
|
||||
|
@@ -98,7 +98,7 @@ namespace mdlp {
|
||||
const precision_t fraction =
|
||||
(percentile / 100.0 - percentI) /
|
||||
(static_cast<precision_t>(indexLower + 1) / static_cast<precision_t>(data.size() - 1) - percentI);
|
||||
if (const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; first || results.empty() || value != results.back()) // Check empty before calling back()
|
||||
if (const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; value != results.back() || first) // first needed as results.back() return is undefined for empty vectors
|
||||
results.push_back(value);
|
||||
first = false;
|
||||
}
|
||||
|
@@ -41,9 +41,6 @@ namespace mdlp {
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
inline precision_t safe_X_access(size_t idx) const
|
||||
{
|
||||
if (indices.empty()) {
|
||||
throw std::out_of_range("Indices array is empty");
|
||||
}
|
||||
if (idx >= indices.size()) {
|
||||
throw std::out_of_range("Index out of bounds for indices array");
|
||||
}
|
||||
@@ -55,9 +52,6 @@ namespace mdlp {
|
||||
}
|
||||
inline label_t safe_y_access(size_t idx) const
|
||||
{
|
||||
if (indices.empty()) {
|
||||
throw std::out_of_range("Indices array is empty");
|
||||
}
|
||||
if (idx >= indices.size()) {
|
||||
throw std::out_of_range("Index out of bounds for indices array");
|
||||
}
|
||||
|
@@ -7,7 +7,6 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
@@ -19,13 +18,8 @@ namespace mdlp {
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
if (indices.empty() || start >= indices.size() || end > indices.size()) {
|
||||
return 0;
|
||||
}
|
||||
for (auto i = start; i < end; ++i) {
|
||||
if (i < indices.size() && indices[i] < y.size()) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
@@ -57,32 +51,10 @@ namespace mdlp {
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
|
||||
if (indices.empty() || start >= indices.size() || end > indices.size()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// First pass: find max label to size counts array properly
|
||||
size_t max_label = 0;
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
if (i >= indices.size()) break;
|
||||
size_t idx = indices[i];
|
||||
if (idx >= y.size()) continue;
|
||||
size_t label = y[idx];
|
||||
if (label > max_label) {
|
||||
max_label = label;
|
||||
}
|
||||
}
|
||||
|
||||
labels_t counts(max_label + 1, 0);
|
||||
|
||||
// Second pass: count occurrences
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
if (i >= indices.size()) break;
|
||||
size_t idx = indices[i];
|
||||
if (idx >= y.size()) continue;
|
||||
size_t label = y[idx];
|
||||
counts[label]++;
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
nElements++;
|
||||
}
|
||||
for (auto count : counts) {
|
||||
|
@@ -41,7 +41,7 @@ namespace mdlp {
|
||||
Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM);
|
||||
auto version = disc->version();
|
||||
delete disc;
|
||||
EXPECT_EQ("2.1.2", version);
|
||||
EXPECT_EQ("2.1.1", version);
|
||||
}
|
||||
TEST(Discretizer, BinIrisUniform)
|
||||
{
|
||||
|
@@ -417,28 +417,6 @@ namespace mdlp {
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_y_access(2), std::out_of_range, "Index out of bounds for y array");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeXAccessEmptyIndices)
|
||||
{
|
||||
// Test safe_X_access with empty indices array
|
||||
X = { 1.0f, 2.0f, 3.0f };
|
||||
y = { 1, 2, 3 };
|
||||
indices = {}; // empty indices array
|
||||
|
||||
// This should trigger the indices.empty() exception in safe_X_access
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_X_access(0), std::out_of_range, "Indices array is empty");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeYAccessEmptyIndices)
|
||||
{
|
||||
// Test safe_y_access with empty indices array
|
||||
X = { 1.0f, 2.0f, 3.0f };
|
||||
y = { 1, 2, 3 };
|
||||
indices = {}; // empty indices array
|
||||
|
||||
// This should trigger the indices.empty() exception in safe_y_access
|
||||
EXPECT_THROW_WITH_MESSAGE(safe_y_access(0), std::out_of_range, "Indices array is empty");
|
||||
}
|
||||
|
||||
TEST_F(TestFImdlp, SafeSubtractUnderflow)
|
||||
{
|
||||
// Test safe_subtract with underflow condition (b > a)
|
||||
|
@@ -37,15 +37,12 @@ namespace mdlp {
|
||||
y = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 };
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.468996f, entropy(0, 10), precision);
|
||||
y = { 0, 0, 1, 2, 3 };
|
||||
ASSERT_NEAR(1.5f, entropy(0, 4), precision);
|
||||
ASSERT_NEAR(1.921928f, entropy(0, 5), precision);
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, EntropyDouble)
|
||||
{
|
||||
y = { 0, 0, 1, 2, 3 };
|
||||
samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.9219280948873623 };
|
||||
samples_t expected_entropies = { 0.0, 0.0, 0.91829583, 1.5, 1.4575424759098898 };
|
||||
for (auto idx = 0; idx < y.size(); ++idx) {
|
||||
ASSERT_NEAR(expected_entropies[idx], entropy(0, idx + 1), precision);
|
||||
}
|
||||
@@ -59,28 +56,4 @@ namespace mdlp {
|
||||
setData(y, indices);
|
||||
ASSERT_NEAR(0.108032f, informationGain(0, 5, 10), precision);
|
||||
}
|
||||
|
||||
TEST_F(TestMetrics, EntropyBoundsChecking)
|
||||
{
|
||||
// Test the conditions that cause entropy to return 0
|
||||
|
||||
// Test 1: Empty indices array
|
||||
indices_t empty_indices = {};
|
||||
labels_t test_y = { 1, 2, 3 };
|
||||
setData(test_y, empty_indices);
|
||||
EXPECT_EQ(0, entropy(0, 1)) << "Should return 0 when indices is empty";
|
||||
|
||||
// Test 2: start >= indices.size()
|
||||
indices_t small_indices = { 0, 1 };
|
||||
setData(test_y, small_indices);
|
||||
EXPECT_EQ(0, entropy(2, 3)) << "Should return 0 when start >= indices.size()";
|
||||
EXPECT_EQ(0, entropy(5, 6)) << "Should return 0 when start >> indices.size()";
|
||||
|
||||
// Test 3: end > indices.size()
|
||||
EXPECT_EQ(0, entropy(0, 3)) << "Should return 0 when end > indices.size()";
|
||||
EXPECT_EQ(0, entropy(1, 5)) << "Should return 0 when end >> indices.size()";
|
||||
|
||||
// Test edge case: start == indices.size()
|
||||
EXPECT_EQ(0, entropy(2, 2)) << "Should return 0 when start == indices.size()";
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user