Compare commits

..

1 Commits

Author SHA1 Message Date
3615a1463c Fix some issues in FeatureSelect 2025-05-31 14:36:51 +02:00
18 changed files with 209 additions and 181 deletions

View File

@@ -7,18 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [1.1.1] - 2025-05-08
### Internal
- Fix the vcpkg configuration in building the library.
- Fix the sample app to use the vcpkg configuration.
## [1.1.0] - 2025-04-27
### Internal
- Add changes to .clang-format to adjust to vscode format style thanks to <https://clang-format-configurator.site/>
- Add changes to .clang-format to ajust to vscode format style thanks to <https://clang-format-configurator.site/>
- Remove all the dependencies as git submodules and add them as vcpkg dependencies.
- Fix the dependencies versions for this specific BayesNet version.

View File

@@ -1,19 +1,21 @@
cmake_minimum_required(VERSION 3.27)
cmake_minimum_required(VERSION 3.20)
project(bayesnet
VERSION 1.1.1
project(BayesNet
VERSION 1.1.0
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
)
set(CMAKE_CXX_STANDARD 17)
cmake_policy(SET CMP0135 NEW)
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
find_package(Torch CONFIG REQUIRED)
find_package(fimdlp CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(folding CONFIG REQUIRED)
find_package(Torch REQUIRED)
if (POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif ()
# Global CMake variables
# ----------------------
@@ -31,83 +33,76 @@ endif()
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy" OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
option(INSTALL_GTEST "Enable installation of googletest" OFF)
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
option(INSTALL_GTEST "Enable installation of googletest." OFF)
add_subdirectory(config)
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
set(ENABLE_TESTING ON)
set(CODE_COVERAGE ON)
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
message(STATUS "Languages=${LANGUAGES}")
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
MESSAGE(STATUS "Code coverage enabled")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
if (ENABLE_CLANG_TIDY)
include(StaticAnalyzers) # clang-tidy
include(StaticAnalyzers) # clang-tidy
endif (ENABLE_CLANG_TIDY)
# Add the library
# ---------------
include_directories(
${bayesnet_SOURCE_DIR}
${CMAKE_BINARY_DIR}/configured_files/include
)
# External libraries - dependencies of BayesNet
# ---------------------------------------------
file(GLOB_RECURSE Sources "bayesnet/*.cc")
find_package(Torch CONFIG REQUIRED)
find_package(fimdlp CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(folding CONFIG REQUIRED)
add_library(bayesnet ${Sources})
target_link_libraries(bayesnet fimdlp::fimdlp folding::folding "${TORCH_LIBRARIES}")
# Subdirectories
# --------------
add_subdirectory(config)
add_subdirectory(bayesnet)
# Testing
# -------
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
set(ENABLE_TESTING ON)
set(CODE_COVERAGE ON)
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
if (ENABLE_TESTING)
MESSAGE(STATUS "Testing enabled")
find_package(Catch2 CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
enable_testing()
include(CTest)
add_subdirectory(tests)
else(ENABLE_TESTING)
message("Release mode")
MESSAGE(STATUS "Testing enabled")
find_package(Catch2 CONFIG REQUIRED)
include(CTest)
add_subdirectory(tests)
endif (ENABLE_TESTING)
# Installation
# ------------
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)
configure_package_config_file(
${CMAKE_CURRENT_SOURCE_DIR}/bayesnetConfig.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfig.cmake"
INSTALL_DESTINATION share/bayesnet)
install(TARGETS bayesnet
EXPORT bayesnetTargets
install(TARGETS BayesNet
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
CONFIGURATIONS Release)
install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
install(DIRECTORY bayesnet/
DESTINATION include/bayesnet
FILES_MATCHING
CONFIGURATIONS Release
PATTERN "*.h")
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h
DESTINATION include/bayesnet
CONFIGURATIONS Release)
install(EXPORT bayesnetTargets
FILE bayesnetTargets.cmake
NAMESPACE bayesnet::
DESTINATION share/bayesnet)
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfigVersion.cmake"
DESTINATION share/bayesnet
)
# Documentation
# -------------
find_package(Doxygen)
if (Doxygen_FOUND)
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
set(doxyfile ${DOC_DIR}/Doxyfile)
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
doxygen_add_docs(doxygen
WORKING_DIRECTORY ${DOC_DIR}
CONFIG_FILE ${doxyfile})
else (Doxygen_FOUND)
MESSAGE("* Doxygen not found")
endif (Doxygen_FOUND)

View File

@@ -5,7 +5,7 @@ SHELL := /bin/bash
f_release = build_Release
f_debug = build_Debug
f_diagrams = diagrams
app_targets = bayesnet
app_targets = BayesNet
test_targets = TestBayesNet
clang-uml = clang-uml
plantuml = plantuml
@@ -86,13 +86,10 @@ init: ## Initialize the project installing dependencies
clean: ## Clean the project
@echo ">>> Cleaning the project..."
@if test -d build_Debug ; then echo "- Deleting build_Debug folder" ; rm -rf build_Debug; fi
@if test -d build_Release ; then echo "- Deleting build_Release folder" ; rm -rf build_Release; fi
@if test -f CMakeCache.txt ; then echo "- Deleting CMakeCache.txt"; rm -f CMakeCache.txt; fi
@for folder in $(f_release) $(f_debug) vpcpkg_installed install_test ; do \
if test -d "$$folder" ; then \
echo "- Deleting $$folder folder" ; \
rm -rf "$$folder"; \
fi; \
done
@if test -d vcpkg_installed ; then echo "- Deleting vcpkg_installed folder" ; rm -rf vcpkg_installed; fi
@$(MAKE) clean-test
@echo ">>> Done";

View File

@@ -1,84 +1,141 @@
// ***************************************************************
// **
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
// **
#include <limits>
#include "bayesnet/utils/bayesnetUtils.h"
#include "FeatureSelect.h"
namespace bayesnet {
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
namespace bayesnet {
using namespace torch::indexing; // for Ellipsis constant
//---------------------------------------------------------------------
// ctor
//---------------------------------------------------------------------
FeatureSelect::FeatureSelect(const torch::Tensor& samples,
const std::vector<std::string>& features,
const std::string& className,
int maxFeatures,
int classNumStates,
const torch::Tensor& weights)
: Metrics(samples, features, className, classNumStates),
maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures),
weights(weights)
{
}
//---------------------------------------------------------------------
// public helpers
//---------------------------------------------------------------------
void FeatureSelect::initialize()
{
selectedFeatures.clear();
selectedScores.clear();
suLabels.clear();
suFeatures.clear();
fitted = false;
}
//---------------------------------------------------------------------
// Symmetrical Uncertainty (SU)
//---------------------------------------------------------------------
double FeatureSelect::symmetricalUncertainty(int a, int b)
{
/*
Compute symmetrical uncertainty. Normalize* information gain (mutual
information) with the entropies of the features in order to compensate
the bias due to high cardinality features. *Range [0, 1]
(https://www.sciencedirect.com/science/article/pii/S0020025519303603)
*/
auto x = samples.index({ a, "..." });
auto y = samples.index({ b, "..." });
auto mu = mutualInformation(x, y, weights);
auto hx = entropy(x, weights);
auto hy = entropy(y, weights);
return 2.0 * mu / (hx + hy);
* Compute symmetrical uncertainty. Normalises the information gain
* (mutual information) with the entropies of the variables to compensate
* the bias due to highcardinality features. Range: [0, 1]
* See: https://www.sciencedirect.com/science/article/pii/S0020025519303603
*/
auto x = samples.index({ a, Ellipsis }); // row a => feature a
auto y = (b >= 0) ? samples.index({ b, Ellipsis }) // row b (>=0) => feature b
: samples.index({ -1, Ellipsis }); // 1 treated as last row = labels
double mu = mutualInformation(x, y, weights);
double hx = entropy(x, weights);
double hy = entropy(y, weights);
const double denom = hx + hy;
if (denom == 0.0) return 0.0; // perfectly pure variables
return 2.0 * mu / denom;
}
//---------------------------------------------------------------------
// SU featureclass
//---------------------------------------------------------------------
void FeatureSelect::computeSuLabels()
{
// Compute Simmetrical Uncertainty between features and labels
// Compute Symmetrical Uncertainty between each feature and the class labels
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
for (int i = 0; i < features.size(); ++i) {
suLabels.push_back(symmetricalUncertainty(i, -1));
const int classIdx = static_cast<int>(samples.size(0)) - 1; // labels in last row
suLabels.reserve(features.size());
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
suLabels.emplace_back(symmetricalUncertainty(i, classIdx));
}
}
double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
//---------------------------------------------------------------------
// SU featurefeature with cache
//---------------------------------------------------------------------
double FeatureSelect::computeSuFeatures(int firstFeature, int secondFeature)
{
// Compute Simmetrical Uncertainty between features
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
try {
return suFeatures.at({ firstFeature, secondFeature });
}
catch (const std::out_of_range& e) {
double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result;
return result;
}
// Order the pair to exploit symmetry => only one entry in the map
auto ordered = std::minmax(firstFeature, secondFeature);
const std::pair<int, int> key{ ordered.first, ordered.second };
auto it = suFeatures.find(key);
if (it != suFeatures.end()) return it->second;
double result = symmetricalUncertainty(key.first, key.second);
suFeatures[key] = result; // store once (symmetry handled by ordering)
return result;
}
//---------------------------------------------------------------------
// Correlationbased Feature Selection (CFS) merit
//---------------------------------------------------------------------
double FeatureSelect::computeMeritCFS()
{
double rcf = 0;
for (auto feature : selectedFeatures) {
rcf += suLabels[feature];
}
double rff = 0;
int n = selectedFeatures.size();
for (const auto& item : doCombinations(selectedFeatures)) {
rff += computeSuFeatures(item.first, item.second);
}
return rcf / sqrt(n + (n * n - n) * rff);
const int n = static_cast<int>(selectedFeatures.size());
if (n == 0) return 0.0;
// average r_cf (featureclass)
double rcf_sum = 0.0;
for (int f : selectedFeatures) rcf_sum += suLabels[f];
const double rcf_avg = rcf_sum / n;
// average r_ff (featurefeature)
double rff_sum = 0.0;
const auto& pairs = doCombinations(selectedFeatures); // generates each unordered pair once
for (const auto& p : pairs) rff_sum += computeSuFeatures(p.first, p.second);
const double numPairs = n * (n - 1) * 0.5;
const double rff_avg = (numPairs > 0) ? rff_sum / numPairs : 0.0;
// Merit_S = k * r_cf / sqrt( k + k*(k1) * r_ff ) (Hall, 1999)
const double k = static_cast<double>(n);
return (k * rcf_avg) / std::sqrt(k + k * (k - 1) * rff_avg);
}
//---------------------------------------------------------------------
// getters
//---------------------------------------------------------------------
std::vector<int> FeatureSelect::getFeatures() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
if (!fitted) throw std::runtime_error("FeatureSelect not fitted");
return selectedFeatures;
}
std::vector<double> FeatureSelect::getScores() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
if (!fitted) throw std::runtime_error("FeatureSelect not fitted");
return selectedScores;
}
}
} // namespace bayesnet

View File

@@ -1,4 +0,0 @@
@PACKAGE_INIT@
include("${CMAKE_CURRENT_LIST_DIR}/bayesnetTargets.cmake")

View File

@@ -11,4 +11,4 @@ static constexpr std::string_view project_name = "@PROJECT_NAME@";
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view git_sha = "@GIT_SHA@";
static constexpr std::string_view data_path = "@bayesnet_SOURCE_DIR@/tests/data/";
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";

1
lib/catch2 Submodule

Submodule lib/catch2 added at 029fe3b460

1
lib/folding Submodule

Submodule lib/folding added at 2ac43e32ac

1
lib/json Submodule

Submodule lib/json added at 620034ecec

1
lib/mdlp Submodule

Submodule lib/mdlp added at 7d62d6af4a

View File

@@ -1,16 +1,15 @@
cmake_minimum_required(VERSION 3.20)
project(bayesnet_sample VERSION 0.1.0 LANGUAGES CXX)
project(bayesnet_sample)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_BUILD_TYPE Release)
find_package(Torch CONFIG REQUIRED)
find_package(bayesnet CONFIG REQUIRED)
find_package(fimdlp CONFIG REQUIRED)
find_package(folding CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
find_package(bayesnet CONFIG REQUIRED)
find_package(nlohman_json CONFIG REQUIRED)
add_executable(bayesnet_sample sample.cc)
target_link_libraries(bayesnet_sample PRIVATE
@@ -18,5 +17,6 @@ target_link_libraries(bayesnet_sample PRIVATE
arff-files::arff-files
"${TORCH_LIBRARIES}"
bayesnet::bayesnet
nlohmann_json::nlohmann_json
folding::folding
)

View File

@@ -1,21 +1,21 @@
{
"default-registry": {
"kind": "git",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
"repository": "https://github.com/microsoft/vcpkg"
},
"registries": [
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
"baseline": "393efa4e74e053b6f02c4ab03738c8fe796b28e5",
"packages": [
"arff-files",
"bayesnet",
"fimdlp",
"folding",
"bayesnet",
"arff-files",
"fimdlp",
"libtorch-bin"
]
}
]
],
"default-registry": {
"kind": "git",
"repository": "https://github.com/microsoft/vcpkg",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605"
}
}

View File

@@ -2,32 +2,11 @@
"name": "sample-project",
"version-string": "0.1.0",
"dependencies": [
"bayesnet",
"folding",
"arff-files",
"fimdlp",
"libtorch-bin",
"folding",
"bayesnet"
],
"overrides": [
{
"name": "arff-files",
"version": "1.1.0"
},
{
"name": "fimdlp",
"version": "2.0.1"
},
{
"name": "libtorch-bin",
"version": "2.7.0"
},
{
"name": "bayesnet",
"version": "1.1.1"
},
{
"name": "folding",
"version": "1.1.1"
}
"nlohmann-json",
"libtorch-bin"
]
}

View File

@@ -1,13 +1,18 @@
if(ENABLE_TESTING)
include_directories(
${BayesNet_SOURCE_DIR}/tests/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/mdlp/src
${BayesNet_SOURCE_DIR}/lib/log
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE BayesNet_SOURCES "${bayesnet_SOURCE_DIR}/bayesnet/*.cc")
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp::fimdlp PRIVATE Catch2::Catch2WithMain)
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")

View File

@@ -20,7 +20,7 @@
#include "bayesnet/ensembles/AODELd.h"
#include "bayesnet/ensembles/BoostAODE.h"
const std::string ACTUAL_VERSION = "1.1.1";
const std::string ACTUAL_VERSION = "1.1.0";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{

1
tests/lib/Files Submodule

Submodule tests/lib/Files added at a4329f5f9d

1
tests/lib/catch2 Submodule

Submodule tests/lib/catch2 added at 506276c592

View File

@@ -8,7 +8,7 @@
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
"baseline": "393efa4e74e053b6f02c4ab03738c8fe796b28e5",
"packages": [
"arff-files",
"fimdlp",