Compare commits
8 Commits
FixSelectF
...
da357ac5ba
Author | SHA1 | Date | |
---|---|---|---|
da357ac5ba
|
|||
833455803e
|
|||
74a9d29dc1 | |||
36ce6effe9
|
|||
250036f224
|
|||
b11620bbe8
|
|||
8a02a3a5cb
|
|||
7f6f49b3d0
|
13
CHANGELOG.md
13
CHANGELOG.md
@@ -7,11 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [1.1.1] - 2025-05-20
|
||||
|
||||
### Internal
|
||||
|
||||
- Fix CFS metric expression in the FeatureSelection class.
|
||||
- Fix the vcpkg configuration in building the library.
|
||||
- Fix the sample app to use the vcpkg configuration.
|
||||
- Add predict_proba method to all Ld classifiers.
|
||||
- Refactor the computeCPT method in the Node class with libtorch vectorized operations.
|
||||
- Refactor the sample to use local discretization models.
|
||||
|
||||
## [1.1.0] - 2025-04-27
|
||||
|
||||
### Internal
|
||||
|
||||
- Add changes to .clang-format to ajust to vscode format style thanks to <https://clang-format-configurator.site/>
|
||||
- Add changes to .clang-format to adjust to vscode format style thanks to <https://clang-format-configurator.site/>
|
||||
- Remove all the dependencies as git submodules and add them as vcpkg dependencies.
|
||||
- Fix the dependencies versions for this specific BayesNet version.
|
||||
|
||||
|
135
CMakeLists.txt
135
CMakeLists.txt
@@ -1,21 +1,19 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
cmake_minimum_required(VERSION 3.27)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.1.0
|
||||
project(bayesnet
|
||||
VERSION 1.1.1
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
|
||||
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
find_package(fimdlp CONFIG REQUIRED)
|
||||
find_package(nlohmann_json CONFIG REQUIRED)
|
||||
find_package(folding CONFIG REQUIRED)
|
||||
|
||||
# Global CMake variables
|
||||
# ----------------------
|
||||
@@ -33,76 +31,83 @@ endif()
|
||||
|
||||
# Options
|
||||
# -------
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||
option(ENABLE_TESTING "Unit testing build" OFF)
|
||||
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
||||
option(INSTALL_GTEST "Enable installation of googletest." OFF)
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy" OFF)
|
||||
option(ENABLE_TESTING "Unit testing build" OFF)
|
||||
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
||||
option(INSTALL_GTEST "Enable installation of googletest" OFF)
|
||||
|
||||
# CMake modules
|
||||
# -------------
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
MESSAGE("Debug mode")
|
||||
set(ENABLE_TESTING ON)
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
message(STATUS "Languages=${LANGUAGES}")
|
||||
if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE(STATUS "Code coverage enabled")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
add_subdirectory(config)
|
||||
|
||||
if (ENABLE_CLANG_TIDY)
|
||||
include(StaticAnalyzers) # clang-tidy
|
||||
include(StaticAnalyzers) # clang-tidy
|
||||
endif (ENABLE_CLANG_TIDY)
|
||||
|
||||
# External libraries - dependencies of BayesNet
|
||||
# ---------------------------------------------
|
||||
# Add the library
|
||||
# ---------------
|
||||
include_directories(
|
||||
${bayesnet_SOURCE_DIR}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
find_package(fimdlp CONFIG REQUIRED)
|
||||
find_package(nlohmann_json CONFIG REQUIRED)
|
||||
find_package(folding CONFIG REQUIRED)
|
||||
file(GLOB_RECURSE Sources "bayesnet/*.cc")
|
||||
|
||||
# Subdirectories
|
||||
# --------------
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(bayesnet)
|
||||
add_library(bayesnet ${Sources})
|
||||
target_link_libraries(bayesnet fimdlp::fimdlp folding::folding "${TORCH_LIBRARIES}")
|
||||
|
||||
# Testing
|
||||
# -------
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
MESSAGE("Debug mode")
|
||||
set(ENABLE_TESTING ON)
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
if (ENABLE_TESTING)
|
||||
MESSAGE(STATUS "Testing enabled")
|
||||
find_package(Catch2 CONFIG REQUIRED)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
MESSAGE(STATUS "Testing enabled")
|
||||
find_package(Catch2 CONFIG REQUIRED)
|
||||
find_package(arff-files CONFIG REQUIRED)
|
||||
enable_testing()
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
else(ENABLE_TESTING)
|
||||
message("Release mode")
|
||||
endif (ENABLE_TESTING)
|
||||
|
||||
# Installation
|
||||
# ------------
|
||||
install(TARGETS BayesNet
|
||||
include(CMakePackageConfigHelpers)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
configure_package_config_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bayesnetConfig.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfig.cmake"
|
||||
INSTALL_DESTINATION share/bayesnet)
|
||||
|
||||
install(TARGETS bayesnet
|
||||
EXPORT bayesnetTargets
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib
|
||||
CONFIGURATIONS Release)
|
||||
install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
|
||||
|
||||
# Documentation
|
||||
# -------------
|
||||
find_package(Doxygen)
|
||||
if (Doxygen_FOUND)
|
||||
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
|
||||
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
|
||||
set(doxyfile ${DOC_DIR}/Doxyfile)
|
||||
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
|
||||
doxygen_add_docs(doxygen
|
||||
WORKING_DIRECTORY ${DOC_DIR}
|
||||
CONFIG_FILE ${doxyfile})
|
||||
else (Doxygen_FOUND)
|
||||
MESSAGE("* Doxygen not found")
|
||||
endif (Doxygen_FOUND)
|
||||
install(DIRECTORY bayesnet/
|
||||
DESTINATION include/bayesnet
|
||||
FILES_MATCHING
|
||||
CONFIGURATIONS Release
|
||||
PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h
|
||||
DESTINATION include/bayesnet
|
||||
CONFIGURATIONS Release)
|
||||
|
||||
install(EXPORT bayesnetTargets
|
||||
FILE bayesnetTargets.cmake
|
||||
NAMESPACE bayesnet::
|
||||
DESTINATION share/bayesnet)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/bayesnetConfigVersion.cmake"
|
||||
DESTINATION share/bayesnet
|
||||
)
|
||||
|
14
Makefile
14
Makefile
@@ -5,7 +5,7 @@ SHELL := /bin/bash
|
||||
f_release = build_Release
|
||||
f_debug = build_Debug
|
||||
f_diagrams = diagrams
|
||||
app_targets = BayesNet
|
||||
app_targets = bayesnet
|
||||
test_targets = TestBayesNet
|
||||
clang-uml = clang-uml
|
||||
plantuml = plantuml
|
||||
@@ -86,10 +86,13 @@ init: ## Initialize the project installing dependencies
|
||||
|
||||
clean: ## Clean the project
|
||||
@echo ">>> Cleaning the project..."
|
||||
@if test -d build_Debug ; then echo "- Deleting build_Debug folder" ; rm -rf build_Debug; fi
|
||||
@if test -d build_Release ; then echo "- Deleting build_Release folder" ; rm -rf build_Release; fi
|
||||
@if test -f CMakeCache.txt ; then echo "- Deleting CMakeCache.txt"; rm -f CMakeCache.txt; fi
|
||||
@if test -d vcpkg_installed ; then echo "- Deleting vcpkg_installed folder" ; rm -rf vcpkg_installed; fi
|
||||
# Delete the release/debug build trees, the vcpkg install tree and the install-test tree when they exist.
|
||||
@for folder in $(f_release) $(f_debug) vcpkg_installed install_test ; do \
|
||||
if test -d "$$folder" ; then \
|
||||
echo "- Deleting $$folder folder" ; \
|
||||
rm -rf "$$folder"; \
|
||||
fi; \
|
||||
done
|
||||
@$(MAKE) clean-test
|
||||
@echo ">>> Done";
|
||||
|
||||
@@ -108,12 +111,13 @@ release: ## Build a Release version of the project
|
||||
@echo ">>> Done";
|
||||
|
||||
fname = "tests/data/iris.arff"
|
||||
model = "TANLd"
|
||||
sample: ## Build sample
|
||||
@echo ">>> Building Sample...";
|
||||
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
|
||||
@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake && \
|
||||
cmake --build build -t bayesnet_sample
|
||||
sample/build/bayesnet_sample $(fname)
|
||||
sample/build/bayesnet_sample $(fname) $(model)
|
||||
@echo ">>> Done";
|
||||
|
||||
fname = "tests/data/iris.arff"
|
||||
|
@@ -6,6 +6,7 @@
|
||||
[](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
[](https://deepwiki.com/Doctorado-ML/BayesNet)
|
||||

|
||||
[](https://gitea.rmontanana.es/rmontanana/BayesNet)
|
||||
[](https://doi.org/10.5281/zenodo.14210344)
|
||||
|
@@ -28,6 +28,11 @@ namespace bayesnet {
|
||||
auto Xt = prepareX(X);
|
||||
return KDB::predict(Xt);
|
||||
}
|
||||
// Passes X through prepareX() and returns KDB::predict_proba() evaluated on the prepared tensor.
|
||||
// NOTE(review): prepareX is defined outside this excerpt; presumably it applies the fitted local discretization - confirm.
|
||||
torch::Tensor KDBLd::predict_proba(torch::Tensor& X)
|
||||
{
|
||||
auto Xt = prepareX(X);
|
||||
return KDB::predict_proba(Xt);
|
||||
}
|
||||
std::vector<std::string> KDBLd::graph(const std::string& name) const
|
||||
{
|
||||
return KDB::graph(name);
|
||||
|
@@ -18,6 +18,7 @@ namespace bayesnet {
|
||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ namespace bayesnet {
|
||||
Proposal::~Proposal()
|
||||
{
|
||||
for (auto& [key, value] : discretizers) {
|
||||
delete value;
|
||||
delete value;
|
||||
}
|
||||
}
|
||||
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
|
||||
@@ -23,6 +23,7 @@ namespace bayesnet {
|
||||
throw std::invalid_argument("y must be an integer tensor");
|
||||
}
|
||||
}
|
||||
// Fit method for single classifier
|
||||
map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
|
||||
{
|
||||
// order of local discretization is important. no good 0, 1, 2...
|
||||
|
@@ -43,6 +43,11 @@ namespace bayesnet {
|
||||
auto Xt = prepareX(X);
|
||||
return SPODE::predict(Xt);
|
||||
}
|
||||
// Passes X through prepareX() and returns SPODE::predict_proba() evaluated on the prepared tensor.
|
||||
// NOTE(review): prepareX is defined outside this excerpt; presumably it applies the fitted local discretization - confirm.
|
||||
torch::Tensor SPODELd::predict_proba(torch::Tensor& X)
|
||||
{
|
||||
auto Xt = prepareX(X);
|
||||
return SPODE::predict_proba(Xt);
|
||||
}
|
||||
std::vector<std::string> SPODELd::graph(const std::string& name) const
|
||||
{
|
||||
return SPODE::graph(name);
|
||||
|
@@ -19,6 +19,7 @@ namespace bayesnet {
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
}
|
||||
|
@@ -29,6 +29,11 @@ namespace bayesnet {
|
||||
auto Xt = prepareX(X);
|
||||
return TAN::predict(Xt);
|
||||
}
|
||||
// Passes X through prepareX() and returns TAN::predict_proba() evaluated on the prepared tensor.
|
||||
// NOTE(review): prepareX is defined outside this excerpt; presumably it applies the fitted local discretization - confirm.
|
||||
torch::Tensor TANLd::predict_proba(torch::Tensor& X)
|
||||
{
|
||||
auto Xt = prepareX(X);
|
||||
return TAN::predict_proba(Xt);
|
||||
}
|
||||
std::vector<std::string> TANLd::graph(const std::string& name) const
|
||||
{
|
||||
return TAN::graph(name);
|
||||
|
@@ -18,6 +18,7 @@ namespace bayesnet {
|
||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||
};
|
||||
}
|
||||
#endif // !TANLD_H
|
@@ -5,6 +5,7 @@
|
||||
// ***************************************************************
|
||||
|
||||
#include "Node.h"
|
||||
#include <iterator>
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
@@ -94,39 +95,51 @@ namespace bayesnet {
|
||||
{
|
||||
dimensions.clear();
|
||||
dimensions.reserve(parents.size() + 1);
|
||||
// Get dimensions of the CPT
|
||||
dimensions.push_back(numStates);
|
||||
for (const auto& parent : parents) {
|
||||
dimensions.push_back(parent->getNumStates());
|
||||
}
|
||||
//transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
|
||||
// Create a tensor initialized with smoothing
|
||||
cpTable = torch::full(dimensions, smoothing, torch::kDouble);
|
||||
// Create a map for quick feature index lookup
|
||||
|
||||
// Build feature index map
|
||||
std::unordered_map<std::string, int> featureIndexMap;
|
||||
for (size_t i = 0; i < features.size(); ++i) {
|
||||
featureIndexMap[features[i]] = i;
|
||||
}
|
||||
// Fill table with counts
|
||||
// Get the index of this node's feature
|
||||
int name_index = featureIndexMap[name];
|
||||
// Get parent indices in dataset
|
||||
std::vector<int> parent_indices;
|
||||
parent_indices.reserve(parents.size());
|
||||
|
||||
// Gather indices for node and parents
|
||||
std::vector<int64_t> all_indices;
|
||||
all_indices.push_back(featureIndexMap[name]);
|
||||
for (const auto& parent : parents) {
|
||||
parent_indices.push_back(featureIndexMap[parent->getName()]);
|
||||
all_indices.push_back(featureIndexMap[parent->getName()]);
|
||||
}
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
|
||||
coordinates.clear();
|
||||
auto sample = dataset.index({ "...", n_sample });
|
||||
coordinates.push_back(sample[name_index]);
|
||||
for (size_t i = 0; i < parent_indices.size(); ++i) {
|
||||
coordinates.push_back(sample[parent_indices[i]]);
|
||||
|
||||
// Extract relevant columns: shape (num_features, num_samples)
|
||||
auto indices_tensor = dataset.index_select(0, torch::tensor(all_indices, torch::kLong));
|
||||
indices_tensor = indices_tensor.transpose(0, 1).to(torch::kLong); // (num_samples, num_features)
|
||||
|
||||
// Manual flattening of indices
|
||||
std::vector<int64_t> strides(all_indices.size(), 1);
|
||||
for (int i = strides.size() - 2; i >= 0; --i) {
|
||||
strides[i] = strides[i + 1] * cpTable.size(i + 1);
|
||||
}
|
||||
auto indices_tensor_cpu = indices_tensor.cpu();
|
||||
auto indices_accessor = indices_tensor_cpu.accessor<int64_t, 2>();
|
||||
std::vector<int64_t> flat_indices(indices_tensor.size(0));
|
||||
for (int64_t i = 0; i < indices_tensor.size(0); ++i) {
|
||||
int64_t idx = 0;
|
||||
for (size_t j = 0; j < strides.size(); ++j) {
|
||||
idx += indices_accessor[i][j] * strides[j];
|
||||
}
|
||||
// Increment the count of the corresponding coordinate
|
||||
cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
|
||||
flat_indices[i] = idx;
|
||||
}
|
||||
|
||||
// Accumulate weights into flat CPT
|
||||
auto flat_cpt = cpTable.flatten();
|
||||
auto flat_indices_tensor = torch::from_blob(flat_indices.data(), { (int64_t)flat_indices.size() }, torch::kLong).clone();
|
||||
flat_cpt.index_add_(0, flat_indices_tensor, weights.cpu());
|
||||
cpTable = flat_cpt.view(cpTable.sizes());
|
||||
|
||||
// Normalize the counts (dividing each row by the sum of the row)
|
||||
cpTable /= cpTable.sum(0, true);
|
||||
}
|
||||
|
4
bayesnetConfig.cmake.in
Normal file
4
bayesnetConfig.cmake.in
Normal file
@@ -0,0 +1,4 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/bayesnetTargets.cmake")
|
||||
|
@@ -11,4 +11,4 @@ static constexpr std::string_view project_name = "@PROJECT_NAME@";
|
||||
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
|
||||
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
|
||||
static constexpr std::string_view git_sha = "@GIT_SHA@";
|
||||
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";
|
||||
static constexpr std::string_view data_path = "@bayesnet_SOURCE_DIR@/tests/data/";
|
Submodule lib/catch2 deleted from 029fe3b460
Submodule lib/folding deleted from 2ac43e32ac
1
lib/json
1
lib/json
Submodule lib/json deleted from 620034ecec
1
lib/mdlp
1
lib/mdlp
Submodule lib/mdlp deleted from 7d62d6af4a
@@ -1,22 +1,40 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(bayesnet_sample)
|
||||
project(bayesnet_sample VERSION 0.1.0 LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
|
||||
find_package(Torch CONFIG REQUIRED)
|
||||
find_package(bayesnet CONFIG REQUIRED)
|
||||
find_package(fimdlp CONFIG REQUIRED)
|
||||
find_package(folding CONFIG REQUIRED)
|
||||
find_package(arff-files CONFIG REQUIRED)
|
||||
find_package(nlohman_json CONFIG REQUIRED)
|
||||
find_package(nlohmann_json CONFIG REQUIRED)
|
||||
|
||||
option(BAYESNET_VCPKG_CONFIG "Use vcpkg config for BayesNet" ON)
|
||||
|
||||
# Locate BayesNet: through its CMake package config when BAYESNET_VCPKG_CONFIG is ON,
|
||||
# otherwise by wrapping a library found on disk in an imported bayesnet::bayesnet target.
|
||||
if (BAYESNET_VCPKG_CONFIG)
|
||||
message(STATUS "Using BayesNet vcpkg config")
|
||||
find_package(bayesnet CONFIG REQUIRED)
|
||||
set(BayesNet_LIBRARIES bayesnet::bayesnet)
|
||||
else()
|
||||
message(STATUS "Using BayesNet local library config")
|
||||
# Platform_SOURCE_DIR is never defined by this project, so the search paths are anchored at PROJECT_SOURCE_DIR.
|
||||
# NOTE(review): assumes the local install lives in ../lib relative to this directory - confirm.
|
||||
find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS "${PROJECT_SOURCE_DIR}/../lib/lib" REQUIRED)
|
||||
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS "${PROJECT_SOURCE_DIR}/../lib/include")
|
||||
# Expose the located files under the same target name the package config would provide.
|
||||
add_library(bayesnet::bayesnet UNKNOWN IMPORTED)
|
||||
set_target_properties(bayesnet::bayesnet PROPERTIES
|
||||
IMPORTED_LOCATION "${bayesnet}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${Bayesnet_INCLUDE_DIRS}"
|
||||
)
|
||||
endif()
|
||||
message(STATUS "BayesNet: ${bayesnet}")
|
||||
|
||||
add_executable(bayesnet_sample sample.cc)
|
||||
target_link_libraries(bayesnet_sample PRIVATE
|
||||
fimdlp::fimdlp
|
||||
arff-files::arff-files
|
||||
"${TORCH_LIBRARIES}"
|
||||
bayesnet::bayesnet
|
||||
nlohmann_json::nlohmann_json
|
||||
bayesnet::bayesnet
|
||||
folding::folding
|
||||
)
|
||||
|
123
sample/sample.cc
123
sample/sample.cc
@@ -4,9 +4,22 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <ArffFiles/ArffFiles.hpp>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/XBAODE.h>
|
||||
#include <bayesnet/classifiers/TANLd.h>
|
||||
#include <bayesnet/classifiers/KDBLd.h>
|
||||
#include <bayesnet/ensembles/AODELd.h>
|
||||
|
||||
torch::Tensor matrix2tensor(const std::vector<std::vector<float>>& matrix)
|
||||
{
|
||||
auto tensor = torch::empty({ static_cast<int>(matrix.size()), static_cast<int>(matrix[0].size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < matrix.size(); ++i) {
|
||||
tensor.index_put_({ i, "..." }, torch::tensor(matrix[i], torch::kFloat32));
|
||||
}
|
||||
return tensor;
|
||||
}
|
||||
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
{
|
||||
@@ -19,63 +32,89 @@ std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, m
|
||||
}
|
||||
return Xd;
|
||||
}
|
||||
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
|
||||
std::tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string> loadArff(const std::string& name, bool class_last)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
handler.load(name, class_last);
|
||||
// Get Dataset X, y
|
||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t& y = handler.getY();
|
||||
// Get className & Features
|
||||
auto className = handler.getClassName();
|
||||
std::vector<mdlp::samples_t> X = handler.getX();
|
||||
mdlp::labels_t y = handler.getY();
|
||||
std::vector<std::string> features;
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
torch::Tensor Xd;
|
||||
auto states = map<std::string, std::vector<int>>();
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
|
||||
}
|
||||
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
|
||||
auto Xt = matrix2tensor(X);
|
||||
auto yt = torch::tensor(y, torch::kInt32);
|
||||
return { Xt, yt, features, handler.getClassName() };
|
||||
}
|
||||
// tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
|
||||
// {
|
||||
// auto [X, y, features, className] = loadArff(name, class_last);
|
||||
// // Discretize the dataset
|
||||
// torch::Tensor Xd;
|
||||
// auto states = map<std::string, std::vector<int>>();
|
||||
// // Fill the class states
|
||||
// states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
// iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
// auto Xr = discretizeDataset(X, y);
|
||||
// Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
|
||||
// for (int i = 0; i < features.size(); ++i) {
|
||||
// states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
// auto item = states.at(features[i]);
|
||||
// iota(begin(item), end(item), 0);
|
||||
// Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
|
||||
// }
|
||||
// auto yt = torch::tensor(y, torch::kInt32);
|
||||
// return { Xd, yt, features, className, states };
|
||||
// }
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
|
||||
if (argc < 3) {
|
||||
std::cerr << "Usage: " << argv[0] << " <arff_file_name> <model>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::string file_name = argv[1];
|
||||
torch::Tensor X, y;
|
||||
std::vector<std::string> features;
|
||||
std::string className;
|
||||
map<std::string, std::vector<int>> states;
|
||||
auto clf = bayesnet::XBAODE(); // false for not using voting in predict
|
||||
std::cout << "Library version: " << clf.getVersion() << std::endl;
|
||||
tie(X, y, features, className, states) = loadDataset(file_name, true);
|
||||
torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
|
||||
torch::Tensor dataset;
|
||||
try {
|
||||
auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
|
||||
dataset = torch::cat({ X, yresized }, 0);
|
||||
std::string model_name = argv[2];
|
||||
std::map<std::string, bayesnet::Classifier*> models{ {"TANLd", new bayesnet::TANLd()}, {"KDBLd", new bayesnet::KDBLd(2)}, {"AODELd", new bayesnet::AODELd() }
|
||||
};
|
||||
if (models.find(model_name) == models.end()) {
|
||||
std::cerr << "Model not found: " << model_name << std::endl;
|
||||
std::cerr << "Available models: ";
|
||||
for (const auto& model : models) {
|
||||
std::cerr << model.first << " ";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
return 1;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::stringstream oss;
|
||||
oss << "* Error in X and y dimensions *\n";
|
||||
oss << "X dimensions: " << dataset.sizes() << "\n";
|
||||
oss << "y dimensions: " << y.sizes();
|
||||
throw std::runtime_error(oss.str());
|
||||
auto clf = models[model_name];
|
||||
std::cout << "Library version: " << clf->getVersion() << std::endl;
|
||||
// auto [X, y, features, className, states] = loadDataset(file_name, true);
|
||||
auto [Xt, yt, features, className] = loadArff(file_name, true);
|
||||
std::map<std::string, std::vector<int>> states;
|
||||
// int m = Xt.size(1);
|
||||
// auto weights = torch::full({ m }, 1 / m, torch::kDouble);
|
||||
// auto dataset = buildDataset(Xv, yv);
|
||||
// try {
|
||||
// auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
|
||||
// dataset = torch::cat({ X, yresized }, 0);
|
||||
// }
|
||||
// catch (const std::exception& e) {
|
||||
// std::stringstream oss;
|
||||
// oss << "* Error in X and y dimensions *\n";
|
||||
// oss << "X dimensions: " << dataset.sizes() << "\n";
|
||||
// oss << "y dimensions: " << y.sizes();
|
||||
// throw std::runtime_error(oss.str());
|
||||
// }
|
||||
clf->fit(Xt, yt, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
auto total = yt.size(0);
|
||||
auto y_proba = clf->predict_proba(Xt);
|
||||
auto y_pred = y_proba.argmax(1);
|
||||
auto accuracy_value = (y_pred == yt).sum().item<float>() / total;
|
||||
auto score = clf->score(Xt, yt);
|
||||
std::cout << "File: " << file_name << " Model: " << model_name << " score: " << score << " Computed accuracy: " << accuracy_value << std::endl;
|
||||
for (const auto clf : models) {
|
||||
delete clf.second;
|
||||
}
|
||||
clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto score = clf.score(X, y);
|
||||
std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -1,21 +1,21 @@
|
||||
{
|
||||
"default-registry": {
|
||||
"kind": "git",
|
||||
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
|
||||
"repository": "https://github.com/microsoft/vcpkg"
|
||||
},
|
||||
"registries": [
|
||||
{
|
||||
"kind": "git",
|
||||
"repository": "https://github.com/rmontanana/vcpkg-stash",
|
||||
"baseline": "393efa4e74e053b6f02c4ab03738c8fe796b28e5",
|
||||
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
|
||||
"packages": [
|
||||
"folding",
|
||||
"bayesnet",
|
||||
"arff-files",
|
||||
"bayesnet",
|
||||
"fimdlp",
|
||||
"folding",
|
||||
"libtorch-bin"
|
||||
]
|
||||
}
|
||||
],
|
||||
"default-registry": {
|
||||
"kind": "git",
|
||||
"repository": "https://github.com/microsoft/vcpkg",
|
||||
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605"
|
||||
}
|
||||
]
|
||||
}
|
@@ -2,11 +2,32 @@
|
||||
"name": "sample-project",
|
||||
"version-string": "0.1.0",
|
||||
"dependencies": [
|
||||
"bayesnet",
|
||||
"folding",
|
||||
"arff-files",
|
||||
"fimdlp",
|
||||
"nlohmann-json",
|
||||
"libtorch-bin"
|
||||
"libtorch-bin",
|
||||
"folding",
|
||||
"nlohmann-json"
|
||||
],
|
||||
"overrides": [
|
||||
{
|
||||
"name": "arff-files",
|
||||
"version": "1.1.0"
|
||||
},
|
||||
{
|
||||
"name": "fimdlp",
|
||||
"version": "2.0.1"
|
||||
},
|
||||
{
|
||||
"name": "libtorch-bin",
|
||||
"version": "2.7.0"
|
||||
},
|
||||
{
|
||||
"name": "bayesnet",
|
||||
"version": "1.1.1"
|
||||
},
|
||||
{
|
||||
"name": "folding",
|
||||
"version": "1.1.1"
|
||||
}
|
||||
]
|
||||
}
|
@@ -1,18 +1,13 @@
|
||||
if(ENABLE_TESTING)
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/log
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${bayesnet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp::fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
||||
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
||||
|
@@ -20,7 +20,7 @@
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.1.0";
|
||||
const std::string ACTUAL_VERSION = "1.1.1";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
|
@@ -8,7 +8,7 @@
|
||||
{
|
||||
"kind": "git",
|
||||
"repository": "https://github.com/rmontanana/vcpkg-stash",
|
||||
"baseline": "393efa4e74e053b6f02c4ab03738c8fe796b28e5",
|
||||
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
|
||||
"packages": [
|
||||
"arff-files",
|
||||
"fimdlp",
|
||||
|
Reference in New Issue
Block a user