Compare commits

..

23 Commits
conan ... main

Author SHA1 Message Date
89142f8997 Update version number 2025-07-19 22:47:32 +02:00
17ee6a909a Merge pull request 'Create version 1.2.1' (#40) from ldi into main
Reviewed-on: #40
2025-07-19 20:42:25 +00:00
56d85b1a43 Update test libraries version number 2025-07-19 22:25:17 +02:00
481c702302 Update libraries versions 2025-07-19 22:12:27 +02:00
3e0b790cfe Update Changelog 2025-07-08 18:57:57 +02:00
e2a0c5f4a5 Add Notes to Proposal convergence 2025-07-08 18:50:09 +02:00
aa77745e55 Fix TANLd valid_hyperparameters 2025-07-08 17:28:27 +02:00
e5227c5f4b Add dataset tests to Ld models 2025-07-08 16:07:16 +02:00
ed380b1494 Complete implementation with tests 2025-07-08 11:42:20 +02:00
2c7352ac38 Fix classifier build in proposal 2025-07-07 02:10:08 +02:00
0ce7f664b4 remove unneeded files 2025-07-07 00:38:00 +02:00
62fa85a1b3 Complete proposal 2025-07-07 00:37:16 +02:00
97894cc49c First approach with derived class 2025-07-06 18:49:05 +02:00
090172c6c5 Add Claude local discretization analysis 2025-07-04 12:19:58 +02:00
3048244a27 Add cache clean to conan-clean 2025-07-04 11:56:55 +02:00
c142ff2c4a Compact Makefile and remove unneeded in CMakeLists 2025-07-03 09:55:05 +02:00
a5841000d3 Change optimization flag in Release 2025-07-02 13:56:54 +02:00
e7e80cfa9c Update CHANGELOG 2025-07-02 00:52:53 +02:00
1d58cea276 Add build_type option to sample target in Makefile 2025-07-02 00:51:31 +02:00
189d314990 Fix Conan debug build
Fix smell issues in markdown and python
2025-07-02 00:44:24 +02:00
28be43db02 Update sample target in Makefile 2025-07-01 18:42:20 +02:00
55a24fbaf0 Update optimization flag 2025-07-01 16:49:04 +02:00
3b170324f4 Merge pull request 'conan' (#38) from conan into main
Reviewed-on: #38
2025-07-01 14:33:50 +00:00
26 changed files with 1032 additions and 197 deletions

View File

@@ -5,9 +5,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [1.2.1] - 2025-07-19
## [1.2.0] - 2025-06-30
### Internal
- Update Libtorch to version 2.7.1
- Update libraries versions:
- mdlp: 2.1.1
- Folding: 1.1.2
- ArffFiles: 1.2.1
## [1.2.0] - 2025-07-08
### Internal
@@ -17,7 +25,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- *ld_proposed_cuts*: number of cut points to return.
- *mdlp_min_length*: minimum length of a partition in MDLP algorithm to be evaluated for partition.
- *mdlp_max_depth*: maximum level of recursion in MDLP algorithm.
- *max_iterations*: maximum number of iterations of the discretization-build model loop.
- *verbose_convergence*: display status messages during the convergence process.
- Remove vcpkg as a dependency manager, now the library is built with Conan package manager and CMake.
- Add `build_type` option to the sample target in the Makefile to allow building in *Debug* or *Release* mode. Default is *Debug*.
## [1.1.1] - 2025-05-20

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.27)
project(bayesnet
VERSION 1.2.0
VERSION 1.2.1
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
@@ -18,16 +18,18 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Release mode")
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
# Options
# -------
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
find_package(Torch CONFIG REQUIRED)
if(NOT TARGET torch::torch)
@@ -63,23 +65,21 @@ target_link_libraries(bayesnet
arff-files::arff-files
)
# Testing
# -------
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Release mode")
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
if (ENABLE_TESTING)
MESSAGE(STATUS "Testing enabled")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
endif()
find_package(Catch2 CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
enable_testing()
include(CTest)
add_subdirectory(tests)
else(ENABLE_TESTING)
endif (ENABLE_TESTING)
# Installation

View File

@@ -1,6 +1,6 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean updatebadge doc doc-install init clean-test conan-debug conan-release conan-create conan-upload conan-clean conan-sample
.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean updatebadge doc doc-install init clean-test debug release conan-create conan-upload conan-clean sample
f_release = build_Release
f_debug = build_Debug
@@ -17,6 +17,14 @@ mansrcdir = docs/man3
mandestdir = /usr/local/share/man
sed_command_link = 's/e">LCOV -/e"><a href="https:\/\/rmontanana.github.io\/bayesnet">Back to manual<\/a> LCOV -/g'
sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
# Set the number of parallel jobs to the number of available processors minus 7
CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
|| nproc --all 2>/dev/null \
|| sysctl -n hw.ncpu)
# --- Your desired job count: CPUs - 7, but never less than 1 --------------
JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
define ClearTests
@for t in $(test_targets); do \
@@ -31,6 +39,15 @@ define ClearTests
fi ;
endef
define setup_target
@echo ">>> Setup the project for $(1)..."
@if [ -d $(2) ]; then rm -fr $(2); fi
@conan install . --build=missing -of $(2) -s build_type=$(1)
@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
@echo ">>> Will build using $(JOBS) parallel jobs"
@echo ">>> Done"
endef
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
brew install gcovr; \
@@ -57,11 +74,17 @@ clean: ## Clean the project
# Build targets
# =============
debug: ## Setup debug version using Conan
@$(call setup_target,"Debug","$(f_debug)","ENABLE_TESTING=ON")
release: ## Setup release version using Conan
@$(call setup_target,"Release","$(f_release)","ENABLE_TESTING=OFF")
buildd: ## Build the debug targets
cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS)
buildr: ## Build the release targets
cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS)
# Install targets
@@ -91,7 +114,7 @@ opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests...";
@$(MAKE) clean-test
@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
@cmake --build $(f_debug) -t $(test_targets) --parallel $(JOBS)
@for t in $(test_targets); do \
echo ">>> Running $$t...";\
if [ -f $(f_debug)/tests/$$t ]; then \
@@ -160,6 +183,7 @@ doc: ## Generate documentation
@echo ">>> Done";
diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
@echo ">>> Creating diagrams..."
@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
@@ -172,6 +196,7 @@ diagrams: ## Create an UML class diagram & dependency of the project (diagrams/B
$(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
@echo ">>> Done";
docdir = ""
doc-install: ## Install documentation
@@ -190,62 +215,29 @@ doc-install: ## Install documentation
# Conan package manager targets
# =============================
debug: ## Build debug version using Conan
@echo ">>> Building *Debug* BayesNet with Conan..."
@rm -rf $(f_debug) # wipe previous tree
@conan install . \
-s build_type=Debug \
--build=missing \
-of $(f_debug) \
--profile=debug
@cmake -S . -B $(f_debug) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_TESTING=ON \
-DCODE_COVERAGE=ON \
-DCMAKE_TOOLCHAIN_FILE=$(f_debug)/build/Debug/generators/conan_toolchain.cmake
@echo ">>> Done"
release: ## Build release version using Conan
@echo ">>> Building Release BayesNet with Conan..."
@conan install . \
-s build_type=Release \
--build=missing \
-of $(f_debug) \
--profile=release
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release)
@conan install . -s build_type=Release --build=missing -of $(f_release)
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake
@echo ">>> Done"
conan-create: ## Create Conan package
@echo ">>> Creating Conan package..."
@conan create . --build=missing -tf "" --profile=release
@conan create . --build=missing -tf "" --profile=debug -o "&:enable_coverage=False" -o "&:enable_testing=False"
@echo ">>> Done"
profile ?= release
remote ?= Cimmeria
conan-upload: ## Upload package to Conan remote (profile=release remote=Cimmeria)
@echo ">>> Uploading to Conan remote $(remote) with profile $(profile)..."
@conan upload bayesnet/$(grep version conanfile.py | cut -d'"' -f2) -r $(remote) --confirm
@conan create . --build=missing -tf "" -s:a build_type=Release
@conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_coverage=False" -o "&:enable_testing=False"
@echo ">>> Done"
conan-clean: ## Clean Conan cache and build folders
@echo ">>> Cleaning Conan cache and build folders..."
@conan remove "*" --confirm
@conan cache clean
@if test -d "$(f_release)" ; then rm -rf "$(f_release)"; fi
@if test -d "$(f_debug)" ; then rm -rf "$(f_debug)"; fi
@echo ">>> Done"
fname = "tests/data/iris.arff"
model = "TANLd"
build_type = "Debug"
sample: ## Build sample with Conan
@echo ">>> Building Sample with Conan...";
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
@cd sample && conan install . --output-folder=build --build=missing
@cd sample && cmake -B build -S . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake && \
cmake --build build -t bayesnet_sample
@cd sample && conan install . --output-folder=build --build=missing -s build_type=$(build_type) -o "&:enable_coverage=False" -o "&:enable_testing=False"
@cd sample && cmake -B build -S . -DCMAKE_BUILD_TYPE=$(build_type) -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake && \
cmake --build build -t bayesnet_sample --parallel $(JOBS)
sample/build/bayesnet_sample $(fname) $(model)
@echo ">>> Done";
@@ -267,4 +259,4 @@ help: ## Show help message
printf "%-20s %s" $$help_command ; \
printf '\033[0m'; \
printf "%s\n" $$help_info; \
done
done

View File

@@ -8,7 +8,7 @@
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Doctorado-ML/BayesNet)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea)
[![Coverage Badge](https://img.shields.io/badge/Coverage-98,0%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet)
[![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet)
[![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344)
Bayesian Network Classifiers library

View File

@@ -37,6 +37,7 @@ namespace bayesnet {
std::vector<std::string> getNotes() const override { return notes; }
std::string dump_cpt() const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
Network& getModel() { return model; }
protected:
bool fitted;
unsigned int m, n; // m: number of samples, n: number of features

View File

@@ -5,40 +5,38 @@
// ***************************************************************
#include "KDBLd.h"
#include <memory>
namespace bayesnet {
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className)
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className, KDB::notes)
{
validHyperparameters = validHyperparameters_ld;
validHyperparameters.push_back("k");
validHyperparameters.push_back("theta");
}
void KDBLd::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
hyperparameters.erase("k");
}
if (hyperparameters.contains("theta")) {
theta = hyperparameters["theta"];
hyperparameters.erase("theta");
}
Proposal::setHyperparameters(hyperparameters);
}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
return commonFit(features_, className_, states_, smoothing);
}
KDBLd& KDBLd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
}
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
return commonFit(features_, className_, states_, smoothing);
}
KDBLd& KDBLd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
features = features_;
className = className_;
states = iterativeLocalDiscretization(y, static_cast<KDB*>(this), dataset, features, className, states_, smoothing);
KDB::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model);
return *this;
}
torch::Tensor KDBLd::predict(torch::Tensor& X)
@@ -55,4 +53,4 @@ namespace bayesnet {
{
return KDB::graph(name);
}
}
}

View File

@@ -15,8 +15,15 @@ namespace bayesnet {
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
KDBLd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
KDBLd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<std::string> graph(const std::string& name = "KDB") const override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override
{
auto hyperparameters = hyperparameters_;
Proposal::setHyperparameters(hyperparameters);
KDB::setHyperparameters(hyperparameters);
}
torch::Tensor predict(torch::Tensor& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };

View File

@@ -5,14 +5,22 @@
// ***************************************************************
#include "Proposal.h"
#include <iostream>
#include <cmath>
#include <limits>
#include "Classifier.h"
#include "KDB.h"
#include "TAN.h"
#include "SPODE.h"
#include "KDBLd.h"
#include "TANLd.h"
namespace bayesnet {
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_, std::vector<std::string>& notes_) : pDataset(dataset_), pFeatures(features_), pClassName(className_), notes(notes_)
{
}
void Proposal::setHyperparameters(const nlohmann::json& hyperparameters_)
void Proposal::setHyperparameters(nlohmann::json& hyperparameters)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("ld_proposed_cuts")) {
ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"];
hyperparameters.erase("ld_proposed_cuts");
@@ -38,8 +46,14 @@ namespace bayesnet {
throw std::invalid_argument("Invalid discretization algorithm: " + algorithm.get<std::string>());
}
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
// Convergence parameters
if (hyperparameters.contains("max_iterations")) {
convergence_params.maxIterations = hyperparameters["max_iterations"];
hyperparameters.erase("max_iterations");
}
if (hyperparameters.contains("verbose_convergence")) {
convergence_params.verbose = hyperparameters["verbose_convergence"];
hyperparameters.erase("verbose_convergence");
}
}
@@ -163,4 +177,65 @@ namespace bayesnet {
}
return yy;
}
// Iteratively alternates local discretization and model fitting until the
// learned network structure stops changing or maxIterations is reached.
//   y          : class labels driving the initial supervised discretization.
//   classifier : model refit on every round (instantiated for KDB/TAN/SPODE below).
//   dataset, features, className : forwarded to classifier->fit each round.
//   initialStates : NOTE(review) accepted but never read — the loop always
//                   restarts from fit_local_discretization(y); confirm intended.
//   smoothing  : smoothing policy forwarded to classifier->fit.
// Returns the per-feature discretization states from the last iteration.
template<typename Classifier>
map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization(
const torch::Tensor& y,
Classifier* classifier,
torch::Tensor& dataset,
const std::vector<std::string>& features,
const std::string& className,
const map<std::string, std::vector<int>>& initialStates,
Smoothing_t smoothing
)
{
// Phase 1: Initial discretization (same as original)
auto currentStates = fit_local_discretization(y);
auto previousModel = Network();
if (convergence_params.verbose) {
std::cout << "Starting iterative local discretization with "
<< convergence_params.maxIterations << " max iterations" << std::endl;
}
// Uniform sample weights; pDataset is (n+1)xm, so size(1) is the sample count.
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
if (convergence_params.verbose) {
std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl;
}
// Phase 2: Build model with current discretization
classifier->fit(dataset, features, className, currentStates, weights, smoothing);
// Phase 3: Network-aware discretization refinement
currentStates = localDiscretizationProposal(currentStates, classifier->getModel());
// Check convergence. Network::operator== compares node names and edges
// only, so "converged" means the learned topology stopped changing.
// Skipped on iteration 0 because previousModel is still empty.
if (iteration > 0 && previousModel == classifier->getModel()) {
if (convergence_params.verbose) {
std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl;
}
notes.push_back("Converged after " + std::to_string(iteration + 1) + " of "
+ std::to_string(convergence_params.maxIterations) + " iterations");
break;
}
// Update for next iteration (deep copy via Network::operator=)
previousModel = classifier->getModel();
}
return currentStates;
}
// Explicit template instantiation for common classifier types
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<KDB>(
const torch::Tensor&, KDB*, torch::Tensor&, const std::vector<std::string>&,
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<TAN>(
const torch::Tensor&, TAN*, torch::Tensor&, const std::vector<std::string>&,
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<SPODE>(
const torch::Tensor&, SPODE*, torch::Tensor&, const std::vector<std::string>&,
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
}

View File

@@ -18,25 +18,50 @@
namespace bayesnet {
class Proposal {
public:
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
void setHyperparameters(const nlohmann::json& hyperparameters_);
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_, std::vector<std::string>& notes);
void setHyperparameters(nlohmann::json& hyperparameters_);
protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X);
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
// Iterative discretization method
template<typename Classifier>
map<std::string, std::vector<int>> iterativeLocalDiscretization(
const torch::Tensor& y,
Classifier* classifier,
torch::Tensor& dataset,
const std::vector<std::string>& features,
const std::string& className,
const map<std::string, std::vector<int>>& initialStates,
const Smoothing_t smoothing
);
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
// MDLP parameters
struct {
size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
} ld_params;
nlohmann::json validHyperparameters_ld = { "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth" };
// Convergence parameters
struct {
int maxIterations = 10;
bool verbose = false;
} convergence_params;
nlohmann::json validHyperparameters_ld = {
"ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth",
"max_iterations", "verbose_convergence"
};
private:
std::vector<int> factorize(const std::vector<std::string>& labels_t);
std::vector<std::string>& notes; // Notes during fit from BaseClassifier
torch::Tensor& pDataset; // (n+1)xm tensor
std::vector<std::string>& pFeatures;
std::string& pClassName;

View File

@@ -7,7 +7,7 @@
#include "SPODELd.h"
namespace bayesnet {
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className)
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className, SPODE::notes)
{
validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal
}
@@ -34,12 +34,8 @@ namespace bayesnet {
{
features = features_;
className = className_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
states = iterativeLocalDiscretization(y, static_cast<SPODE*>(this), dataset, features, className, states_, smoothing);
SPODE::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model);
return *this;
}
torch::Tensor SPODELd::predict(torch::Tensor& X)

View File

@@ -18,6 +18,12 @@ namespace bayesnet {
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override
{
auto hyperparameters = hyperparameters_;
Proposal::setHyperparameters(hyperparameters);
SPODE::setHyperparameters(hyperparameters);
}
torch::Tensor predict(torch::Tensor& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };

View File

@@ -5,24 +5,37 @@
// ***************************************************************
#include "TANLd.h"
#include <memory>
namespace bayesnet {
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd::TANLd() : TAN(), Proposal(dataset, features, className, TAN::notes)
{
validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal
}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model);
return *this;
return commonFit(features_, className_, states_, smoothing);
}
TANLd& TANLd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
}
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
return commonFit(features_, className_, states_, smoothing);
}
TANLd& TANLd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{
features = features_;
className = className_;
states = iterativeLocalDiscretization(y, static_cast<TAN*>(this), dataset, features, className, states_, smoothing);
TAN::fit(dataset, features, className, states, smoothing);
return *this;
}
torch::Tensor TANLd::predict(torch::Tensor& X)
{
@@ -38,4 +51,4 @@ namespace bayesnet {
{
return TAN::graph(name);
}
}
}

View File

@@ -16,7 +16,15 @@ namespace bayesnet {
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
TANLd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
TANLd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override
{
auto hyperparameters = hyperparameters_;
Proposal::setHyperparameters(hyperparameters);
TAN::setHyperparameters(hyperparameters);
}
torch::Tensor predict(torch::Tensor& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
};

View File

@@ -7,7 +7,7 @@
#include "AODELd.h"
namespace bayesnet {
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className, Ensemble::notes)
{
validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal
}

View File

@@ -17,6 +17,10 @@ namespace bayesnet {
virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override
{
hyperparameters = hyperparameters_;
}
protected:
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void buildModel(const torch::Tensor& weights) override;

View File

@@ -17,14 +17,90 @@ namespace bayesnet {
Network::Network() : fitted{ false }, classNumStates{ 0 }
{
}
Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
fitted(other.fitted), samples(other.samples)
Network::Network(const Network& other)
: features(other.features), className(other.className), classNumStates(other.classNumStates),
fitted(other.fitted)
{
if (samples.defined())
samples = samples.clone();
// Deep copy the samples tensor
if (other.samples.defined()) {
samples = other.samples.clone();
}
// First, create all nodes (without relationships)
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
// Second, reconstruct the relationships between nodes
for (const auto& node : other.nodes) {
const std::string& nodeName = node.first;
Node* originalNode = node.second.get();
Node* newNode = nodes[nodeName].get();
// Reconstruct parent relationships
for (Node* parent : originalNode->getParents()) {
const std::string& parentName = parent->getName();
if (nodes.find(parentName) != nodes.end()) {
newNode->addParent(nodes[parentName].get());
}
}
// Reconstruct child relationships
for (Node* child : originalNode->getChildren()) {
const std::string& childName = child->getName();
if (nodes.find(childName) != nodes.end()) {
newNode->addChild(nodes[childName].get());
}
}
}
}
// Deep-copy assignment. Clones the samples tensor and rebuilds the node map
// plus its parent/child pointer graph so that no raw pointers refer back into
// `other`. Self-assignment safe.
Network& Network::operator=(const Network& other)
{
if (this != &other) {
// Clear existing state before rebuilding from `other`
nodes.clear();
features = other.features;
className = other.className;
classNumStates = other.classNumStates;
fitted = other.fitted;
// Deep copy the samples tensor (reset to an undefined tensor otherwise)
if (other.samples.defined()) {
samples = other.samples.clone();
} else {
samples = torch::Tensor();
}
// First, create all nodes (without relationships); Node's copy ctor
// intentionally leaves parent/children empty.
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
// Second, reconstruct the relationships between nodes, remapping each
// pointer from `other`'s nodes to the freshly created local ones.
for (const auto& node : other.nodes) {
const std::string& nodeName = node.first;
Node* originalNode = node.second.get();
Node* newNode = nodes[nodeName].get();
// Reconstruct parent relationships
for (Node* parent : originalNode->getParents()) {
const std::string& parentName = parent->getName();
if (nodes.find(parentName) != nodes.end()) {
newNode->addParent(nodes[parentName].get());
}
}
// Reconstruct child relationships
for (Node* child : originalNode->getChildren()) {
const std::string& childName = child->getName();
if (nodes.find(childName) != nodes.end()) {
newNode->addChild(nodes[childName].get());
}
}
}
}
return *this;
}
void Network::initialize()
{
@@ -503,4 +579,41 @@ namespace bayesnet {
}
return oss.str();
}
// Structural equality: two networks compare equal when they hold the same
// node names and the same edge set. CPT contents, samples and fitted state
// are NOT compared — NOTE(review): confirm structure-only comparison is the
// intended contract for callers using this as a convergence test.
bool Network::operator==(const Network& other) const
{
// Compare number of nodes
if (nodes.size() != other.nodes.size()) {
return false;
}
// Compare if all node names exist in both networks
// (equal sizes + one-way containment implies identical key sets)
for (const auto& node : nodes) {
if (other.nodes.find(node.first) == other.nodes.end()) {
return false;
}
}
// Compare edges (topology)
auto thisEdges = getEdges();
auto otherEdges = other.getEdges();
// Compare number of edges
if (thisEdges.size() != otherEdges.size()) {
return false;
}
// Sort both edge lists for comparison (edge enumeration order is not significant)
std::sort(thisEdges.begin(), thisEdges.end());
std::sort(otherEdges.begin(), otherEdges.end());
// Compare each edge
for (size_t i = 0; i < thisEdges.size(); ++i) {
if (thisEdges[i] != otherEdges[i]) {
return false;
}
}
return true;
}
}

View File

@@ -17,7 +17,8 @@ namespace bayesnet {
class Network {
public:
Network();
explicit Network(const Network&);
Network(const Network& other);
Network& operator=(const Network& other);
~Network() = default;
torch::Tensor& getSamples();
void addNode(const std::string&);
@@ -47,6 +48,7 @@ namespace bayesnet {
void initialize();
std::string dump_cpt() const;
inline std::string version() { return { project_version.begin(), project_version.end() }; }
bool operator==(const Network& other) const;
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;

View File

@@ -13,6 +13,41 @@ namespace bayesnet {
: name(name)
{
}
Node::Node(const Node& other)
    : name(other.name), numStates(other.numStates), dimensions(other.dimensions)
{
    // Deep copy the conditional probability table; when the source has
    // none, leave this node's tensor undefined as well.
    cpTable = other.cpTable.defined() ? other.cpTable.clone() : torch::Tensor();
    // Parent and child pointers are deliberately NOT copied here: they
    // reference nodes owned by a Network, and the Network copy logic is
    // responsible for rewiring them to the cloned nodes.
}
Node& Node::operator=(const Node& other)
{
    // Self-assignment: nothing to do.
    if (this == &other) {
        return *this;
    }
    name = other.name;
    numStates = other.numStates;
    dimensions = other.dimensions;
    // Deep copy the CPT; reset to an undefined tensor when the source has none.
    cpTable = other.cpTable.defined() ? other.cpTable.clone() : torch::Tensor();
    // Drop stale relationship pointers; they referenced nodes of another
    // Network and must be rebuilt by the owning Network's copy logic.
    parents.clear();
    children.clear();
    return *this;
}
void Node::clear()
{
parents.clear();

View File

@@ -14,6 +14,9 @@ namespace bayesnet {
class Node {
public:
explicit Node(const std::string&);
Node(const Node& other);
Node& operator=(const Node& other);
~Node() = default;
void clear();
void addParent(Node*);
void addChild(Node*);

View File

@@ -3,6 +3,7 @@ from conan import ConanFile
from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps
from conan.tools.files import copy
class BayesNetConan(ConanFile):
name = "bayesnet"
settings = "os", "compiler", "build_type", "arch"
@@ -10,26 +11,35 @@ class BayesNetConan(ConanFile):
"shared": [True, False],
"fPIC": [True, False],
"enable_testing": [True, False],
"enable_coverage": [True, False]
"enable_coverage": [True, False],
}
default_options = {
"shared": False,
"fPIC": True,
"enable_testing": False,
"enable_coverage": False
"enable_coverage": False,
}
# Sources are located in the same place as this recipe, copy them to the recipe
exports_sources = "CMakeLists.txt", "bayesnet/*", "config/*", "cmake/*", "docs/*", "tests/*", "bayesnetConfig.cmake.in"
exports_sources = (
"CMakeLists.txt",
"bayesnet/*",
"config/*",
"cmake/*",
"docs/*",
"tests/*",
"bayesnetConfig.cmake.in",
)
def set_version(self) -> None:
cmake = pathlib.Path(self.recipe_folder) / "CMakeLists.txt"
text = cmake.read_text(encoding="utf-8")
text = cmake.read_text(encoding="utf-8")
# Accept either: project(foo VERSION 1.2.3) or set(foo_VERSION 1.2.3)
match = re.search(
r"""project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)""",
text, re.IGNORECASE | re.VERBOSE
text,
re.IGNORECASE | re.VERBOSE,
)
if match:
self.version = match.group(1)
@@ -40,26 +50,26 @@ class BayesNetConan(ConanFile):
def config_options(self):
if self.settings.os == "Windows":
del self.options.fPIC
def configure(self):
if self.options.shared:
self.options.rm_safe("fPIC")
def requirements(self):
# Core dependencies
self.requires("libtorch/2.7.0")
self.requires("libtorch/2.7.1")
self.requires("nlohmann_json/3.11.3")
self.requires("folding/1.1.1") # Custom package
self.requires("fimdlp/2.1.0") # Custom package
self.requires("folding/1.1.2") # Custom package
self.requires("fimdlp/2.1.1") # Custom package
def build_requirements(self):
self.build_requires("cmake/[>=3.27]")
self.test_requires("arff-files/1.2.0") # Custom package
self.test_requires("arff-files/1.2.1") # Custom package
self.test_requires("catch2/3.8.1")
def layout(self):
cmake_layout(self)
def generate(self):
deps = CMakeDeps(self)
deps.generate()
@@ -67,27 +77,32 @@ class BayesNetConan(ConanFile):
tc.variables["ENABLE_TESTING"] = self.options.enable_testing
tc.variables["CODE_COVERAGE"] = self.options.enable_coverage
tc.generate()
def build(self):
cmake = CMake(self)
cmake.configure()
cmake.build()
if self.options.enable_testing:
# Run tests only if we're building with testing enabled
self.run("ctest --output-on-failure", cwd=self.build_folder)
def package(self):
copy(self, "LICENSE", src=self.source_folder, dst=os.path.join(self.package_folder, "licenses"))
copy(
self,
"LICENSE",
src=self.source_folder,
dst=os.path.join(self.package_folder, "licenses"),
)
cmake = CMake(self)
cmake.install()
def package_info(self):
self.cpp_info.libs = ["bayesnet"]
self.cpp_info.includedirs = ["include"]
self.cpp_info.set_property("cmake_find_mode", "both")
self.cpp_info.set_property("cmake_target_name", "bayesnet::bayesnet")
# Add compiler flags that might be needed
if self.settings.os == "Linux":
self.cpp_info.system_libs = ["pthread"]
self.cpp_info.system_libs = ["pthread"]

View File

@@ -0,0 +1,235 @@
# Local Discretization Analysis - BayesNet Library
## Overview
This document analyzes the local discretization implementation in the BayesNet library, specifically focusing on the `Proposal.cc` implementation, and evaluates the feasibility of implementing an iterative discretization approach.
## Current Local Discretization Implementation
### Core Architecture
The local discretization functionality is implemented through a **Proposal class** (`bayesnet/classifiers/Proposal.h`) that serves as a mixin/base class for creating "Ld" (Local Discretization) variants of existing classifiers.
### Key Components
#### 1. The Proposal Class
- **Purpose**: Handles continuous data by applying local discretization using discretization algorithms
- **Dependencies**: Uses the `fimdlp` library for discretization algorithms
- **Supported Algorithms**:
- **MDLP** (Minimum Description Length Principle) - Default
- **BINQ** - Quantile-based binning
- **BINU** - Uniform binning
#### 2. Local Discretization Variants
The codebase implements Ld variants using multiple inheritance:
**Individual Classifiers:**
- `TANLd` - Tree Augmented Naive Bayes with Local Discretization
- `KDBLd` - K-Dependence Bayesian with Local Discretization
- `SPODELd` - Super-Parent One-Dependence Estimator with Local Discretization
**Ensemble Classifiers:**
- `AODELd` - Averaged One-Dependence Estimator with Local Discretization
### Implementation Pattern
All Ld variants follow a consistent pattern using **multiple inheritance**:
```cpp
class TANLd : public TAN, public Proposal {
// Inherits from both the base classifier and Proposal
};
```
### Two-Phase Discretization Process
#### Phase 1: Initial Discretization (`fit_local_discretization`)
- Each continuous feature is discretized independently using the chosen algorithm
- Creates initial discrete dataset
- Uses only class labels for discretization decisions
#### Phase 2: Network-Aware Refinement (`localDiscretizationProposal`)
- After building the initial Bayesian network structure
- Features are re-discretized considering their parent nodes in the network
- Uses topological ordering to ensure proper dependency handling
- Creates more informed discretization boundaries based on network relationships
### Hyperparameter Support
The Proposal class supports several configurable hyperparameters:
- `ld_algorithm`: Choice of discretization algorithm (MDLP, BINQ, BINU)
- `ld_proposed_cuts`: Number of proposed cuts for discretization
- `mdlp_min_length`: Minimum interval length for MDLP
- `mdlp_max_depth`: Maximum depth for MDLP tree
## Current Implementation Strengths
1. **Sophisticated Approach**: Considers network structure in discretization decisions
2. **Modular Design**: Clean separation through Proposal class mixin
3. **Multiple Algorithm Support**: Flexible discretization strategies
4. **Proper Dependency Handling**: Topological ordering ensures correct processing
5. **Well-Integrated**: Seamless integration with existing classifier architecture
## Areas for Improvement
### Code Quality Issues
1. **Dead Code**: Line 161 in `Proposal.cc` contains unused variable `allDigits`
2. **Performance Issues**:
- String concatenation in tight loop (lines 82-84) using `+=` operator
- Memory allocations could be optimized
- Tensor operations could be batched better
3. **Error Handling**: Could be more robust with better exception handling
### Algorithm Clarity
1. **Logic Clarity**: The `upgrade` flag logic could be more descriptive
2. **Variable Naming**: Some variables need more descriptive names
3. **Documentation**: Better inline documentation of the two-phase process
4. **Method Complexity**: `localDiscretizationProposal` method is quite long and complex
### Suggested Code Improvements
```cpp
// Instead of string concatenation in loop:
for (auto idx : indices) {
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
}
}
// Consider accumulating with per-feature streams instead. Note that the
// string must keep growing across all `indices`, so a stream is kept per
// feature rather than being reset inside the inner loop:
std::vector<std::stringstream> streams(Xf.size(1));
for (auto idx : indices) {
    for (int i = 0; i < Xf.size(1); ++i) {
        streams[i] << pDataset.index({ idx, i }).item<int>();
    }
}
for (int i = 0; i < Xf.size(1); ++i) {
    yJoinParents[i] = streams[i].str();
}
```
## Iterative Discretization Proposal
### Concept
Implement an iterative process: discretize → build model → re-discretize → rebuild model → repeat until convergence.
### Feasibility Assessment
**Highly Feasible** - The current implementation already provides a solid foundation with its two-phase approach, making extension straightforward.
### Proposed Implementation Strategy
```cpp
class IterativeProposal : public Proposal {
public:
struct ConvergenceParams {
int max_iterations = 10;
double tolerance = 1e-6;
bool check_network_structure = true;
bool check_discretization_stability = true;
};
private:
map<string, vector<int>> iterativeLocalDiscretization(const torch::Tensor& y) {
auto states = fit_local_discretization(y); // Initial discretization
Network previousModel, currentModel;
int iteration = 0;
do {
previousModel = currentModel;
// Build model with current discretization
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
currentModel.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
// Apply local discretization based on current model
auto newStates = localDiscretizationProposal(states, currentModel);
// Check for convergence
if (hasConverged(previousModel, currentModel, states, newStates)) {
break;
}
states = newStates;
iteration++;
} while (iteration < convergenceParams.max_iterations);
return states;
}
bool hasConverged(const Network& prev, const Network& curr,
const map<string, vector<int>>& oldStates,
const map<string, vector<int>>& newStates) {
// Implementation of convergence criteria
return checkNetworkStructureConvergence(prev, curr) &&
checkDiscretizationStability(oldStates, newStates);
}
};
```
### Convergence Criteria Options
1. **Network Structure Comparison**: Compare edge sets between iterations
```cpp
bool checkNetworkStructureConvergence(const Network& prev, const Network& curr) {
// Compare adjacency matrices or edge lists
return prev.getEdges() == curr.getEdges();
}
```
2. **Discretization Stability**: Check if cut points change significantly
```cpp
bool checkDiscretizationStability(const map<string, vector<int>>& oldStates,
const map<string, vector<int>>& newStates) {
for (const auto& [feature, states] : oldStates) {
if (states != newStates.at(feature)) {
return false;
}
}
return true;
}
```
3. **Performance Metrics**: Monitor accuracy/likelihood convergence
4. **Maximum Iterations**: Prevent infinite loops
### Expected Benefits
1. **Better Discretization Quality**: Each iteration refines boundaries based on learned dependencies
2. **Improved Model Accuracy**: More informed discretization leads to better classification
3. **Adaptive Process**: Automatically finds optimal discretization-model combination
4. **Principled Approach**: Theoretically sound iterative refinement
5. **Reduced Manual Tuning**: Less need for hyperparameter optimization
### Implementation Considerations
1. **Convergence Detection**: Need robust criteria to detect when to stop
2. **Performance Impact**: Multiple iterations increase computational cost
3. **Overfitting Prevention**: May need regularization to avoid over-discretization
4. **Stability Guarantees**: Ensure the process doesn't oscillate indefinitely
5. **Memory Management**: Handle multiple model instances efficiently
### Integration Strategy
1. **Backward Compatibility**: Keep existing two-phase approach as default
2. **Optional Feature**: Add iterative mode as configurable option
3. **Hyperparameter Extension**: Add convergence-related parameters
4. **Testing Framework**: Comprehensive testing on standard datasets
## Conclusion
The current local discretization implementation in BayesNet is well-designed and functional, providing a solid foundation for the proposed iterative enhancement. The iterative approach would significantly improve the quality of discretization by creating a feedback loop between model structure and discretization decisions.
The implementation is highly feasible given the existing architecture, and the expected benefits justify the additional computational complexity. The key to success will be implementing robust convergence criteria and maintaining the modularity of the current design.
## Recommendations
1. **Immediate Improvements**: Fix code quality issues and optimize performance bottlenecks
2. **Iterative Implementation**: Develop the iterative approach as an optional enhancement
3. **Comprehensive Testing**: Validate improvements on standard benchmark datasets
4. **Documentation**: Enhance inline documentation and user guides
5. **Performance Profiling**: Monitor computational overhead and optimize where necessary

View File

@@ -8,7 +8,7 @@ if(ENABLE_TESTING)
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp::fimdlp PRIVATE Catch2::Catch2WithMain folding::folding)
target_link_libraries(TestBayesNet PRIVATE torch::torch fimdlp::fimdlp Catch2::Catch2WithMain folding::folding)
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")

View File

@@ -20,7 +20,7 @@
#include "bayesnet/ensembles/AODELd.h"
#include "bayesnet/ensembles/BoostAODE.h"
const std::string ACTUAL_VERSION = "1.2.0";
const std::string ACTUAL_VERSION = "1.2.1";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{
@@ -31,9 +31,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{{"diabetes", "SPODE"}, 0.802083},
{{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8125f},
{{"diabetes", "KDBLd"}, 0.80208f},
{{"diabetes", "KDBLd"}, 0.804688f},
{{"diabetes", "SPODELd"}, 0.7890625f},
{{"diabetes", "TANLd"}, 0.803385437f},
{{"diabetes", "TANLd"}, 0.8125f},
{{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli
{{"ecoli", "AODE"}, 0.889881},
@@ -42,9 +42,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{{"ecoli", "SPODE"}, 0.880952},
{{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.875f},
{{"ecoli", "KDBLd"}, 0.880952358f},
{{"ecoli", "KDBLd"}, 0.872024f},
{{"ecoli", "SPODELd"}, 0.839285731f},
{{"ecoli", "TANLd"}, 0.848214269f},
{{"ecoli", "TANLd"}, 0.869047642f},
{{"ecoli", "BoostAODE"}, 0.89583f},
// Glass
{{"glass", "AODE"}, 0.79439},
@@ -53,9 +53,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{{"glass", "SPODE"}, 0.775701},
{{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.799065411f},
{{"glass", "KDBLd"}, 0.82710278f},
{{"glass", "KDBLd"}, 0.864485979f},
{{"glass", "SPODELd"}, 0.780373812f},
{{"glass", "TANLd"}, 0.869158864f},
{{"glass", "TANLd"}, 0.831775725f},
{{"glass", "BoostAODE"}, 0.84579f},
// Iris
{{"iris", "AODE"}, 0.973333},
@@ -68,29 +68,29 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{{"iris", "SPODELd"}, 0.96f},
{{"iris", "TANLd"}, 0.97333f},
{{"iris", "BoostAODE"}, 0.98f} };
std::map<std::string, bayesnet::BaseClassifier*> models{ {"AODE", new bayesnet::AODE()},
{"AODELd", new bayesnet::AODELd()},
{"BoostAODE", new bayesnet::BoostAODE()},
{"KDB", new bayesnet::KDB(2)},
{"KDBLd", new bayesnet::KDBLd(2)},
{"XSPODE", new bayesnet::XSpode(1)},
{"SPODE", new bayesnet::SPODE(1)},
{"SPODELd", new bayesnet::SPODELd(1)},
{"TAN", new bayesnet::TAN()},
{"TANLd", new bayesnet::TANLd()} };
std::map<std::string, std::unique_ptr<bayesnet::BaseClassifier>> models;
models["AODE"] = std::make_unique<bayesnet::AODE>();
models["AODELd"] = std::make_unique<bayesnet::AODELd>();
models["BoostAODE"] = std::make_unique<bayesnet::BoostAODE>();
models["KDB"] = std::make_unique<bayesnet::KDB>(2);
models["KDBLd"] = std::make_unique<bayesnet::KDBLd>(2);
models["XSPODE"] = std::make_unique<bayesnet::XSpode>(1);
models["SPODE"] = std::make_unique<bayesnet::SPODE>(1);
models["SPODELd"] = std::make_unique<bayesnet::SPODELd>(1);
models["TAN"] = std::make_unique<bayesnet::TAN>();
models["TANLd"] = std::make_unique<bayesnet::TANLd>();
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd");
auto clf = models[name];
auto clf = std::move(models[name]);
SECTION("Test " + name + " classifier")
{
for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) {
auto clf = models[name];
auto discretize = name.substr(name.length() - 2) != "Ld";
auto raw = RawDatasets(file_name, discretize);
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto score = clf->score(raw.Xt, raw.yt);
// std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " <<
// scores[{file_name, name}] << std::endl;
// scores[{file_name, name}] << std::endl;
INFO("Classifier: " << name << " File: " << file_name);
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
@@ -101,7 +101,6 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
INFO("Checking version of " << name << " classifier");
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
}
delete clf;
}
TEST_CASE("Models features & Graph", "[Models]")
{
@@ -133,7 +132,7 @@ TEST_CASE("Models features & Graph", "[Models]")
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 27);
REQUIRE(clf.getNumberOfStates() == 26);
REQUIRE(clf.getClassNumStates() == 3);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
"petallength -> sepallength, ", "petalwidth -> ",
@@ -149,7 +148,6 @@ TEST_CASE("Get num features & num edges", "[Models]")
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
TEST_CASE("Model predict_proba", "[Models]")
{
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting", "TANLd", "SPODELd", "KDBLd");
@@ -180,15 +178,15 @@ TEST_CASE("Model predict_proba", "[Models]")
{0.0284828, 0.770524, 0.200993},
{0.0213182, 0.857189, 0.121493},
{0.00868436, 0.949494, 0.0418215} });
auto res_prob_tanld = std::vector<std::vector<double>>({ {0.000544493, 0.995796, 0.00365992 },
{0.000908092, 0.997268, 0.00182429 },
{0.000908092, 0.997268, 0.00182429 },
{0.000908092, 0.997268, 0.00182429 },
{0.00228423, 0.994645, 0.00307078 },
{0.00120539, 0.0666788, 0.932116 },
{0.00361847, 0.979203, 0.017179 },
{0.00483293, 0.985326, 0.00984064 },
{0.000595606, 0.9977, 0.00170441 } });
auto res_prob_tanld = std::vector<std::vector<double>>({ {0.000597557, 0.9957, 0.00370254},
{0.000731377, 0.997914, 0.0013544},
{0.000731377, 0.997914, 0.0013544},
{0.000731377, 0.997914, 0.0013544},
{0.000838614, 0.998122, 0.00103923},
{0.00130852, 0.0659492, 0.932742},
{0.00365946, 0.979412, 0.0169281},
{0.00435035, 0.986248, 0.00940212},
{0.000583815, 0.997746, 0.00167066} });
auto res_prob_spodeld = std::vector<std::vector<double>>({ {0.000908024, 0.993742, 0.00535024 },
{0.00187726, 0.99167, 0.00645308 },
{0.00187726, 0.99167, 0.00645308 },
@@ -216,29 +214,33 @@ TEST_CASE("Model predict_proba", "[Models]")
{"TANLd", res_prob_tanld},
{"SPODELd", res_prob_spodeld},
{"KDBLd", res_prob_kdbld} };
std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()},
{"SPODE", new bayesnet::SPODE(0)},
{"BoostAODEproba", new bayesnet::BoostAODE(false)},
{"BoostAODEvoting", new bayesnet::BoostAODE(true)},
{"TANLd", new bayesnet::TANLd()},
{"SPODELd", new bayesnet::SPODELd(0)},
{"KDBLd", new bayesnet::KDBLd(2)} };
std::map<std::string, std::unique_ptr<bayesnet::BaseClassifier>> models;
models["TAN"] = std::make_unique<bayesnet::TAN>();
models["SPODE"] = std::make_unique<bayesnet::SPODE>(0);
models["BoostAODEproba"] = std::make_unique<bayesnet::BoostAODE>(false);
models["BoostAODEvoting"] = std::make_unique<bayesnet::BoostAODE>(true);
models["TANLd"] = std::make_unique<bayesnet::TANLd>();
models["SPODELd"] = std::make_unique<bayesnet::SPODELd>(0);
models["KDBLd"] = std::make_unique<bayesnet::KDBLd>(2);
int init_index = 78;
SECTION("Test " + model + " predict_proba")
{
INFO("Testing " << model << " predict_proba");
auto ld_model = model.substr(model.length() - 2) == "Ld";
auto discretize = !ld_model;
auto raw = RawDatasets("iris", discretize);
auto clf = models[model];
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto yt_pred_proba = clf->predict_proba(raw.Xt);
auto yt_pred = clf->predict(raw.Xt);
auto& clf = *models[model];
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
auto yt_pred_proba = clf.predict_proba(raw.Xt);
auto yt_pred = clf.predict(raw.Xt);
std::vector<int> y_pred;
std::vector<std::vector<double>> y_pred_proba;
if (!ld_model) {
y_pred = clf->predict(raw.Xv);
y_pred_proba = clf->predict_proba(raw.Xv);
y_pred = clf.predict(raw.Xv);
y_pred_proba = clf.predict_proba(raw.Xv);
REQUIRE(y_pred.size() == y_pred_proba.size());
REQUIRE(y_pred.size() == yt_pred.size(0));
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
@@ -267,18 +269,20 @@ TEST_CASE("Model predict_proba", "[Models]")
} else {
// Check predict_proba values for vectors and tensors
auto predictedClasses = yt_pred_proba.argmax(1);
// std::cout << model << std::endl;
for (int i = 0; i < 9; i++) {
REQUIRE(predictedClasses[i].item<int>() == yt_pred[i].item<int>());
// std::cout << "{";
for (int j = 0; j < 3; j++) {
// std::cout << yt_pred_proba[i + init_index][j].item<double>() << ", ";
REQUIRE(res_prob[model][i][j] ==
Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
}
// std::cout << "\b\b}," << std::endl;
}
}
delete clf;
}
}
TEST_CASE("AODE voting-proba", "[Models]")
{
auto raw = RawDatasets("glass", true);
@@ -297,17 +301,30 @@ TEST_CASE("AODE voting-proba", "[Models]")
REQUIRE(pred_proba[67][0] == Catch::Approx(0.702184).epsilon(raw.epsilon));
REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("SPODELd dataset", "[Models]")
TEST_CASE("Ld models with dataset", "[Models]")
{
    // Continuous (non-discretized) iris: each Ld model must discretize
    // locally and reach the same accuracy whether fitted from the packed
    // dataset tensor or from separate X/y tensors.
    auto raw = RawDatasets("iris", false);
    // SPODELd
    auto clf = bayesnet::SPODELd(0);
    clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
    auto score = clf.score(raw.Xt, raw.yt);
    clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
    auto scoret = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
    REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
    // TANLd
    auto clf2 = bayesnet::TANLd();
    clf2.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
    auto score2 = clf2.score(raw.Xt, raw.yt);
    clf2.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
    auto score2t = clf2.score(raw.Xt, raw.yt);
    REQUIRE(score2 == Catch::Approx(0.97333f).epsilon(raw.epsilon));
    REQUIRE(score2t == Catch::Approx(0.97333f).epsilon(raw.epsilon));
    // KDBLd
    auto clf3 = bayesnet::KDBLd(2);
    clf3.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
    auto score3 = clf3.score(raw.Xt, raw.yt);
    clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
    auto score3t = clf3.score(raw.Xt, raw.yt);
    REQUIRE(score3 == Catch::Approx(0.97333f).epsilon(raw.epsilon));
    REQUIRE(score3t == Catch::Approx(0.97333f).epsilon(raw.epsilon));
}
TEST_CASE("KDB with hyperparameters", "[Models]")
{
@@ -324,11 +341,15 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
}
TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
TEST_CASE("Incorrect type of data for Ld models", "[Models]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::SPODELd(0);
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
auto clfs = bayesnet::SPODELd(0);
REQUIRE_THROWS_AS(clfs.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
auto clft = bayesnet::TANLd();
REQUIRE_THROWS_AS(clft.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
auto clfk = bayesnet::KDBLd(0);
REQUIRE_THROWS_AS(clfk.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
}
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
{
@@ -386,14 +407,15 @@ TEST_CASE("Check proposal checkInput", "[Models]")
{
class testProposal : public bayesnet::Proposal {
public:
testProposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_)
: Proposal(dataset_, features_, className_)
testProposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_, std::vector<std::string>& notes_)
: Proposal(dataset_, features_, className_, notes_)
{
}
void test_X_y(const torch::Tensor& X, const torch::Tensor& y) { checkInput(X, y); }
};
auto raw = RawDatasets("iris", true);
auto clf = testProposal(raw.dataset, raw.features, raw.className);
std::vector<std::string> notes;
auto clf = testProposal(raw.dataset, raw.features, raw.className, notes);
torch::Tensor X = torch::randint(0, 3, { 10, 4 });
torch::Tensor y = torch::rand({ 10 });
INFO("Check X is not float");
@@ -428,3 +450,49 @@ TEST_CASE("Check KDB loop detection", "[Models]")
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights));
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights));
}
TEST_CASE("Local discretization hyperparameters", "[Models]")
{
    // Continuous (non-discretized) iris so the Ld classifiers run their
    // own local discretization during fit(). Each model below must accept
    // its hyperparameters, fit without throwing, and end in NORMAL status.
    auto raw = RawDatasets("iris", false);
    // SPODELd: convergence-related hyperparameters
    auto clfs = bayesnet::SPODELd(0);
    clfs.setHyperparameters({
        {"max_iterations", 7},
        {"verbose_convergence", true},
        });
    REQUIRE_NOTHROW(clfs.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clfs.getStatus() == bayesnet::NORMAL);
    // KDBLd: structure-learning hyperparameters (k, theta)
    auto clfk = bayesnet::KDBLd(0);
    clfk.setHyperparameters({
        {"k", 3},
        {"theta", 1e-4},
        });
    REQUIRE_NOTHROW(clfk.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clfk.getStatus() == bayesnet::NORMAL);
    // AODELd: discretization algorithm selection (BINQ = quantile binning)
    auto clfa = bayesnet::AODELd();
    clfa.setHyperparameters({
        {"ld_proposed_cuts", 9},
        {"ld_algorithm", "BINQ"},
        });
    REQUIRE_NOTHROW(clfa.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clfa.getStatus() == bayesnet::NORMAL);
    // TANLd: exercise all three discretization algorithms in sequence
    // (MDLP with depth/length limits, then BINQ, then BINU)
    auto clft = bayesnet::TANLd();
    clft.setHyperparameters({
        {"ld_proposed_cuts", 7},
        {"mdlp_max_depth", 5},
        {"mdlp_min_length", 3},
        {"ld_algorithm", "MDLP"},
        });
    REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clft.getStatus() == bayesnet::NORMAL);
    clft.setHyperparameters({
        {"ld_proposed_cuts", 9},
        {"ld_algorithm", "BINQ"},
        });
    REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clft.getStatus() == bayesnet::NORMAL);
    clft.setHyperparameters({
        {"ld_proposed_cuts", 5},
        {"ld_algorithm", "BINU"},
        });
    REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
    REQUIRE(clft.getStatus() == bayesnet::NORMAL);
}

View File

@@ -338,6 +338,190 @@ TEST_CASE("Test Bayesian Network", "[Network]")
REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
}
    SECTION("Test assignment operator")
    {
        INFO("Test assignment operator");
        // Create original network and fit it so it carries full state
        auto net1 = bayesnet::Network();
        buildModel(net1, raw.features, raw.className);
        net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
        // Create empty network and assign
        auto net2 = bayesnet::Network();
        net2.addNode("TempNode"); // Add something to make sure it gets cleared
        net2 = net1;
        // Verify they are equal after assignment
        REQUIRE(net1.getFeatures() == net2.getFeatures());
        REQUIRE(net1.getEdges() == net2.getEdges());
        REQUIRE(net1.getNumEdges() == net2.getNumEdges());
        REQUIRE(net1.getStates() == net2.getStates());
        REQUIRE(net1.getClassName() == net2.getClassName());
        REQUIRE(net1.getClassNumStates() == net2.getClassNumStates());
        REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0));
        REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1));
        REQUIRE(net1.getNodes().size() == net2.getNodes().size());
        // Verify topology equality
        REQUIRE(net1 == net2);
        // Verify they are separate objects (deep copy) by modifying one:
        // the change must not propagate back to the source network
        net2.initialize();
        net2.addNode("OnlyInNet2");
        REQUIRE(net1.getNodes().size() != net2.getNodes().size());
        REQUIRE_FALSE(net1 == net2);
    }
    SECTION("Test self assignment")
    {
        INFO("Test self assignment");
        buildModel(net, raw.features, raw.className);
        net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
        // Snapshot state before the self-assignment
        int original_edges = net.getNumEdges();
        int original_nodes = net.getNodes().size();
        // Self assignment should not corrupt the network
        net = net;
        // getFeatures() includes the class name as the last entry
        auto all_features = raw.features;
        all_features.push_back(raw.className);
        REQUIRE(net.getNumEdges() == original_edges);
        REQUIRE(net.getNodes().size() == original_nodes);
        REQUIRE(net.getFeatures() == all_features);
        REQUIRE(net.getClassName() == raw.className);
    }
    SECTION("Test operator== topology comparison")
    {
        INFO("Test operator== topology comparison");
        // Test 1: Two identical networks compare equal
        auto net1 = bayesnet::Network();
        auto net2 = bayesnet::Network();
        net1.addNode("A");
        net1.addNode("B");
        net1.addNode("C");
        net1.addEdge("A", "B");
        net1.addEdge("B", "C");
        net2.addNode("A");
        net2.addNode("B");
        net2.addNode("C");
        net2.addEdge("A", "B");
        net2.addEdge("B", "C");
        REQUIRE(net1 == net2);
        // Test 2: Different nodes -> unequal
        auto net3 = bayesnet::Network();
        net3.addNode("A");
        net3.addNode("D"); // Different node
        REQUIRE_FALSE(net1 == net3);
        // Test 3: Same nodes, different edges -> unequal
        auto net4 = bayesnet::Network();
        net4.addNode("A");
        net4.addNode("B");
        net4.addNode("C");
        net4.addEdge("A", "C"); // Different topology
        net4.addEdge("B", "C");
        REQUIRE_FALSE(net1 == net4);
        // Test 4: Empty networks compare equal
        auto net5 = bayesnet::Network();
        auto net6 = bayesnet::Network();
        REQUIRE(net5 == net6);
        // Test 5: Same topology, different edge insertion order
        // (operator== sorts edge lists, so order must not matter)
        auto net7 = bayesnet::Network();
        net7.addNode("A");
        net7.addNode("B");
        net7.addNode("C");
        net7.addEdge("B", "C"); // Add edges in different order
        net7.addEdge("A", "B");
        REQUIRE(net1 == net7); // Should still be equal
    }
SECTION("Test RAII compliance with smart pointers")
{
    INFO("Test RAII compliance with smart pointers");
    auto source = std::make_unique<bayesnet::Network>();
    buildModel(*source, raw.features, raw.className);
    source->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
    // Copy-construct a second heap-allocated network from the fitted one.
    auto clone = std::make_unique<bayesnet::Network>(*source);
    REQUIRE(*source == *clone);
    REQUIRE(source->getNumEdges() == clone->getNumEdges());
    REQUIRE(source->getNodes().size() == clone->getNodes().size());
    // Destroy the original: the copy must not share owned resources with it.
    source.reset();
    // The surviving copy must still be able to predict...
    std::vector<std::vector<int>> test = { {1}, {2}, {0}, {1} };
    REQUIRE_NOTHROW(clone->predict(test));
    // ...and must remain fully functional after re-initialization.
    clone->initialize();
    REQUIRE_NOTHROW(clone->addNode("NewNode"));
    REQUIRE(clone->getNodes().count("NewNode") == 1);
}
SECTION("Test complex topology copy")
{
    INFO("Test complex topology copy");
    auto original = bayesnet::Network();
    // Build a three-level tree: Root -> {Child1, Child2} -> grandchildren.
    for (const auto& name : { "Root", "Child1", "Child2",
                              "Grandchild1", "Grandchild2", "Grandchild3" }) {
        original.addNode(name);
    }
    original.addEdge("Root", "Child1");
    original.addEdge("Root", "Child2");
    original.addEdge("Child1", "Grandchild1");
    original.addEdge("Child1", "Grandchild2");
    original.addEdge("Child2", "Grandchild3");
    // Copy-assign and check the whole topology survived intact.
    auto copy = original;
    REQUIRE(original == copy);
    REQUIRE(original.getNodes().size() == copy.getNodes().size());
    REQUIRE(original.getNumEdges() == copy.getNumEdges());
    // Edge lists must have been reconstructed to the same size.
    auto originalEdges = original.getEdges();
    auto copyEdges = copy.getEdges();
    REQUIRE(originalEdges.size() == copyEdges.size());
    // Every node of the copy must mirror its counterpart's relationships.
    for (const auto& [nodeName, originalPtr] : original.getNodes()) {
        const auto* originalNode = originalPtr.get();
        const auto* copyNode = copy.getNodes().at(nodeName).get();
        REQUIRE(originalNode->getParents().size() == copyNode->getParents().size());
        REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size());
        // Parents must match by name, position by position.
        for (size_t i = 0; i < originalNode->getParents().size(); ++i) {
            REQUIRE(originalNode->getParents()[i]->getName() ==
                copyNode->getParents()[i]->getName());
        }
        // Children must match by name, position by position.
        for (size_t i = 0; i < originalNode->getChildren().size(); ++i) {
            REQUIRE(originalNode->getChildren()[i]->getName() ==
                copyNode->getChildren()[i]->getName());
        }
    }
}
}
TEST_CASE("Test and empty Node", "[Network]")

View File

@@ -158,4 +158,48 @@ TEST_CASE("TEST MinFill method", "[Node]")
REQUIRE(node_2.minFill() == 6);
REQUIRE(node_3.minFill() == 3);
REQUIRE(node_4.minFill() == 1);
}
TEST_CASE("Test operator =", "[Node]")
{
    // Node::operator= must copy the name, the CPT and the number of states,
    // but must NOT copy the parent/child pointers: those reference nodes of
    // another graph and are rebuilt by the owning Network.
    // Build a node with one parent (3 states) and one child (2 states).
    auto node = bayesnet::Node("N1");
    auto parent_1 = bayesnet::Node("P1");
    parent_1.setNumStates(3);
    auto child_1 = bayesnet::Node("H1");
    child_1.setNumStates(2);
    node.addParent(&parent_1);
    node.addChild(&child_1);
    // Fit a CPT from a toy dataset; rows correspond to the variables N1, P1, H1.
    auto dataset = torch::tensor({ {1, 0, 0, 1}, {0, 1, 2, 1}, {0, 1, 1, 0} });
    auto features = std::vector<std::string>{ "N1", "P1", "H1" };
    auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble);
    node.setNumStates(2);
    node.computeCPT(dataset, features, 0.0, weights);
    // Get the cpt of the node
    auto cpt = node.getCPT();
    // The CPT must be a non-empty 2-D tensor: node states x parent states.
    REQUIRE(cpt.numel() > 0);
    auto dimensions = cpt.sizes();
    REQUIRE(dimensions.size() == 2);
    REQUIRE(dimensions[0] == 2); // Number of states of the node
    REQUIRE(dimensions[1] == 3); // Number of states of the first parent
    // Copy-assign onto an unrelated node.
    bayesnet::Node node_copy("XX");
    node_copy = node;
    // Structural links must NOT have been copied...
    auto parents = node_copy.getParents();
    auto children = node_copy.getChildren();
    REQUIRE(parents.size() == 0);
    REQUIRE(children.size() == 0);
    // ...but the name must be.
    REQUIRE(node_copy.getName() == "N1");
    // The CPT must be an element-wise equal copy.
    auto cpt_copy = node_copy.getCPT();
    REQUIRE(cpt_copy.equal(cpt));
    // And the number of states must match as well.
    REQUIRE(node_copy.getNumStates() == node.getNumStates());
}

View File

@@ -16,10 +16,10 @@
#include "TestUtils.h"
std::map<std::string, std::string> modules = {
{ "mdlp", "2.1.0" },
{ "Folding", "1.1.1" },
{ "mdlp", "2.1.1" },
{ "Folding", "1.1.2" },
{ "json", "3.11" },
{ "ArffFiles", "1.2.0" }
{ "ArffFiles", "1.2.1" }
};
TEST_CASE("MDLP", "[Modules]")