From 97894cc49c71282ca89a882b6ca70fe9c3f2fd23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
Date: Sun, 6 Jul 2025 18:49:05 +0200
Subject: [PATCH 01/11] First approach with derived class

---
 ITERATIVE_PROPOSAL_README.md              | 114 ++++++++++++++++
 Makefile.iterative                        |  20 +++
 bayesnet/classifiers/IterativeProposal.cc | 151 ++++++++++++++++++++++
 bayesnet/classifiers/IterativeProposal.h  |  50 +++++++
 bayesnet/classifiers/TANLdi.cc            |  45 +++++++
 bayesnet/classifiers/TANLdi.h             |  24 ++++
 test_iterative_proposal.cpp               |  66 ++++++++++
 7 files changed, 470 insertions(+)
 create mode 100644 ITERATIVE_PROPOSAL_README.md
 create mode 100644 Makefile.iterative
 create mode 100644 bayesnet/classifiers/IterativeProposal.cc
 create mode 100644 bayesnet/classifiers/IterativeProposal.h
 create mode 100644 bayesnet/classifiers/TANLdi.cc
 create mode 100644 bayesnet/classifiers/TANLdi.h
 create mode 100644 test_iterative_proposal.cpp

diff --git a/ITERATIVE_PROPOSAL_README.md b/ITERATIVE_PROPOSAL_README.md
new file mode 100644
index 0000000..a1683a9
--- /dev/null
+++ b/ITERATIVE_PROPOSAL_README.md
@@ -0,0 +1,114 @@
+# Iterative Proposal Implementation
+
+This implementation extends the existing local discretization framework with iterative convergence capabilities, following the analysis from `local_discretization_analysis.md`.
+
+## Key Components
+
+### 1. IterativeProposal Class
+- **File**: `bayesnet/classifiers/IterativeProposal.h|cc`
+- **Purpose**: Extends the base `Proposal` class with iterative convergence logic
+- **Key Method**: `iterativeLocalDiscretization()` - performs iterative refinement until convergence
+
+### 2. TANLdi Example
+- **File**: `bayesnet/classifiers/TANLdi.h|cc`
+- **Purpose**: Demonstrates how to adapt existing Ld classifiers to use iterative discretization
+- **Pattern**: Inherits from both `TAN` and `IterativeProposal`
+
+## Architecture
+
+The implementation follows the established dual inheritance pattern:
+
+```cpp
+class TANLdi : public TAN, public IterativeProposal
+```
+
+This maintains the same interface as existing Ld classifiers while adding convergence capabilities.
+
+## Convergence Algorithm
+
+The iterative process works as follows:
+
+1. **Initial Discretization**: Use class-only discretization (`fit_local_discretization()`)
+2. **Iterative Refinement Loop**:
+   - Build model with current discretization (call parent `fit()`)
+   - Refine discretization using network structure (`localDiscretizationProposal()`)
+   - Compute convergence metric (likelihood or accuracy)
+   - Check for convergence based on tolerance
+   - Repeat until convergence or max iterations reached
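[Editor's aside, not part of the patch: the loop the README just listed is implemented in `IterativeProposal::iterativeLocalDiscretization()` further down in this commit; condensed, it amounts to this sketch, using the member names defined in that file.]

```cpp
// Condensed sketch of the convergence loop from IterativeProposal.cc
auto currentStates = fit_local_discretization(y);            // 1. class-only discretization
double previousValue = -std::numeric_limits<double>::infinity();
for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
    classifier->fit(dataset, features, className, currentStates, smoothing);              // build model
    auto newStates = localDiscretizationProposal(currentStates, classifier->getModel());  // refine cuts
    double currentValue = computeLogLikelihood(classifier->getModel(), dataset);          // metric
    if (iteration > 0 && hasConverged(currentValue, previousValue, convergence_params.convergenceMetric)) {
        currentStates = newStates;
        break;                                               // improvement below tolerance
    }
    currentStates = newStates;
    previousValue = currentValue;
}
```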
+
+## Configuration Parameters
+
+- `max_iterations`: Maximum number of iterations (default: 10)
+- `tolerance`: Convergence tolerance (default: 1e-6)
+- `convergence_metric`: "likelihood" or "accuracy" (default: "likelihood")
+- `verbose_convergence`: Enable verbose logging (default: false)
+
+## Usage Example
+
+```cpp
+#include "bayesnet/classifiers/TANLdi.h"
+
+// Create classifier
+bayesnet::TANLdi classifier;
+
+// Set convergence parameters
+nlohmann::json hyperparams;
+hyperparams["max_iterations"] = 5;
+hyperparams["tolerance"] = 1e-4;
+hyperparams["convergence_metric"] = "likelihood";
+hyperparams["verbose_convergence"] = true;
+
+classifier.setHyperparameters(hyperparams);
+
+// Fit and use normally
+classifier.fit(X, y, features, className, states, smoothing);
+auto predictions = classifier.predict(X_test);
+```
+
+## Testing
+
+Run the test with:
+```bash
+make -f Makefile.iterative test-iterative
+```
+
+## Integration with Existing Code
+
+To convert existing Ld classifiers to use iterative discretization:
+
+1. Change inheritance from `Proposal` to `IterativeProposal`
+2. Replace the discretization logic in the `fit()` method:
+   ```cpp
+   // Old approach:
+   states = fit_local_discretization(y);
+   TAN::fit(dataset, features, className, states, smoothing);
+   states = localDiscretizationProposal(states, model);
+
+   // New approach:
+   states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
+   TAN::fit(dataset, features, className, states, smoothing);
+   ```
+
+## Benefits
+
+1. **Convergence**: Iterative refinement until stable discretization
+2. **Flexibility**: Configurable convergence criteria and limits
+3. **Compatibility**: Maintains existing interface and patterns
+4. **Monitoring**: Optional verbose logging for convergence tracking
+5. **Extensibility**: Easy to add new convergence metrics or stopping criteria
+
+## Performance Considerations
+
+- The iterative approach will be slower than the original two-phase method
+- Convergence monitoring adds computational overhead
+- Set an appropriate `max_iterations` to bound the running time
+- The `tolerance` parameter should be tuned for your specific use case
+
+## Future Enhancements
+
+Potential improvements:
+1. Add more convergence metrics (e.g., AIC, BIC, cross-validation score)
+2. Implement early stopping based on validation performance
+3. Add support for different discretization schedules
+4. Optimize likelihood computation for better performance
+5. Add convergence visualization and reporting tools
\ No newline at end of file
diff --git a/Makefile.iterative b/Makefile.iterative
new file mode 100644
index 0000000..84e3546
--- /dev/null
+++ b/Makefile.iterative
@@ -0,0 +1,20 @@
+# Makefile for testing iterative proposal implementation
+# Include this in the main Makefile or use directly
+
+# Test iterative proposal
+test-iterative: buildd
+	@echo "Building iterative proposal test..."
+	cd build_Debug && g++ -std=c++17 -I../bayesnet -I../config -I/usr/local/include \
+		../test_iterative_proposal.cpp \
+		-L. 
-lbayesnet \ + -ltorch -ltorch_cpu \ + -pthread \ + -o test_iterative_proposal + @echo "Running iterative proposal test..." + cd build_Debug && ./test_iterative_proposal + +# Clean test +clean-test: + rm -f build_Debug/test_iterative_proposal + +.PHONY: test-iterative clean-test \ No newline at end of file diff --git a/bayesnet/classifiers/IterativeProposal.cc b/bayesnet/classifiers/IterativeProposal.cc new file mode 100644 index 0000000..7973644 --- /dev/null +++ b/bayesnet/classifiers/IterativeProposal.cc @@ -0,0 +1,151 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + +#include "IterativeProposal.h" +#include +#include + +namespace bayesnet { + + IterativeProposal::IterativeProposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_) + : Proposal(pDataset, features_, className_) {} + + void IterativeProposal::setHyperparameters(const nlohmann::json& hyperparameters_) { + // First set base Proposal hyperparameters + Proposal::setHyperparameters(hyperparameters_); + + // Then set IterativeProposal specific hyperparameters + if (hyperparameters_.contains("max_iterations")) { + convergence_params.maxIterations = hyperparameters_["max_iterations"]; + } + if (hyperparameters_.contains("tolerance")) { + convergence_params.tolerance = hyperparameters_["tolerance"]; + } + if (hyperparameters_.contains("convergence_metric")) { + convergence_params.convergenceMetric = hyperparameters_["convergence_metric"]; + } + if (hyperparameters_.contains("verbose_convergence")) { + convergence_params.verbose = hyperparameters_["verbose_convergence"]; + } + } + + template + map> IterativeProposal::iterativeLocalDiscretization( + const torch::Tensor& y, + Classifier* classifier, + const torch::Tensor& dataset, + const std::vector& features, + const std::string& className, + const map>& initialStates, + double smoothing + ) { + // Phase 1: Initial discretization (same as original) + auto currentStates = fit_local_discretization(y); + + double previousValue = -std::numeric_limits::infinity(); + double currentValue = 0.0; + + if (convergence_params.verbose) { + std::cout << "Starting iterative local discretization with " + << convergence_params.maxIterations << " max iterations" << std::endl; + } + + for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) { + if (convergence_params.verbose) { + std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl; + } + + // Phase 2: Build model with current discretization + classifier->fit(dataset, features, className, currentStates, smoothing); + + // Phase 3: Network-aware discretization refinement + auto newStates = localDiscretizationProposal(currentStates, classifier->getModel()); + + // Phase 4: Compute convergence metric + if (convergence_params.convergenceMetric == "likelihood") { + currentValue = computeLogLikelihood(classifier->getModel(), dataset); + } else if (convergence_params.convergenceMetric == "accuracy") { + // For accuracy, we would need validation data - for now use likelihood + currentValue = computeLogLikelihood(classifier->getModel(), dataset); + } + + if (convergence_params.verbose) { + std::cout << " " << convergence_params.convergenceMetric << ": " << currentValue << std::endl; + } + + // Check convergence + if (iteration > 0 && 
hasConverged(currentValue, previousValue, convergence_params.convergenceMetric)) { + if (convergence_params.verbose) { + std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl; + } + currentStates = newStates; + break; + } + + // Update for next iteration + currentStates = newStates; + previousValue = currentValue; + } + + return currentStates; + } + + double IterativeProposal::computeLogLikelihood(const Network& model, const torch::Tensor& dataset) { + double logLikelihood = 0.0; + int n_samples = dataset.size(0); + int n_features = dataset.size(1); + + for (int i = 0; i < n_samples; ++i) { + double sampleLogLikelihood = 0.0; + + // Get class value for this sample + int classValue = dataset[i][n_features - 1].item(); + + // Compute log-likelihood for each feature given its parents and class + for (const auto& node : model.getNodes()) { + if (node.getName() == model.getClassName()) { + // For class node, add log P(class) + auto classCounts = node.getCPT(); + double classProb = classCounts[classValue] / dataset.size(0); + sampleLogLikelihood += std::log(std::max(classProb, 1e-10)); + } else { + // For feature nodes, add log P(feature | parents, class) + int featureIdx = std::distance(model.getFeatures().begin(), + std::find(model.getFeatures().begin(), + model.getFeatures().end(), + node.getName())); + int featureValue = dataset[i][featureIdx].item(); + + // Simplified probability computation - in practice would need full CPT lookup + double featureProb = 0.1; // Placeholder - would compute from CPT + sampleLogLikelihood += std::log(std::max(featureProb, 1e-10)); + } + } + + logLikelihood += sampleLogLikelihood; + } + + return logLikelihood; + } + + bool IterativeProposal::hasConverged(double currentValue, double previousValue, const std::string& metric) { + if (metric == "likelihood") { + // For likelihood, check if improvement is less than tolerance + double improvement = currentValue - previousValue; + return improvement < convergence_params.tolerance; + } else if (metric == "accuracy") { + // For accuracy, check if change is less than tolerance + double change = std::abs(currentValue - previousValue); + return change < convergence_params.tolerance; + } + return false; + } + + // Explicit template instantiation for common classifier types + template map> IterativeProposal::iterativeLocalDiscretization( + const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector&, + const std::string&, const map>&, double); +} \ No newline at end of file diff --git a/bayesnet/classifiers/IterativeProposal.h b/bayesnet/classifiers/IterativeProposal.h new file mode 100644 index 0000000..4453c2c --- /dev/null +++ b/bayesnet/classifiers/IterativeProposal.h @@ -0,0 +1,50 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + +#ifndef ITERATIVE_PROPOSAL_H +#define ITERATIVE_PROPOSAL_H + +#include "Proposal.h" +#include "bayesnet/network/Network.h" +#include + +namespace bayesnet { + class IterativeProposal : public Proposal { + public: + IterativeProposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_); + void setHyperparameters(const nlohmann::json& hyperparameters_); + + protected: + template + map> iterativeLocalDiscretization( + const torch::Tensor& y, + Classifier* classifier, + const torch::Tensor& dataset, + const std::vector& 
features,
+            const std::string& className,
+            const map<std::string, std::vector<int>>& initialStates,
+            double smoothing = 1.0
+        );
+
+        // Convergence parameters
+        struct {
+            int maxIterations = 10;
+            double tolerance = 1e-6;
+            std::string convergenceMetric = "likelihood"; // "likelihood" or "accuracy"
+            bool verbose = false;
+        } convergence_params;
+
+        nlohmann::json validHyperparameters_iter = {
+            "max_iterations", "tolerance", "convergence_metric", "verbose_convergence"
+        };
+
+    private:
+        double computeLogLikelihood(const Network& model, const torch::Tensor& dataset);
+        bool hasConverged(double currentValue, double previousValue, const std::string& metric);
+    };
+}
+
+#endif
\ No newline at end of file
diff --git a/bayesnet/classifiers/TANLdi.cc b/bayesnet/classifiers/TANLdi.cc
new file mode 100644
index 0000000..4df5d5c
--- /dev/null
+++ b/bayesnet/classifiers/TANLdi.cc
@@ -0,0 +1,45 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "TANLdi.h"
+
+namespace bayesnet {
+    TANLdi::TANLdi() : TAN(), IterativeProposal(dataset, features, className) {}
+
+    TANLdi& TANLdi::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        checkInput(X_, y_);
+        features = features_;
+        className = className_;
+        Xf = X_;
+        y = y_;
+
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
+
+        // Final fit with converged discretization
+        TAN::fit(dataset, features, className, states, smoothing);
+
+        return *this;
+    }
+
+    torch::Tensor TANLdi::predict(torch::Tensor& X)
+    {
+        auto Xt = prepareX(X);
+        return TAN::predict(Xt);
+    }
+
+    torch::Tensor TANLdi::predict_proba(torch::Tensor& X)
+    {
+        auto Xt = prepareX(X);
+        return TAN::predict_proba(Xt);
+    }
+
+    std::vector<std::string> TANLdi::graph(const std::string& name) const
+    {
+        return TAN::graph(name);
+    }
+}
\ No newline at end of file
diff --git a/bayesnet/classifiers/TANLdi.h b/bayesnet/classifiers/TANLdi.h
new file mode 100644
index 0000000..f850a59
--- /dev/null
+++ b/bayesnet/classifiers/TANLdi.h
@@ -0,0 +1,24 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef TANLDI_H
+#define TANLDI_H
+#include "TAN.h"
+#include "IterativeProposal.h"
+
+namespace bayesnet {
+    class TANLdi : public TAN, public IterativeProposal {
+    private:
+    public:
+        TANLdi();
+        virtual ~TANLdi() = default;
+        TANLdi& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        std::vector<std::string> graph(const std::string& name = "TANLdi") const override;
+        torch::Tensor predict(torch::Tensor& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
+    };
+}
+#endif // !TANLDI_H
\ No newline at end of file
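[Editor's aside, not part of the patch: before the test program below, it may help to see that the same dual-inheritance recipe generalizes to other Ld classifiers. This is a hedged sketch; `KDBLdi` is a hypothetical name used only for illustration here (a real KDBLd adaptation lands in a later commit of this series).]

```cpp
// Hypothetical KDB counterpart of TANLdi above (illustration only).
#include "KDB.h"
#include "IterativeProposal.h"

namespace bayesnet {
    class KDBLdi : public KDB, public IterativeProposal {
    public:
        explicit KDBLdi(int k) : KDB(k), IterativeProposal(dataset, features, className) {}
        KDBLdi& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_,
                    const std::string& className_, map<std::string, std::vector<int>>& states_,
                    const Smoothing_t smoothing)
        {
            checkInput(X_, y_);
            features = features_; className = className_; Xf = X_; y = y_;
            // Converge the discretization, then perform the final structural fit
            states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
            KDB::fit(dataset, features, className, states, smoothing);
            return *this;
        }
    };
}
```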
diff --git a/test_iterative_proposal.cpp b/test_iterative_proposal.cpp
new file mode 100644
index 0000000..a6574c8
--- /dev/null
+++ b/test_iterative_proposal.cpp
@@ -0,0 +1,66 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include <iostream>
+#include <torch/torch.h>
+#include <nlohmann/json.hpp>
+#include "bayesnet/classifiers/TANLdi.h"
+
+using json = nlohmann::json;
+
+int main() {
+    std::cout << "Testing Iterative Proposal Implementation" << std::endl;
+
+    // Create synthetic continuous data
+    torch::Tensor X = torch::rand({100, 3});       // 100 samples, 3 features
+    torch::Tensor y = torch::randint(0, 2, {100}); // Binary classification
+
+    // Create feature names
+    std::vector<std::string> features = {"feature1", "feature2", "feature3"};
+    std::string className = "class";
+
+    // Create initial states (will be updated by discretization)
+    std::map<std::string, std::vector<int>> states;
+    states[className] = {0, 1};
+
+    // Create classifier
+    bayesnet::TANLdi classifier;
+
+    // Set convergence hyperparameters
+    json hyperparams;
+    hyperparams["max_iterations"] = 5;
+    hyperparams["tolerance"] = 1e-4;
+    hyperparams["convergence_metric"] = "likelihood";
+    hyperparams["verbose_convergence"] = true;
+
+    classifier.setHyperparameters(hyperparams);
+
+    try {
+        // Fit the model
+        std::cout << "Fitting TANLdi classifier..." << std::endl;
+        classifier.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
+
+        // Make predictions
+        torch::Tensor X_test = torch::rand({10, 3});
+        torch::Tensor predictions = classifier.predict(X_test);
+        torch::Tensor probabilities = classifier.predict_proba(X_test);
+
+        std::cout << "Predictions: " << predictions << std::endl;
+        std::cout << "Probabilities shape: " << probabilities.sizes() << std::endl;
+
+        // Generate graph
+        auto graph = classifier.graph();
+        std::cout << "Graph nodes: " << graph.size() << std::endl;
+
+        std::cout << "Test completed successfully!" 
<< std::endl; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + return 0; +} \ No newline at end of file From 62fa85a1b3acda34e350383feee10bf34933d6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 7 Jul 2025 00:37:16 +0200 Subject: [PATCH 02/11] Complete proposal --- Makefile | 15 +- bayesnet/classifiers/IterativeProposal.cc | 151 ------------------ bayesnet/classifiers/IterativeProposal.h | 50 ------ bayesnet/classifiers/KDBLd.cc | 11 +- bayesnet/classifiers/Proposal.cc | 102 ++++++++++++ bayesnet/classifiers/Proposal.h | 27 +++- bayesnet/classifiers/TANLd.cc | 12 +- bayesnet/classifiers/TANLdi.cc | 45 ------ bayesnet/classifiers/TANLdi.h | 24 --- bayesnet/network/Network.cc | 121 +++++++++++++- bayesnet/network/Network.h | 4 +- bayesnet/network/Node.cc | 35 +++++ bayesnet/network/Node.h | 3 + tests/TestBayesNetwork.cc | 182 ++++++++++++++++++++++ 14 files changed, 492 insertions(+), 290 deletions(-) delete mode 100644 bayesnet/classifiers/IterativeProposal.cc delete mode 100644 bayesnet/classifiers/IterativeProposal.h delete mode 100644 bayesnet/classifiers/TANLdi.cc delete mode 100644 bayesnet/classifiers/TANLdi.h diff --git a/Makefile b/Makefile index e5fcfce..bad7fa9 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,14 @@ mansrcdir = docs/man3 mandestdir = /usr/local/share/man sed_command_link = 's/e">LCOV -/e">Back to manual<\/a> LCOV -/g' sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g' +# Set the number of parallel jobs to the number of available processors minus 7 +CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \ + || nproc --all 2>/dev/null \ + || sysctl -n hw.ncpu) + +# --- Your desired job count: CPUs – 7, but never less than 1 -------------- +JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1) + define ClearTests @for t in $(test_targets); do \ @@ -36,6 +44,7 @@ define setup_target @if [ -d $(2) ]; then rm -fr $(2); fi @conan install . --build=missing -of $(2) -s build_type=$(1) @cmake -S . 
-B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3) + @echo ">>> Will build using $(JOBS) parallel jobs" @echo ">>> Done" endef @@ -72,10 +81,10 @@ release: ## Setup release version using Conan @$(call setup_target,"Release","$(f_release)","ENABLE_TESTING=OFF") buildd: ## Build the debug targets - cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL) + cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS) buildr: ## Build the release targets - cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL) + cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS) # Install targets @@ -105,7 +114,7 @@ opt = "" test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section @echo ">>> Running BayesNet tests..."; @$(MAKE) clean-test - @cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL) + @cmake --build $(f_debug) -t $(test_targets) --parallel $(JOBS) @for t in $(test_targets); do \ echo ">>> Running $$t...";\ if [ -f $(f_debug)/tests/$$t ]; then \ diff --git a/bayesnet/classifiers/IterativeProposal.cc b/bayesnet/classifiers/IterativeProposal.cc deleted file mode 100644 index 7973644..0000000 --- a/bayesnet/classifiers/IterativeProposal.cc +++ /dev/null @@ -1,151 +0,0 @@ -// *************************************************************** -// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX-FileType: SOURCE -// SPDX-License-Identifier: MIT -// *************************************************************** - -#include "IterativeProposal.h" -#include -#include - -namespace bayesnet { - - IterativeProposal::IterativeProposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_) - : Proposal(pDataset, features_, className_) {} - - void IterativeProposal::setHyperparameters(const nlohmann::json& hyperparameters_) { - // First set base Proposal hyperparameters - Proposal::setHyperparameters(hyperparameters_); - - // Then set IterativeProposal specific hyperparameters - if (hyperparameters_.contains("max_iterations")) { - convergence_params.maxIterations = hyperparameters_["max_iterations"]; - } - if (hyperparameters_.contains("tolerance")) { - convergence_params.tolerance = hyperparameters_["tolerance"]; - } - if (hyperparameters_.contains("convergence_metric")) { - convergence_params.convergenceMetric = hyperparameters_["convergence_metric"]; - } - if (hyperparameters_.contains("verbose_convergence")) { - convergence_params.verbose = hyperparameters_["verbose_convergence"]; - } - } - - template - map> IterativeProposal::iterativeLocalDiscretization( - const torch::Tensor& y, - Classifier* classifier, - const torch::Tensor& dataset, - const std::vector& features, - const std::string& className, - const map>& initialStates, - double smoothing - ) { - // Phase 1: Initial discretization (same as original) - auto currentStates = fit_local_discretization(y); - - double previousValue = -std::numeric_limits::infinity(); - double currentValue = 0.0; - - if (convergence_params.verbose) { - std::cout << "Starting iterative local discretization with " - << convergence_params.maxIterations << " max iterations" << std::endl; - } - - for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) { - if (convergence_params.verbose) { - std::cout << "Iteration " << 
(iteration + 1) << "/" << convergence_params.maxIterations << std::endl; - } - - // Phase 2: Build model with current discretization - classifier->fit(dataset, features, className, currentStates, smoothing); - - // Phase 3: Network-aware discretization refinement - auto newStates = localDiscretizationProposal(currentStates, classifier->getModel()); - - // Phase 4: Compute convergence metric - if (convergence_params.convergenceMetric == "likelihood") { - currentValue = computeLogLikelihood(classifier->getModel(), dataset); - } else if (convergence_params.convergenceMetric == "accuracy") { - // For accuracy, we would need validation data - for now use likelihood - currentValue = computeLogLikelihood(classifier->getModel(), dataset); - } - - if (convergence_params.verbose) { - std::cout << " " << convergence_params.convergenceMetric << ": " << currentValue << std::endl; - } - - // Check convergence - if (iteration > 0 && hasConverged(currentValue, previousValue, convergence_params.convergenceMetric)) { - if (convergence_params.verbose) { - std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl; - } - currentStates = newStates; - break; - } - - // Update for next iteration - currentStates = newStates; - previousValue = currentValue; - } - - return currentStates; - } - - double IterativeProposal::computeLogLikelihood(const Network& model, const torch::Tensor& dataset) { - double logLikelihood = 0.0; - int n_samples = dataset.size(0); - int n_features = dataset.size(1); - - for (int i = 0; i < n_samples; ++i) { - double sampleLogLikelihood = 0.0; - - // Get class value for this sample - int classValue = dataset[i][n_features - 1].item(); - - // Compute log-likelihood for each feature given its parents and class - for (const auto& node : model.getNodes()) { - if (node.getName() == model.getClassName()) { - // For class node, add log P(class) - auto classCounts = node.getCPT(); - double classProb = classCounts[classValue] / dataset.size(0); - sampleLogLikelihood += std::log(std::max(classProb, 1e-10)); - } else { - // For feature nodes, add log P(feature | parents, class) - int featureIdx = std::distance(model.getFeatures().begin(), - std::find(model.getFeatures().begin(), - model.getFeatures().end(), - node.getName())); - int featureValue = dataset[i][featureIdx].item(); - - // Simplified probability computation - in practice would need full CPT lookup - double featureProb = 0.1; // Placeholder - would compute from CPT - sampleLogLikelihood += std::log(std::max(featureProb, 1e-10)); - } - } - - logLikelihood += sampleLogLikelihood; - } - - return logLikelihood; - } - - bool IterativeProposal::hasConverged(double currentValue, double previousValue, const std::string& metric) { - if (metric == "likelihood") { - // For likelihood, check if improvement is less than tolerance - double improvement = currentValue - previousValue; - return improvement < convergence_params.tolerance; - } else if (metric == "accuracy") { - // For accuracy, check if change is less than tolerance - double change = std::abs(currentValue - previousValue); - return change < convergence_params.tolerance; - } - return false; - } - - // Explicit template instantiation for common classifier types - template map> IterativeProposal::iterativeLocalDiscretization( - const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector&, - const std::string&, const map>&, double); -} \ No newline at end of file diff --git a/bayesnet/classifiers/IterativeProposal.h b/bayesnet/classifiers/IterativeProposal.h 
deleted file mode 100644 index 4453c2c..0000000 --- a/bayesnet/classifiers/IterativeProposal.h +++ /dev/null @@ -1,50 +0,0 @@ -// *************************************************************** -// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX-FileType: SOURCE -// SPDX-License-Identifier: MIT -// *************************************************************** - -#ifndef ITERATIVE_PROPOSAL_H -#define ITERATIVE_PROPOSAL_H - -#include "Proposal.h" -#include "bayesnet/network/Network.h" -#include - -namespace bayesnet { - class IterativeProposal : public Proposal { - public: - IterativeProposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_); - void setHyperparameters(const nlohmann::json& hyperparameters_); - - protected: - template - map> iterativeLocalDiscretization( - const torch::Tensor& y, - Classifier* classifier, - const torch::Tensor& dataset, - const std::vector& features, - const std::string& className, - const map>& initialStates, - double smoothing = 1.0 - ); - - // Convergence parameters - struct { - int maxIterations = 10; - double tolerance = 1e-6; - std::string convergenceMetric = "likelihood"; // "likelihood" or "accuracy" - bool verbose = false; - } convergence_params; - - nlohmann::json validHyperparameters_iter = { - "max_iterations", "tolerance", "convergence_metric", "verbose_convergence" - }; - - private: - double computeLogLikelihood(const Network& model, const torch::Tensor& dataset); - bool hasConverged(double currentValue, double previousValue, const std::string& metric); - }; -} - -#endif \ No newline at end of file diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index e112c1c..541e005 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -33,12 +33,13 @@ namespace bayesnet { className = className_; Xf = X_; y = y_; - // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y - states = fit_local_discretization(y); - // We have discretized the input data - // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network + + // Use iterative local discretization instead of the two-phase approach + states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing); + + // Final fit with converged discretization KDB::fit(dataset, features, className, states, smoothing); - states = localDiscretizationProposal(states, model); + return *this; } torch::Tensor KDBLd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 3ef8a78..aa0698d 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -5,6 +5,9 @@ // *************************************************************** #include "Proposal.h" +#include +#include +#include namespace bayesnet { Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) @@ -38,6 +41,15 @@ namespace bayesnet { throw std::invalid_argument("Invalid discretization algorithm: " + algorithm.get()); } } + // Convergence parameters + if (hyperparameters.contains("max_iterations")) { + convergence_params.maxIterations = hyperparameters["max_iterations"]; + hyperparameters.erase("max_iterations"); + } + if (hyperparameters.contains("verbose_convergence")) { + convergence_params.verbose = hyperparameters["verbose_convergence"]; + 
hyperparameters.erase("verbose_convergence");
+        }
         if (!hyperparameters.empty()) {
             throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
         }
@@ -163,4 +175,94 @@ namespace bayesnet {
         }
         return yy;
     }
+
+    template<typename Classifier>
+    map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization(
+        const torch::Tensor& y,
+        Classifier* classifier,
+        const torch::Tensor& dataset,
+        const std::vector<std::string>& features,
+        const std::string& className,
+        const map<std::string, std::vector<int>>& initialStates,
+        Smoothing_t smoothing
+    )
+    {
+        // Phase 1: Initial discretization (same as original)
+        auto currentStates = fit_local_discretization(y);
+        auto previousModel = Network();
+
+        if (convergence_params.verbose) {
+            std::cout << "Starting iterative local discretization with "
+                      << convergence_params.maxIterations << " max iterations" << std::endl;
+        }
+
+        for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
+            if (convergence_params.verbose) {
+                std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl;
+            }
+
+            // Phase 2: Build model with current discretization
+            classifier->fit(dataset, features, className, currentStates, smoothing);
+
+            // Phase 3: Network-aware discretization refinement
+            currentStates = localDiscretizationProposal(currentStates, classifier->model);
+
+            // Check convergence
+            if (iteration > 0 && previousModel == classifier->model) {
+                if (convergence_params.verbose) {
+                    std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl;
+                }
+                break;
+            }
+
+            // Update for next iteration
+            previousModel = classifier->model;
+        }
+
+        return currentStates;
+    }
+
+    double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset)
+    {
+        double logLikelihood = 0.0;
+        int n_samples = dataset.size(0);
+        int n_features = dataset.size(1);
+
+        for (int i = 0; i < n_samples; ++i) {
+            double sampleLogLikelihood = 0.0;
+
+            // Get class value for this sample
+            int classValue = dataset[i][n_features - 1].item<int>();
+
+            // Compute log-likelihood for each feature given its parents and class
+            for (const auto& node : model.getNodes()) {
+                if (node.first == model.getClassName()) {
+                    // For class node, add log P(class)
+                    auto classCounts = node.second->getCPT();
+                    double classProb = classCounts[classValue].item<double>() / dataset.size(0);
+                    sampleLogLikelihood += std::log(std::max(classProb, 1e-10));
+                } else {
+                    // For feature nodes, add log P(feature | parents, class)
+                    int featureIdx = std::distance(model.getFeatures().begin(),
+                                                   std::find(model.getFeatures().begin(),
+                                                             model.getFeatures().end(),
+                                                             node.first));
+                    int featureValue = dataset[i][featureIdx].item<int>();
+
+                    // Simplified probability computation - in practice would need full CPT lookup
+                    double featureProb = 0.1; // Placeholder - would compute from CPT
+                    sampleLogLikelihood += std::log(std::max(featureProb, 1e-10));
+                }
+            }
+
+            logLikelihood += sampleLogLikelihood;
+        }
+
+        return logLikelihood;
+    }
+
+    // Explicit template instantiation for common classifier types
+    // template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization(
+    //     const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector<std::string>&,
+    //     const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
 }
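[Editor's aside, not part of the patch: the `featureProb = 0.1` placeholder in `computeLogLikelihood()` above keeps every feature term constant, so the metric currently reflects only the class marginals. A possible full lookup is sketched here, under the assumption, not verified in this patch, that a node's CPT tensor is indexed first by the node's own state and then by each parent's state in `getParents()` order.]

```cpp
// Sketch only: replace the featureProb placeholder with a real CPT lookup.
// Assumes CPT dims are [ownState, parent1State, parent2State, ...] -- an
// assumption about the library's layout, not a confirmed API contract.
double lookupFeatureProb(bayesnet::Node& node, int ownValue, const std::vector<int>& parentValues)
{
    torch::Tensor slice = node.getCPT()[ownValue];
    for (int value : parentValues) {
        slice = slice[value]; // descend one dimension per parent
    }
    return slice.item<double>();
}
```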
diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h
index 6823a38..150508a 100644
--- a/bayesnet/classifiers/Proposal.h
+++ b/bayesnet/classifiers/Proposal.h
@@ -25,18 +25,43 @@ namespace bayesnet {
         torch::Tensor prepareX(torch::Tensor& X);
         map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
         map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
+
+        // Iterative discretization method
+        template<typename Classifier>
+        map<std::string, std::vector<int>> iterativeLocalDiscretization(
+            const torch::Tensor& y,
+            Classifier* classifier,
+            const torch::Tensor& dataset,
+            const std::vector<std::string>& features,
+            const std::string& className,
+            const map<std::string, std::vector<int>>& initialStates,
+            const Smoothing_t smoothing
+        );
+
         torch::Tensor Xf; // X continuous nxm tensor
         torch::Tensor y; // y discrete nx1 tensor
         map<std::string, mdlp::CPPFImdlp> discretizers;
+
         // MDLP parameters
         struct {
             size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
             float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
             int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
         } ld_params;
-        nlohmann::json validHyperparameters_ld = { "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth" };
+
+        // Convergence parameters
+        struct {
+            int maxIterations = 10;
+            bool verbose = false;
+        } convergence_params;
+
+        nlohmann::json validHyperparameters_ld = {
+            "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth",
+            "max_iterations", "verbose_convergence"
+        };
     private:
         std::vector<int> factorize(const std::vector<std::string>& labels_t);
+        double computeLogLikelihood(Network& model, const torch::Tensor& dataset);
         torch::Tensor& pDataset; // (n+1)xm tensor
         std::vector<std::string>& pFeatures;
         std::string& pClassName;
diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc
index f9418da..d5a8dda 100644
--- a/bayesnet/classifiers/TANLd.cc
+++ b/bayesnet/classifiers/TANLd.cc
@@ -15,14 +15,14 @@ namespace bayesnet {
         className = className_;
         Xf = X_;
         y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
+
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
+
+        // Final fit with converged discretization
         TAN::fit(dataset, features, className, states, smoothing);
-        states = localDiscretizationProposal(states, model);
+
         return *this;
-
     }
     torch::Tensor TANLd::predict(torch::Tensor& X)
     {
diff --git a/bayesnet/classifiers/TANLdi.cc b/bayesnet/classifiers/TANLdi.cc
deleted file mode 100644
index 4df5d5c..0000000
--- a/bayesnet/classifiers/TANLdi.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#include "TANLdi.h"
-
-namespace bayesnet {
-    TANLdi::TANLdi() : TAN(), IterativeProposal(dataset, features, className) {}
-
-    TANLdi& TANLdi::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
-    {
-        checkInput(X_, y_);
-        features = features_;
-        className = className_;
-        Xf = X_;
-        y = y_;
-
-        // Use iterative local discretization instead of the two-phase approach
-        states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
-
-        // Final fit with converged discretization
-        TAN::fit(dataset, features, className, states, smoothing);
-
-        return *this;
-    }
-
-    torch::Tensor TANLdi::predict(torch::Tensor& X)
-    { 
- auto Xt = prepareX(X); - return TAN::predict(Xt); - } - - torch::Tensor TANLdi::predict_proba(torch::Tensor& X) - { - auto Xt = prepareX(X); - return TAN::predict_proba(Xt); - } - - std::vector TANLdi::graph(const std::string& name) const - { - return TAN::graph(name); - } -} \ No newline at end of file diff --git a/bayesnet/classifiers/TANLdi.h b/bayesnet/classifiers/TANLdi.h deleted file mode 100644 index f850a59..0000000 --- a/bayesnet/classifiers/TANLdi.h +++ /dev/null @@ -1,24 +0,0 @@ -// *************************************************************** -// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez -// SPDX-FileType: SOURCE -// SPDX-License-Identifier: MIT -// *************************************************************** - -#ifndef TANLDI_H -#define TANLDI_H -#include "TAN.h" -#include "IterativeProposal.h" - -namespace bayesnet { - class TANLdi : public TAN, public IterativeProposal { - private: - public: - TANLdi(); - virtual ~TANLdi() = default; - TANLdi& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; - std::vector graph(const std::string& name = "TANLdi") const override; - torch::Tensor predict(torch::Tensor& X) override; - torch::Tensor predict_proba(torch::Tensor& X) override; - }; -} -#endif // !TANLDI_H \ No newline at end of file diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 07048f3..6d41f2c 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -17,14 +17,90 @@ namespace bayesnet { Network::Network() : fitted{ false }, classNumStates{ 0 } { } - Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()), - fitted(other.fitted), samples(other.samples) + Network::Network(const Network& other) + : features(other.features), className(other.className), classNumStates(other.classNumStates), + fitted(other.fitted) { - if (samples.defined()) - samples = samples.clone(); + // Deep copy the samples tensor + if (other.samples.defined()) { + samples = other.samples.clone(); + } + + // First, create all nodes (without relationships) for (const auto& node : other.nodes) { nodes[node.first] = std::make_unique(*node.second); } + + // Second, reconstruct the relationships between nodes + for (const auto& node : other.nodes) { + const std::string& nodeName = node.first; + Node* originalNode = node.second.get(); + Node* newNode = nodes[nodeName].get(); + + // Reconstruct parent relationships + for (Node* parent : originalNode->getParents()) { + const std::string& parentName = parent->getName(); + if (nodes.find(parentName) != nodes.end()) { + newNode->addParent(nodes[parentName].get()); + } + } + + // Reconstruct child relationships + for (Node* child : originalNode->getChildren()) { + const std::string& childName = child->getName(); + if (nodes.find(childName) != nodes.end()) { + newNode->addChild(nodes[childName].get()); + } + } + } + } + + Network& Network::operator=(const Network& other) + { + if (this != &other) { + // Clear existing state + nodes.clear(); + features = other.features; + className = other.className; + classNumStates = other.classNumStates; + fitted = other.fitted; + + // Deep copy the samples tensor + if (other.samples.defined()) { + samples = other.samples.clone(); + } else { + samples = torch::Tensor(); + } + + // First, create all nodes (without relationships) + for (const auto& node : other.nodes) { + 
nodes[node.first] = std::make_unique(*node.second); + } + + // Second, reconstruct the relationships between nodes + for (const auto& node : other.nodes) { + const std::string& nodeName = node.first; + Node* originalNode = node.second.get(); + Node* newNode = nodes[nodeName].get(); + + // Reconstruct parent relationships + for (Node* parent : originalNode->getParents()) { + const std::string& parentName = parent->getName(); + if (nodes.find(parentName) != nodes.end()) { + newNode->addParent(nodes[parentName].get()); + } + } + + // Reconstruct child relationships + for (Node* child : originalNode->getChildren()) { + const std::string& childName = child->getName(); + if (nodes.find(childName) != nodes.end()) { + newNode->addChild(nodes[childName].get()); + } + } + } + } + return *this; } void Network::initialize() { @@ -503,4 +579,41 @@ namespace bayesnet { } return oss.str(); } + + bool Network::operator==(const Network& other) const + { + // Compare number of nodes + if (nodes.size() != other.nodes.size()) { + return false; + } + + // Compare if all node names exist in both networks + for (const auto& node : nodes) { + if (other.nodes.find(node.first) == other.nodes.end()) { + return false; + } + } + + // Compare edges (topology) + auto thisEdges = getEdges(); + auto otherEdges = other.getEdges(); + + // Compare number of edges + if (thisEdges.size() != otherEdges.size()) { + return false; + } + + // Sort both edge lists for comparison + std::sort(thisEdges.begin(), thisEdges.end()); + std::sort(otherEdges.begin(), otherEdges.end()); + + // Compare each edge + for (size_t i = 0; i < thisEdges.size(); ++i) { + if (thisEdges[i] != otherEdges[i]) { + return false; + } + } + + return true; + } } diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index efee01e..ad7a940 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -17,7 +17,8 @@ namespace bayesnet { class Network { public: Network(); - explicit Network(const Network&); + Network(const Network& other); + Network& operator=(const Network& other); ~Network() = default; torch::Tensor& getSamples(); void addNode(const std::string&); @@ -47,6 +48,7 @@ namespace bayesnet { void initialize(); std::string dump_cpt() const; inline std::string version() { return { project_version.begin(), project_version.end() }; } + bool operator==(const Network& other) const; private: std::map> nodes; bool fitted; diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index b94b142..ef1a79d 100644 --- a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -13,6 +13,41 @@ namespace bayesnet { : name(name) { } + + Node::Node(const Node& other) + : name(other.name), numStates(other.numStates), dimensions(other.dimensions) + { + // Deep copy the CPT tensor + if (other.cpTable.defined()) { + cpTable = other.cpTable.clone(); + } + // Note: parent and children pointers are NOT copied here + // They will be reconstructed by the Network copy constructor + // to maintain proper object relationships + } + + Node& Node::operator=(const Node& other) + { + if (this != &other) { + name = other.name; + numStates = other.numStates; + dimensions = other.dimensions; + + // Deep copy the CPT tensor + if (other.cpTable.defined()) { + cpTable = other.cpTable.clone(); + } else { + cpTable = torch::Tensor(); + } + + // Clear existing relationships + parents.clear(); + children.clear(); + // Note: parent and children pointers are NOT copied here + // They must be reconstructed to maintain proper object relationships + } + return 
*this; + } void Node::clear() { parents.clear(); diff --git a/bayesnet/network/Node.h b/bayesnet/network/Node.h index b950d70..ef0eeed 100644 --- a/bayesnet/network/Node.h +++ b/bayesnet/network/Node.h @@ -14,6 +14,9 @@ namespace bayesnet { class Node { public: explicit Node(const std::string&); + Node(const Node& other); + Node& operator=(const Node& other); + ~Node() = default; void clear(); void addParent(Node*); void addChild(Node*); diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index 3f17d6a..c024f32 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -338,6 +338,188 @@ TEST_CASE("Test Bayesian Network", "[Network]") REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error); REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first."); } + SECTION("Test assignment operator") + { + INFO("Test assignment operator"); + // Create original network + auto net1 = bayesnet::Network(); + buildModel(net1, raw.features, raw.className); + net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + + // Create empty network and assign + auto net2 = bayesnet::Network(); + net2.addNode("TempNode"); // Add something to make sure it gets cleared + net2 = net1; + + // Verify they are equal + REQUIRE(net1.getFeatures() == net2.getFeatures()); + REQUIRE(net1.getEdges() == net2.getEdges()); + REQUIRE(net1.getNumEdges() == net2.getNumEdges()); + REQUIRE(net1.getStates() == net2.getStates()); + REQUIRE(net1.getClassName() == net2.getClassName()); + REQUIRE(net1.getClassNumStates() == net2.getClassNumStates()); + REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0)); + REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1)); + REQUIRE(net1.getNodes().size() == net2.getNodes().size()); + + // Verify topology equality + REQUIRE(net1 == net2); + + // Verify they are separate objects by modifying one + net2.initialize(); + net2.addNode("OnlyInNet2"); + REQUIRE(net1.getNodes().size() != net2.getNodes().size()); + REQUIRE_FALSE(net1 == net2); + } + SECTION("Test self assignment") + { + INFO("Test self assignment"); + buildModel(net, raw.features, raw.className); + net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + + int original_edges = net.getNumEdges(); + int original_nodes = net.getNodes().size(); + + // Self assignment should not corrupt the network + net = net; + + REQUIRE(net.getNumEdges() == original_edges); + REQUIRE(net.getNodes().size() == original_nodes); + REQUIRE(net.getFeatures() == raw.features); + REQUIRE(net.getClassName() == raw.className); + } + SECTION("Test operator== topology comparison") + { + INFO("Test operator== topology comparison"); + + // Test 1: Two identical networks + auto net1 = bayesnet::Network(); + auto net2 = bayesnet::Network(); + + net1.addNode("A"); + net1.addNode("B"); + net1.addNode("C"); + net1.addEdge("A", "B"); + net1.addEdge("B", "C"); + + net2.addNode("A"); + net2.addNode("B"); + net2.addNode("C"); + net2.addEdge("A", "B"); + net2.addEdge("B", "C"); + + REQUIRE(net1 == net2); + + // Test 2: Different nodes + auto net3 = bayesnet::Network(); + net3.addNode("A"); + net3.addNode("D"); // Different node + REQUIRE_FALSE(net1 == net3); + + // Test 3: Same nodes, different edges + auto net4 = bayesnet::Network(); + net4.addNode("A"); + net4.addNode("B"); + net4.addNode("C"); + net4.addEdge("A", "C"); // Different topology + net4.addEdge("B", "C"); + REQUIRE_FALSE(net1 == net4); + + // Test 
4: Empty networks + auto net5 = bayesnet::Network(); + auto net6 = bayesnet::Network(); + REQUIRE(net5 == net6); + + // Test 5: Same topology, different edge order + auto net7 = bayesnet::Network(); + net7.addNode("A"); + net7.addNode("B"); + net7.addNode("C"); + net7.addEdge("B", "C"); // Add edges in different order + net7.addEdge("A", "B"); + REQUIRE(net1 == net7); // Should still be equal + } + SECTION("Test RAII compliance with smart pointers") + { + INFO("Test RAII compliance with smart pointers"); + + std::unique_ptr net1 = std::make_unique(); + buildModel(*net1, raw.features, raw.className); + net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + + // Test that copy constructor works with smart pointers + std::unique_ptr net2 = std::make_unique(*net1); + + REQUIRE(*net1 == *net2); + REQUIRE(net1->getNumEdges() == net2->getNumEdges()); + REQUIRE(net1->getNodes().size() == net2->getNodes().size()); + + // Destroy original + net1.reset(); + + // net2 should still be valid and functional + REQUIRE_NOTHROW(net2->addNode("NewNode")); + REQUIRE(net2->getNodes().count("NewNode") == 1); + + // Test predictions still work + std::vector> test = { {1, 2, 0, 1, 1} }; + REQUIRE_NOTHROW(net2->predict(test)); + } + SECTION("Test complex topology copy") + { + INFO("Test complex topology copy"); + + auto original = bayesnet::Network(); + + // Create a more complex network + original.addNode("Root"); + original.addNode("Child1"); + original.addNode("Child2"); + original.addNode("Grandchild1"); + original.addNode("Grandchild2"); + original.addNode("Grandchild3"); + + original.addEdge("Root", "Child1"); + original.addEdge("Root", "Child2"); + original.addEdge("Child1", "Grandchild1"); + original.addEdge("Child1", "Grandchild2"); + original.addEdge("Child2", "Grandchild3"); + + // Copy it + auto copy = original; + + // Verify topology is identical + REQUIRE(original == copy); + REQUIRE(original.getNodes().size() == copy.getNodes().size()); + REQUIRE(original.getNumEdges() == copy.getNumEdges()); + + // Verify edges are properly reconstructed + auto originalEdges = original.getEdges(); + auto copyEdges = copy.getEdges(); + REQUIRE(originalEdges.size() == copyEdges.size()); + + // Verify node relationships are properly copied + for (const auto& nodePair : original.getNodes()) { + const std::string& nodeName = nodePair.first; + auto* originalNode = nodePair.second.get(); + auto* copyNode = copy.getNodes().at(nodeName).get(); + + REQUIRE(originalNode->getParents().size() == copyNode->getParents().size()); + REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size()); + + // Verify parent names match + for (size_t i = 0; i < originalNode->getParents().size(); ++i) { + REQUIRE(originalNode->getParents()[i]->getName() == + copyNode->getParents()[i]->getName()); + } + + // Verify child names match + for (size_t i = 0; i < originalNode->getChildren().size(); ++i) { + REQUIRE(originalNode->getChildren()[i]->getName() == + copyNode->getChildren()[i]->getName()); + } + } + } } TEST_CASE("Test and empty Node", "[Network]") From 0ce7f664b43afdd8d0f5bf24e95881bf4d02f2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 7 Jul 2025 00:38:00 +0200 Subject: [PATCH 03/11] remove unneeded files --- ITERATIVE_PROPOSAL_README.md | 114 ----------------------------------- Makefile.iterative | 20 ------ test_iterative_proposal.cpp | 66 -------------------- 3 files changed, 200 deletions(-) delete mode 100644 
ITERATIVE_PROPOSAL_README.md
 delete mode 100644 Makefile.iterative
 delete mode 100644 test_iterative_proposal.cpp

diff --git a/ITERATIVE_PROPOSAL_README.md b/ITERATIVE_PROPOSAL_README.md
deleted file mode 100644
index a1683a9..0000000
--- a/ITERATIVE_PROPOSAL_README.md
+++ /dev/null
@@ -1,114 +0,0 @@
-# Iterative Proposal Implementation
-
-This implementation extends the existing local discretization framework with iterative convergence capabilities, following the analysis from `local_discretization_analysis.md`.
-
-## Key Components
-
-### 1. IterativeProposal Class
-- **File**: `bayesnet/classifiers/IterativeProposal.h|cc`
-- **Purpose**: Extends the base `Proposal` class with iterative convergence logic
-- **Key Method**: `iterativeLocalDiscretization()` - performs iterative refinement until convergence
-
-### 2. TANLdi Example
-- **File**: `bayesnet/classifiers/TANLdi.h|cc`
-- **Purpose**: Demonstrates how to adapt existing Ld classifiers to use iterative discretization
-- **Pattern**: Inherits from both `TAN` and `IterativeProposal`
-
-## Architecture
-
-The implementation follows the established dual inheritance pattern:
-
-```cpp
-class TANLdi : public TAN, public IterativeProposal
-```
-
-This maintains the same interface as existing Ld classifiers while adding convergence capabilities.
-
-## Convergence Algorithm
-
-The iterative process works as follows:
-
-1. **Initial Discretization**: Use class-only discretization (`fit_local_discretization()`)
-2. **Iterative Refinement Loop**:
-   - Build model with current discretization (call parent `fit()`)
-   - Refine discretization using network structure (`localDiscretizationProposal()`)
-   - Compute convergence metric (likelihood or accuracy)
-   - Check for convergence based on tolerance
-   - Repeat until convergence or max iterations reached
-
-## Configuration Parameters
-
-- `max_iterations`: Maximum number of iterations (default: 10)
-- `tolerance`: Convergence tolerance (default: 1e-6)
-- `convergence_metric`: "likelihood" or "accuracy" (default: "likelihood")
-- `verbose_convergence`: Enable verbose logging (default: false)
-
-## Usage Example
-
-```cpp
-#include "bayesnet/classifiers/TANLdi.h"
-
-// Create classifier
-bayesnet::TANLdi classifier;
-
-// Set convergence parameters
-nlohmann::json hyperparams;
-hyperparams["max_iterations"] = 5;
-hyperparams["tolerance"] = 1e-4;
-hyperparams["convergence_metric"] = "likelihood";
-hyperparams["verbose_convergence"] = true;
-
-classifier.setHyperparameters(hyperparams);
-
-// Fit and use normally
-classifier.fit(X, y, features, className, states, smoothing);
-auto predictions = classifier.predict(X_test);
-```
-
-## Testing
-
-Run the test with:
-```bash
-make -f Makefile.iterative test-iterative
-```
-
-## Integration with Existing Code
-
-To convert existing Ld classifiers to use iterative discretization:
-
-1. Change inheritance from `Proposal` to `IterativeProposal`
-2. Replace the discretization logic in the `fit()` method:
-   ```cpp
-   // Old approach:
-   states = fit_local_discretization(y);
-   TAN::fit(dataset, features, className, states, smoothing);
-   states = localDiscretizationProposal(states, model);
-
-   // New approach:
-   states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
-   TAN::fit(dataset, features, className, states, smoothing);
-   ```
-
-## Benefits
-
-1. **Convergence**: Iterative refinement until stable discretization
-2. **Flexibility**: Configurable convergence criteria and limits
-3. **Compatibility**: Maintains existing interface and patterns
-4. **Monitoring**: Optional verbose logging for convergence tracking
-5. **Extensibility**: Easy to add new convergence metrics or stopping criteria
-
-## Performance Considerations
-
-- The iterative approach will be slower than the original two-phase method
-- Convergence monitoring adds computational overhead
-- Set an appropriate `max_iterations` to bound the running time
-- The `tolerance` parameter should be tuned for your specific use case
-
-## Future Enhancements
-
-Potential improvements:
-1. Add more convergence metrics (e.g., AIC, BIC, cross-validation score)
-2. Implement early stopping based on validation performance
-3. Add support for different discretization schedules
-4. Optimize likelihood computation for better performance
-5. Add convergence visualization and reporting tools
\ No newline at end of file
diff --git a/Makefile.iterative b/Makefile.iterative
deleted file mode 100644
index 84e3546..0000000
--- a/Makefile.iterative
+++ /dev/null
@@ -1,20 +0,0 @@
-# Makefile for testing iterative proposal implementation
-# Include this in the main Makefile or use directly
-
-# Test iterative proposal
-test-iterative: buildd
-	@echo "Building iterative proposal test..."
-	cd build_Debug && g++ -std=c++17 -I../bayesnet -I../config -I/usr/local/include \
-		../test_iterative_proposal.cpp \
-		-L. -lbayesnet \
-		-ltorch -ltorch_cpu \
-		-pthread \
-		-o test_iterative_proposal
-	@echo "Running iterative proposal test..."
-	cd build_Debug && ./test_iterative_proposal
-
-# Clean test
-clean-test:
-	rm -f build_Debug/test_iterative_proposal
-
-.PHONY: test-iterative clean-test
\ No newline at end of file
diff --git a/test_iterative_proposal.cpp b/test_iterative_proposal.cpp
deleted file mode 100644
index a6574c8..0000000
--- a/test_iterative_proposal.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#include <iostream>
-#include <torch/torch.h>
-#include <nlohmann/json.hpp>
-#include "bayesnet/classifiers/TANLdi.h"
-
-using json = nlohmann::json;
-
-int main() {
-    std::cout << "Testing Iterative Proposal Implementation" << std::endl;
-
-    // Create synthetic continuous data
-    torch::Tensor X = torch::rand({100, 3});       // 100 samples, 3 features
-    torch::Tensor y = torch::randint(0, 2, {100}); // Binary classification
-
-    // Create feature names
-    std::vector<std::string> features = {"feature1", "feature2", "feature3"};
-    std::string className = "class";
-
-    // Create initial states (will be updated by discretization)
-    std::map<std::string, std::vector<int>> states;
-    states[className] = {0, 1};
-
-    // Create classifier
-    bayesnet::TANLdi classifier;
-
-    // Set convergence hyperparameters
-    json hyperparams;
-    hyperparams["max_iterations"] = 5;
-    hyperparams["tolerance"] = 1e-4;
-    hyperparams["convergence_metric"] = "likelihood";
-    hyperparams["verbose_convergence"] = true;
-
-    classifier.setHyperparameters(hyperparams);
-
-    try {
-        // Fit the model
-        std::cout << "Fitting TANLdi classifier..." 
<< std::endl; - classifier.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE); - - // Make predictions - torch::Tensor X_test = torch::rand({10, 3}); - torch::Tensor predictions = classifier.predict(X_test); - torch::Tensor probabilities = classifier.predict_proba(X_test); - - std::cout << "Predictions: " << predictions << std::endl; - std::cout << "Probabilities shape: " << probabilities.sizes() << std::endl; - - // Generate graph - auto graph = classifier.graph(); - std::cout << "Graph nodes: " << graph.size() << std::endl; - - std::cout << "Test completed successfully!" << std::endl; - - } catch (const std::exception& e) { - std::cerr << "Error: " << e.what() << std::endl; - return 1; - } - - return 0; -} \ No newline at end of file From 2c7352ac38c9974fd488c2db6fee0fd7e4655c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 7 Jul 2025 02:10:08 +0200 Subject: [PATCH 04/11] Fix classifier build in proposal --- Makefile | 2 +- bayesnet/classifiers/Classifier.h | 1 + bayesnet/classifiers/KDBLd.cc | 5 +++-- bayesnet/classifiers/Proposal.cc | 28 +++++++++++++++++++--------- bayesnet/classifiers/Proposal.h | 2 +- bayesnet/classifiers/TANLd.cc | 5 +++-- tests/CMakeLists.txt | 2 +- 7 files changed, 29 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index bad7fa9..5563222 100644 --- a/Makefile +++ b/Makefile @@ -237,7 +237,7 @@ sample: ## Build sample with Conan @if [ -d ./sample/build ]; then rm -rf ./sample/build; fi @cd sample && conan install . --output-folder=build --build=missing -s build_type=$(build_type) -o "&:enable_coverage=False" -o "&:enable_testing=False" @cd sample && cmake -B build -S . -DCMAKE_BUILD_TYPE=$(build_type) -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake && \ - cmake --build build -t bayesnet_sample + cmake --build build -t bayesnet_sample --parallel $(JOBS) sample/build/bayesnet_sample $(fname) $(model) @echo ">>> Done"; diff --git a/bayesnet/classifiers/Classifier.h b/bayesnet/classifiers/Classifier.h index 0a10a1f..95b6da2 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -37,6 +37,7 @@ namespace bayesnet { std::vector getNotes() const override { return notes; } std::string dump_cpt() const override; void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters + Network& getModel() { return model; } protected: bool fitted; unsigned int m, n; // m: number of samples, n: number of features diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 541e005..32aa690 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -5,6 +5,7 @@ // *************************************************************** #include "KDBLd.h" +#include namespace bayesnet { KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) @@ -35,7 +36,7 @@ namespace bayesnet { y = y_; // Use iterative local discretization instead of the two-phase approach - states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing); + states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); // Final fit with converged discretization KDB::fit(dataset, features, className, states, smoothing); @@ -56,4 +57,4 @@ namespace bayesnet { { return KDB::graph(name); } -} \ No newline at end of file +} diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 
aa0698d..4dde35a 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -8,6 +8,11 @@ #include #include #include +#include "Classifier.h" +#include "KDB.h" +#include "TAN.h" +#include "KDBLd.h" +#include "TANLd.h" namespace bayesnet { Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) @@ -180,7 +185,7 @@ namespace bayesnet { map> Proposal::iterativeLocalDiscretization( const torch::Tensor& y, Classifier* classifier, - const torch::Tensor& dataset, + torch::Tensor& dataset, const std::vector& features, const std::string& className, const map>& initialStates, @@ -196,19 +201,20 @@ namespace bayesnet { << convergence_params.maxIterations << " max iterations" << std::endl; } + const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) { if (convergence_params.verbose) { std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl; } // Phase 2: Build model with current discretization - classifier->fit(dataset, features, className, currentStates, smoothing); - + classifier->fit(dataset, features, className, currentStates, weights, smoothing); + // Phase 3: Network-aware discretization refinement - currentStates = localDiscretizationProposal(currentStates, classifier->model); + currentStates = localDiscretizationProposal(currentStates, classifier->getModel()); // Check convergence - if (iteration > 0 && previousModel == classifier->model) { + if (iteration > 0 && previousModel == classifier->getModel()) { if (convergence_params.verbose) { std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl; } @@ -216,7 +222,7 @@ namespace bayesnet { } // Update for next iteration - previousModel = classifier->model; + previousModel = classifier->getModel(); } return currentStates; @@ -262,7 +268,11 @@ namespace bayesnet { } // Explicit template instantiation for common classifier types - // template map> Proposal::iterativeLocalDiscretization( - // const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector&, - // const std::string&, const map>&, Smoothing_t); + template map> Proposal::iterativeLocalDiscretization( + const torch::Tensor&, KDB*, torch::Tensor&, const std::vector&, + const std::string&, const map>&, Smoothing_t); + + template map> Proposal::iterativeLocalDiscretization( + const torch::Tensor&, TAN*, torch::Tensor&, const std::vector&, + const std::string&, const map>&, Smoothing_t); } diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h index 150508a..b5685d9 100644 --- a/bayesnet/classifiers/Proposal.h +++ b/bayesnet/classifiers/Proposal.h @@ -31,7 +31,7 @@ namespace bayesnet { map> iterativeLocalDiscretization( const torch::Tensor& y, Classifier* classifier, - const torch::Tensor& dataset, + torch::Tensor& dataset, const std::vector& features, const std::string& className, const map>& initialStates, diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index d5a8dda..783681c 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -5,6 +5,7 @@ // *************************************************************** #include "TANLd.h" +#include namespace bayesnet { TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} @@ -17,7 +18,7 @@ namespace bayesnet { y = y_; // Use 
iterative local discretization instead of the two-phase approach - states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing); + states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); // Final fit with converged discretization TAN::fit(dataset, features, className, states, smoothing); @@ -38,4 +39,4 @@ namespace bayesnet { { return TAN::graph(name); } -} \ No newline at end of file +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 788a6eb..6f9959f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,7 +8,7 @@ if(ENABLE_TESTING) add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES}) - target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp::fimdlp PRIVATE Catch2::Catch2WithMain folding::folding) + target_link_libraries(TestBayesNet PRIVATE torch::torch fimdlp::fimdlp Catch2::Catch2WithMain folding::folding) add_test(NAME BayesNetworkTest COMMAND TestBayesNet) add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]") add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]") From ed380b14945e01a5b8842d9c9a5b3d630599adbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 11:42:20 +0200 Subject: [PATCH 05/11] Complete implementation with tests --- README.md | 2 +- bayesnet/classifiers/KDBLd.cc | 36 ++++---- bayesnet/classifiers/KDBLd.h | 9 +- bayesnet/classifiers/Proposal.cc | 53 ++--------- bayesnet/classifiers/Proposal.h | 3 +- bayesnet/classifiers/SPODELd.cc | 6 +- bayesnet/classifiers/SPODELd.h | 6 ++ bayesnet/classifiers/TANLd.cc | 23 +++-- bayesnet/classifiers/TANLd.h | 8 ++ bayesnet/ensembles/AODELd.h | 4 + tests/TestBayesModels.cc | 152 +++++++++++++++++++++---------- tests/TestBayesNetwork.cc | 80 ++++++++-------- tests/TestBayesNode.cc | 43 +++++++++ 13 files changed, 255 insertions(+), 170 deletions(-) diff --git a/README.md b/README.md index 2ca0195..1226f6d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Doctorado-ML/BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-98,0%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) +[![Coverage Badge](https://img.shields.io/badge/Coverage-98,5%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) [![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344) Bayesian Network Classifiers library diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 32aa690..1b96bce 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -14,33 +14,29 @@ namespace bayesnet { validHyperparameters.push_back("k"); validHyperparameters.push_back("theta"); } - void KDBLd::setHyperparameters(const nlohmann::json& hyperparameters_) - { - auto hyperparameters = hyperparameters_; - if 
(hyperparameters.contains("k")) { - k = hyperparameters["k"]; - hyperparameters.erase("k"); - } - if (hyperparameters.contains("theta")) { - theta = hyperparameters["theta"]; - hyperparameters.erase("theta"); - } - Proposal::setHyperparameters(hyperparameters); - } KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); - features = features_; - className = className_; Xf = X_; y = y_; - - // Use iterative local discretization instead of the two-phase approach + return commonFit(features_, className_, states_, smoothing); + } + KDBLd& KDBLd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + if (!torch::is_floating_point(dataset)) { + throw std::runtime_error("Dataset must be a floating point tensor"); + } + Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); + y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); + return commonFit(features_, className_, states_, smoothing); + } + + KDBLd& KDBLd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + features = features_; + className = className_; states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); - - // Final fit with converged discretization KDB::fit(dataset, features, className, states, smoothing); - return *this; } torch::Tensor KDBLd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h index 4fa5f82..e19da24 100644 --- a/bayesnet/classifiers/KDBLd.h +++ b/bayesnet/classifiers/KDBLd.h @@ -15,8 +15,15 @@ namespace bayesnet { explicit KDBLd(int k); virtual ~KDBLd() = default; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + KDBLd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + KDBLd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "KDB") const override; - void setHyperparameters(const nlohmann::json& hyperparameters_) override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + KDB::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index 4dde35a..b634b6e 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -11,6 +11,7 @@ #include "Classifier.h" #include "KDB.h" #include "TAN.h" +#include "SPODE.h" #include "KDBLd.h" #include "TANLd.h" @@ -18,9 +19,8 @@ namespace bayesnet { Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) { } - void Proposal::setHyperparameters(const nlohmann::json& hyperparameters_) + void Proposal::setHyperparameters(nlohmann::json& hyperparameters) { - auto hyperparameters = hyperparameters_; 
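The hunk above shows the key API shift of this patch: `Proposal::setHyperparameters` now takes a mutable `nlohmann::json&`, consumes the keys it owns, and no longer throws on leftovers, so derived classifiers can chain it with the base validator. A minimal sketch of that consume-and-delegate chain, with simplified stand-in types (none of these names are the library's own):

```cpp
#include <nlohmann/json.hpp>
#include <iostream>
#include <stdexcept>

struct ProposalPart {
    int max_iterations = 10;
    void setHyperparameters(nlohmann::json& h) {  // mutable reference, as in the new API
        if (h.contains("max_iterations")) {
            max_iterations = h["max_iterations"];
            h.erase("max_iterations");
        }
    }
};

struct KDBPart {
    int k = 2;
    void setHyperparameters(const nlohmann::json& h_) {
        auto h = h_;
        if (h.contains("k")) { k = h["k"]; h.erase("k"); }
        if (!h.empty())  // the last consumer rejects anything left over
            throw std::invalid_argument("Invalid hyperparameters: " + h.dump());
    }
};

struct KDBLdLike : public KDBPart, public ProposalPart {
    void setHyperparameters(const nlohmann::json& h_) {
        auto h = h_;
        ProposalPart::setHyperparameters(h); // consumes ld_* / convergence keys
        KDBPart::setHyperparameters(h);      // consumes k, validates the remainder
    }
};

int main() {
    KDBLdLike clf;
    clf.setHyperparameters({ {"k", 3}, {"max_iterations", 5} });
    std::cout << clf.k << " " << clf.max_iterations << std::endl; // prints: 3 5
}
```

With this layout only the final consumer in the chain rejects unknown keys, which is presumably why the `throw` was moved out of `Proposal` in the hunk below.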
if (hyperparameters.contains("ld_proposed_cuts")) { ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"]; hyperparameters.erase("ld_proposed_cuts"); @@ -55,9 +55,6 @@ namespace bayesnet { convergence_params.verbose = hyperparameters["verbose_convergence"]; hyperparameters.erase("verbose_convergence"); } - if (!hyperparameters.empty()) { - throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump()); - } } void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y) @@ -209,7 +206,7 @@ namespace bayesnet { // Phase 2: Build model with current discretization classifier->fit(dataset, features, className, currentStates, weights, smoothing); - + // Phase 3: Network-aware discretization refinement currentStates = localDiscretizationProposal(currentStates, classifier->getModel()); @@ -228,51 +225,15 @@ namespace bayesnet { return currentStates; } - double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset) - { - double logLikelihood = 0.0; - int n_samples = dataset.size(0); - int n_features = dataset.size(1); - - for (int i = 0; i < n_samples; ++i) { - double sampleLogLikelihood = 0.0; - - // Get class value for this sample - int classValue = dataset[i][n_features - 1].item(); - - // Compute log-likelihood for each feature given its parents and class - for (const auto& node : model.getNodes()) { - if (node.first == model.getClassName()) { - // For class node, add log P(class) - auto classCounts = node.second->getCPT(); - double classProb = classCounts[classValue].item() / dataset.size(0); - sampleLogLikelihood += std::log(std::max(classProb, 1e-10)); - } else { - // For feature nodes, add log P(feature | parents, class) - int featureIdx = std::distance(model.getFeatures().begin(), - std::find(model.getFeatures().begin(), - model.getFeatures().end(), - node.first)); - int featureValue = dataset[i][featureIdx].item(); - - // Simplified probability computation - in practice would need full CPT lookup - double featureProb = 0.1; // Placeholder - would compute from CPT - sampleLogLikelihood += std::log(std::max(featureProb, 1e-10)); - } - } - - logLikelihood += sampleLogLikelihood; - } - - return logLikelihood; - } - // Explicit template instantiation for common classifier types template map> Proposal::iterativeLocalDiscretization( const torch::Tensor&, KDB*, torch::Tensor&, const std::vector&, const std::string&, const map>&, Smoothing_t); - + template map> Proposal::iterativeLocalDiscretization( const torch::Tensor&, TAN*, torch::Tensor&, const std::vector&, const std::string&, const map>&, Smoothing_t); + template map> Proposal::iterativeLocalDiscretization( + const torch::Tensor&, SPODE*, torch::Tensor&, const std::vector&, + const std::string&, const map>&, Smoothing_t); } diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h index b5685d9..9f23283 100644 --- a/bayesnet/classifiers/Proposal.h +++ b/bayesnet/classifiers/Proposal.h @@ -19,7 +19,7 @@ namespace bayesnet { class Proposal { public: Proposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_); - void setHyperparameters(const nlohmann::json& hyperparameters_); + void setHyperparameters(nlohmann::json& hyperparameters_); protected: void checkInput(const torch::Tensor& X, const torch::Tensor& y); torch::Tensor prepareX(torch::Tensor& X); @@ -61,7 +61,6 @@ namespace bayesnet { }; private: std::vector factorize(const std::vector& labels_t); - double computeLogLikelihood(Network& model, const torch::Tensor& dataset); 
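The explicit instantiations added at the bottom of `Proposal.cc` are what allow `iterativeLocalDiscretization` to keep its template definition out of the header. A self-contained sketch of the idiom, shown as a single translation unit with hypothetical stand-ins for the real classifier types:

```cpp
// Hypothetical stand-ins; the real code instantiates for KDB, TAN and SPODE.
struct KDBLike { int order() const { return 2; } };
struct TANLike { int order() const { return 1; } };

// What the header would expose: a declaration only.
template <typename TClassifier>
int refine(TClassifier* clf);

// What the .cc file would contain: the definition ...
template <typename TClassifier>
int refine(TClassifier* clf) { return clf->order(); }

// ... plus one explicit instantiation per supported classifier, so the
// symbols exist at link time without the body ever living in the header.
template int refine<KDBLike>(KDBLike*);
template int refine<TANLike>(TANLike*);

int main() {
    KDBLike kdb;
    return refine(&kdb) == 2 ? 0 : 1;
}
```

Any classifier type outside the instantiated set would still compile at the call site but fail at link time, which keeps the supported set explicit.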
torch::Tensor& pDataset; // (n+1)xm tensor std::vector& pFeatures; std::string& pClassName; diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index 1bb55fb..8cdbdec 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -34,12 +34,8 @@ namespace bayesnet { { features = features_; className = className_; - // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y - states = fit_local_discretization(y); - // We have discretized the input data - // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network + states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); SPODE::fit(dataset, features, className, states, smoothing); - states = localDiscretizationProposal(states, model); return *this; } torch::Tensor SPODELd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h index faa3a48..ff02149 100644 --- a/bayesnet/classifiers/SPODELd.h +++ b/bayesnet/classifiers/SPODELd.h @@ -18,6 +18,12 @@ namespace bayesnet { SPODELd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "SPODELd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + SPODE::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; static inline std::string version() { return "0.0.1"; }; diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index 783681c..32bd7b8 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -12,17 +12,26 @@ namespace bayesnet { TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); - features = features_; - className = className_; Xf = X_; y = y_; - - // Use iterative local discretization instead of the two-phase approach + return commonFit(features_, className_, states_, smoothing); + } + TANLd& TANLd::fit(torch::Tensor& dataset, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + if (!torch::is_floating_point(dataset)) { + throw std::runtime_error("Dataset must be a floating point tensor"); + } + Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); + y = dataset.index({ -1, "..." 
}).clone().to(torch::kInt32); + return commonFit(features_, className_, states_, smoothing); + } + + TANLd& TANLd::commonFit(const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) + { + features = features_; + className = className_; states = iterativeLocalDiscretization(y, static_cast(this), dataset, features, className, states_, smoothing); - - // Final fit with converged discretization TAN::fit(dataset, features, className, states, smoothing); - return *this; } torch::Tensor TANLd::predict(torch::Tensor& X) diff --git a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h index a904235..bc119fc 100644 --- a/bayesnet/classifiers/TANLd.h +++ b/bayesnet/classifiers/TANLd.h @@ -16,7 +16,15 @@ namespace bayesnet { TANLd(); virtual ~TANLd() = default; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + TANLd& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override; + TANLd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing); std::vector graph(const std::string& name = "TANLd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + auto hyperparameters = hyperparameters_; + Proposal::setHyperparameters(hyperparameters); + TAN::setHyperparameters(hyperparameters); + } torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict_proba(torch::Tensor& X) override; }; diff --git a/bayesnet/ensembles/AODELd.h b/bayesnet/ensembles/AODELd.h index d697554..63739b3 100644 --- a/bayesnet/ensembles/AODELd.h +++ b/bayesnet/ensembles/AODELd.h @@ -17,6 +17,10 @@ namespace bayesnet { virtual ~AODELd() = default; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) override; std::vector graph(const std::string& name = "AODELd") const override; + void setHyperparameters(const nlohmann::json& hyperparameters_) override + { + hyperparameters = hyperparameters_; + } protected: void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; void buildModel(const torch::Tensor& weights) override; diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index f22eabc..9bf98b4 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -31,9 +31,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, {{"diabetes", "AODELd"}, 0.8125f}, - {{"diabetes", "KDBLd"}, 0.80208f}, + {{"diabetes", "KDBLd"}, 0.804688f}, {{"diabetes", "SPODELd"}, 0.7890625f}, - {{"diabetes", "TANLd"}, 0.803385437f}, + {{"diabetes", "TANLd"}, 0.8125f}, {{"diabetes", "BoostAODE"}, 0.83984f}, // Ecoli {{"ecoli", "AODE"}, 0.889881}, @@ -42,9 +42,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, {{"ecoli", "AODELd"}, 0.875f}, - {{"ecoli", "KDBLd"}, 0.880952358f}, + {{"ecoli", "KDBLd"}, 0.872024f}, {{"ecoli", "SPODELd"}, 0.839285731f}, - {{"ecoli", "TANLd"}, 0.848214269f}, + {{"ecoli", "TANLd"}, 0.869047642f}, {{"ecoli", "BoostAODE"}, 0.89583f}, // Glass {{"glass", "AODE"}, 0.79439}, @@ -53,9 +53,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"glass", "SPODE"}, 
0.775701}, {{"glass", "TAN"}, 0.827103}, {{"glass", "AODELd"}, 0.799065411f}, - {{"glass", "KDBLd"}, 0.82710278f}, + {{"glass", "KDBLd"}, 0.864485979f}, {{"glass", "SPODELd"}, 0.780373812f}, - {{"glass", "TANLd"}, 0.869158864f}, + {{"glass", "TANLd"}, 0.831775725f}, {{"glass", "BoostAODE"}, 0.84579f}, // Iris {{"iris", "AODE"}, 0.973333}, @@ -68,29 +68,29 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} }; - std::map models{ {"AODE", new bayesnet::AODE()}, - {"AODELd", new bayesnet::AODELd()}, - {"BoostAODE", new bayesnet::BoostAODE()}, - {"KDB", new bayesnet::KDB(2)}, - {"KDBLd", new bayesnet::KDBLd(2)}, - {"XSPODE", new bayesnet::XSpode(1)}, - {"SPODE", new bayesnet::SPODE(1)}, - {"SPODELd", new bayesnet::SPODELd(1)}, - {"TAN", new bayesnet::TAN()}, - {"TANLd", new bayesnet::TANLd()} }; + std::map> models; + models["AODE"] = std::make_unique(); + models["AODELd"] = std::make_unique(); + models["BoostAODE"] = std::make_unique(); + models["KDB"] = std::make_unique(2); + models["KDBLd"] = std::make_unique(2); + models["XSPODE"] = std::make_unique(1); + models["SPODE"] = std::make_unique(1); + models["SPODELd"] = std::make_unique(1); + models["TAN"] = std::make_unique(); + models["TANLd"] = std::make_unique(); std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd"); - auto clf = models[name]; + auto clf = std::move(models[name]); SECTION("Test " + name + " classifier") { for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) { - auto clf = models[name]; auto discretize = name.substr(name.length() - 2) != "Ld"; auto raw = RawDatasets(file_name, discretize); clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf->score(raw.Xt, raw.yt); // std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " << - // scores[{file_name, name}] << std::endl; + // scores[{file_name, name}] << std::endl; INFO("Classifier: " << name << " File: " << file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); REQUIRE(clf->getStatus() == bayesnet::NORMAL); @@ -101,7 +101,6 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") INFO("Checking version of " << name << " classifier"); REQUIRE(clf->getVersion() == ACTUAL_VERSION); } - delete clf; } TEST_CASE("Models features & Graph", "[Models]") { @@ -133,7 +132,7 @@ TEST_CASE("Models features & Graph", "[Models]") clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); - REQUIRE(clf.getNumberOfStates() == 27); + REQUIRE(clf.getNumberOfStates() == 26); REQUIRE(clf.getClassNumStates() == 3); REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", @@ -149,7 +148,6 @@ TEST_CASE("Get num features & num edges", "[Models]") REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 8); } - TEST_CASE("Model predict_proba", "[Models]") { std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting", "TANLd", "SPODELd", "KDBLd"); @@ -180,15 +178,15 @@ TEST_CASE("Model predict_proba", "[Models]") {0.0284828, 0.770524, 0.200993}, {0.0213182, 0.857189, 0.121493}, {0.00868436, 0.949494, 0.0418215} }); - auto res_prob_tanld = std::vector>({ 
{0.000544493, 0.995796, 0.00365992 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.000908092, 0.997268, 0.00182429 }, - {0.00228423, 0.994645, 0.00307078 }, - {0.00120539, 0.0666788, 0.932116 }, - {0.00361847, 0.979203, 0.017179 }, - {0.00483293, 0.985326, 0.00984064 }, - {0.000595606, 0.9977, 0.00170441 } }); + auto res_prob_tanld = std::vector>({ {0.000597557, 0.9957, 0.00370254}, + {0.000731377, 0.997914, 0.0013544}, + {0.000731377, 0.997914, 0.0013544}, + {0.000731377, 0.997914, 0.0013544}, + {0.000838614, 0.998122, 0.00103923}, + {0.00130852, 0.0659492, 0.932742}, + {0.00365946, 0.979412, 0.0169281}, + {0.00435035, 0.986248, 0.00940212}, + {0.000583815, 0.997746, 0.00167066} }); auto res_prob_spodeld = std::vector>({ {0.000908024, 0.993742, 0.00535024 }, {0.00187726, 0.99167, 0.00645308 }, {0.00187726, 0.99167, 0.00645308 }, @@ -216,29 +214,33 @@ TEST_CASE("Model predict_proba", "[Models]") {"TANLd", res_prob_tanld}, {"SPODELd", res_prob_spodeld}, {"KDBLd", res_prob_kdbld} }; - std::map models{ {"TAN", new bayesnet::TAN()}, - {"SPODE", new bayesnet::SPODE(0)}, - {"BoostAODEproba", new bayesnet::BoostAODE(false)}, - {"BoostAODEvoting", new bayesnet::BoostAODE(true)}, - {"TANLd", new bayesnet::TANLd()}, - {"SPODELd", new bayesnet::SPODELd(0)}, - {"KDBLd", new bayesnet::KDBLd(2)} }; + + std::map> models; + models["TAN"] = std::make_unique(); + models["SPODE"] = std::make_unique(0); + models["BoostAODEproba"] = std::make_unique(false); + models["BoostAODEvoting"] = std::make_unique(true); + models["TANLd"] = std::make_unique(); + models["SPODELd"] = std::make_unique(0); + models["KDBLd"] = std::make_unique(2); + int init_index = 78; SECTION("Test " + model + " predict_proba") { + INFO("Testing " << model << " predict_proba"); auto ld_model = model.substr(model.length() - 2) == "Ld"; auto discretize = !ld_model; auto raw = RawDatasets("iris", discretize); - auto clf = models[model]; - clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); - auto yt_pred_proba = clf->predict_proba(raw.Xt); - auto yt_pred = clf->predict(raw.Xt); + auto& clf = *models[model]; + clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); + auto yt_pred_proba = clf.predict_proba(raw.Xt); + auto yt_pred = clf.predict(raw.Xt); std::vector y_pred; std::vector> y_pred_proba; if (!ld_model) { - y_pred = clf->predict(raw.Xv); - y_pred_proba = clf->predict_proba(raw.Xv); + y_pred = clf.predict(raw.Xv); + y_pred_proba = clf.predict_proba(raw.Xv); REQUIRE(y_pred.size() == y_pred_proba.size()); REQUIRE(y_pred.size() == yt_pred.size(0)); REQUIRE(y_pred.size() == yt_pred_proba.size(0)); @@ -267,18 +269,20 @@ TEST_CASE("Model predict_proba", "[Models]") } else { // Check predict_proba values for vectors and tensors auto predictedClasses = yt_pred_proba.argmax(1); + // std::cout << model << std::endl; for (int i = 0; i < 9; i++) { REQUIRE(predictedClasses[i].item() == yt_pred[i].item()); + // std::cout << "{"; for (int j = 0; j < 3; j++) { + // std::cout << yt_pred_proba[i + init_index][j].item() << ", "; REQUIRE(res_prob[model][i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); } + // std::cout << "\b\b}," << std::endl; } } - delete clf; } } - TEST_CASE("AODE voting-proba", "[Models]") { auto raw = RawDatasets("glass", true); @@ -324,11 +328,15 @@ TEST_CASE("KDB with hyperparameters", "[Models]") REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == 
Catch::Approx(0.761682).epsilon(raw.epsilon)); } -TEST_CASE("Incorrect type of data for SPODELd", "[Models]") +TEST_CASE("Incorrect type of data for Ld models", "[Models]") { auto raw = RawDatasets("iris", true); - auto clf = bayesnet::SPODELd(0); - REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clfs = bayesnet::SPODELd(0); + REQUIRE_THROWS_AS(clfs.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clft = bayesnet::TANLd(); + REQUIRE_THROWS_AS(clft.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); + auto clfk = bayesnet::KDBLd(0); + REQUIRE_THROWS_AS(clfk.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error); } TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { @@ -428,3 +436,49 @@ TEST_CASE("Check KDB loop detection", "[Models]") REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights)); REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights)); } +TEST_CASE("Local discretization hyperparameters", "[Models]") +{ + auto raw = RawDatasets("iris", false); + auto clfs = bayesnet::SPODELd(0); + clfs.setHyperparameters({ + {"max_iterations", 7}, + {"verbose_convergence", true}, + }); + REQUIRE_NOTHROW(clfs.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfs.getStatus() == bayesnet::NORMAL); + auto clfk = bayesnet::KDBLd(0); + clfk.setHyperparameters({ + {"k", 3}, + {"theta", 1e-4}, + }); + REQUIRE_NOTHROW(clfk.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfk.getStatus() == bayesnet::NORMAL); + auto clfa = bayesnet::AODELd(); + clfa.setHyperparameters({ + {"ld_proposed_cuts", 9}, + {"ld_algorithm", "BINQ"}, + }); + REQUIRE_NOTHROW(clfa.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clfa.getStatus() == bayesnet::NORMAL); + auto clft = bayesnet::TANLd(); + clft.setHyperparameters({ + {"ld_proposed_cuts", 7}, + {"mdlp_max_depth", 5}, + {"mdlp_min_length", 3}, + {"ld_algorithm", "MDLP"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); + clft.setHyperparameters({ + {"ld_proposed_cuts", 9}, + {"ld_algorithm", "BINQ"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); + clft.setHyperparameters({ + {"ld_proposed_cuts", 5}, + {"ld_algorithm", "BINU"}, + }); + REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing)); + REQUIRE(clft.getStatus() == bayesnet::NORMAL); +} diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index c024f32..8e0f47a 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -345,12 +345,12 @@ TEST_CASE("Test Bayesian Network", "[Network]") auto net1 = bayesnet::Network(); buildModel(net1, raw.features, raw.className); net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + // Create empty network and assign auto net2 = bayesnet::Network(); net2.addNode("TempNode"); // Add something to make sure it gets cleared net2 = net1; - + // Verify they are equal REQUIRE(net1.getFeatures() == net2.getFeatures()); REQUIRE(net1.getEdges() == net2.getEdges()); @@ -361,10 +361,10 @@ TEST_CASE("Test Bayesian 
Network", "[Network]") REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0)); REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1)); REQUIRE(net1.getNodes().size() == net2.getNodes().size()); - + // Verify topology equality REQUIRE(net1 == net2); - + // Verify they are separate objects by modifying one net2.initialize(); net2.addNode("OnlyInNet2"); @@ -376,46 +376,47 @@ TEST_CASE("Test Bayesian Network", "[Network]") INFO("Test self assignment"); buildModel(net, raw.features, raw.className); net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + int original_edges = net.getNumEdges(); int original_nodes = net.getNodes().size(); - + // Self assignment should not corrupt the network net = net; - + auto all_features = raw.features; + all_features.push_back(raw.className); REQUIRE(net.getNumEdges() == original_edges); REQUIRE(net.getNodes().size() == original_nodes); - REQUIRE(net.getFeatures() == raw.features); + REQUIRE(net.getFeatures() == all_features); REQUIRE(net.getClassName() == raw.className); } SECTION("Test operator== topology comparison") { INFO("Test operator== topology comparison"); - + // Test 1: Two identical networks auto net1 = bayesnet::Network(); auto net2 = bayesnet::Network(); - + net1.addNode("A"); net1.addNode("B"); net1.addNode("C"); net1.addEdge("A", "B"); net1.addEdge("B", "C"); - + net2.addNode("A"); net2.addNode("B"); net2.addNode("C"); net2.addEdge("A", "B"); net2.addEdge("B", "C"); - + REQUIRE(net1 == net2); - + // Test 2: Different nodes auto net3 = bayesnet::Network(); net3.addNode("A"); net3.addNode("D"); // Different node REQUIRE_FALSE(net1 == net3); - + // Test 3: Same nodes, different edges auto net4 = bayesnet::Network(); net4.addNode("A"); @@ -424,12 +425,12 @@ TEST_CASE("Test Bayesian Network", "[Network]") net4.addEdge("A", "C"); // Different topology net4.addEdge("B", "C"); REQUIRE_FALSE(net1 == net4); - + // Test 4: Empty networks auto net5 = bayesnet::Network(); auto net6 = bayesnet::Network(); REQUIRE(net5 == net6); - + // Test 5: Same topology, different edge order auto net7 = bayesnet::Network(); net7.addNode("A"); @@ -442,35 +443,36 @@ TEST_CASE("Test Bayesian Network", "[Network]") SECTION("Test RAII compliance with smart pointers") { INFO("Test RAII compliance with smart pointers"); - + std::unique_ptr net1 = std::make_unique(); buildModel(*net1, raw.features, raw.className); net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); - + // Test that copy constructor works with smart pointers std::unique_ptr net2 = std::make_unique(*net1); - + REQUIRE(*net1 == *net2); REQUIRE(net1->getNumEdges() == net2->getNumEdges()); REQUIRE(net1->getNodes().size() == net2->getNodes().size()); - + // Destroy original net1.reset(); - + + // Test predictions still work + std::vector> test = { {1}, {2}, {0}, {1} }; + REQUIRE_NOTHROW(net2->predict(test)); + // net2 should still be valid and functional + net2->initialize(); REQUIRE_NOTHROW(net2->addNode("NewNode")); REQUIRE(net2->getNodes().count("NewNode") == 1); - - // Test predictions still work - std::vector> test = { {1, 2, 0, 1, 1} }; - REQUIRE_NOTHROW(net2->predict(test)); } SECTION("Test complex topology copy") { INFO("Test complex topology copy"); - + auto original = bayesnet::Network(); - + // Create a more complex network original.addNode("Root"); original.addNode("Child1"); @@ -478,45 +480,45 @@ TEST_CASE("Test Bayesian Network", "[Network]") original.addNode("Grandchild1"); 
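These topology tests underpin the convergence criterion introduced earlier (`previousModel == classifier->getModel()`): two networks compare equal when they share the same nodes and directed edges, independent of fitted parameters. A hedged sketch of the fit/refine/compare loop built on such an equality; `TinyNet` and `fitModel` are stand-ins, not library types:

```cpp
#include <iostream>
#include <set>
#include <string>
#include <utility>

struct TinyNet {
    std::set<std::string> nodes;
    std::set<std::pair<std::string, std::string>> edges; // parent -> child
    bool operator==(const TinyNet& o) const {
        return nodes == o.nodes && edges == o.edges;     // topology only
    }
};

TinyNet fitModel(int iteration) {
    // Pretend the learned structure stabilizes from the second iteration on.
    TinyNet net{ {"class", "f1", "f2"}, {{"class", "f1"}, {"class", "f2"}} };
    if (iteration >= 1) net.edges.insert({"f1", "f2"});
    return net;
}

int main() {
    const int max_iterations = 10;
    TinyNet previous;
    for (int it = 0; it < max_iterations; ++it) {
        TinyNet current = fitModel(it);      // phase 2: build model
        /* refine discretization here */     // phase 3: network-aware cuts
        if (it > 0 && current == previous) { // converged: same topology twice
            std::cout << "Converged after " << it + 1 << " iterations\n";
            break;
        }
        previous = current;
    }
}
```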
original.addNode("Grandchild2"); original.addNode("Grandchild3"); - + original.addEdge("Root", "Child1"); original.addEdge("Root", "Child2"); original.addEdge("Child1", "Grandchild1"); original.addEdge("Child1", "Grandchild2"); original.addEdge("Child2", "Grandchild3"); - + // Copy it auto copy = original; - + // Verify topology is identical REQUIRE(original == copy); REQUIRE(original.getNodes().size() == copy.getNodes().size()); REQUIRE(original.getNumEdges() == copy.getNumEdges()); - + // Verify edges are properly reconstructed auto originalEdges = original.getEdges(); auto copyEdges = copy.getEdges(); REQUIRE(originalEdges.size() == copyEdges.size()); - + // Verify node relationships are properly copied for (const auto& nodePair : original.getNodes()) { const std::string& nodeName = nodePair.first; auto* originalNode = nodePair.second.get(); auto* copyNode = copy.getNodes().at(nodeName).get(); - + REQUIRE(originalNode->getParents().size() == copyNode->getParents().size()); REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size()); - + // Verify parent names match for (size_t i = 0; i < originalNode->getParents().size(); ++i) { - REQUIRE(originalNode->getParents()[i]->getName() == - copyNode->getParents()[i]->getName()); + REQUIRE(originalNode->getParents()[i]->getName() == + copyNode->getParents()[i]->getName()); } - + // Verify child names match for (size_t i = 0; i < originalNode->getChildren().size(); ++i) { - REQUIRE(originalNode->getChildren()[i]->getName() == - copyNode->getChildren()[i]->getName()); + REQUIRE(originalNode->getChildren()[i]->getName() == + copyNode->getChildren()[i]->getName()); } } } diff --git a/tests/TestBayesNode.cc b/tests/TestBayesNode.cc index 8cbd757..a1fd04f 100644 --- a/tests/TestBayesNode.cc +++ b/tests/TestBayesNode.cc @@ -158,4 +158,47 @@ TEST_CASE("TEST MinFill method", "[Node]") REQUIRE(node_2.minFill() == 6); REQUIRE(node_3.minFill() == 3); REQUIRE(node_4.minFill() == 1); +} +TEST_CASE("Test operator =", "[Node]") +{ + // Generate a test to test the operator = of the Node class + // Create a node with 3 parents and 2 children + auto node = bayesnet::Node("N1"); + auto parent_1 = bayesnet::Node("P1"); + parent_1.setNumStates(3); + auto child_1 = bayesnet::Node("H1"); + child_1.setNumStates(2); + node.addParent(&parent_1); + node.addChild(&child_1); + // Create a cpt in the node using computeCPT + auto dataset = torch::tensor({ {1, 0, 0, 1}, {0, 1, 2, 1}, {0, 1, 1, 0} }); + auto states = std::vector({ 2, 3, 3 }); + auto features = std::vector{ "N1", "P1", "H1" }; + auto className = std::string("Class"); + auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble); + node.setNumStates(2); + node.computeCPT(dataset, features, 0.0, weights); + // Get the cpt of the node + auto cpt = node.getCPT(); + // Check that the cpt is not empty + REQUIRE(cpt.numel() > 0); + // Check that the cpt has the correct dimensions + auto dimensions = cpt.sizes(); + REQUIRE(dimensions.size() == 2); + REQUIRE(dimensions[0] == 2); // Number of states of the node + REQUIRE(dimensions[1] == 3); // Number of states of the first parent + // Create a copy of the node + auto node_copy = node; + // Check that the copy has not any parents or children + auto parents = node_copy.getParents(); + auto children = node_copy.getChildren(); + REQUIRE(parents.size() == 0); + REQUIRE(children.size() == 0); + // Check that the copy has the same name + REQUIRE(node_copy.getName() == "N1"); + // Check that the copy has the same cpt + auto cpt_copy = 
node_copy.getCPT(); + REQUIRE(cpt_copy.equal(cpt)); + // Check that the copy has the same number of states + REQUIRE(node_copy.getNumStates() == node.getNumStates()); } \ No newline at end of file From e5227c5f4bd4b46b839447d6fa513075568dec5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 16:07:16 +0200 Subject: [PATCH 06/11] Add dataset tests to Ld models --- README.md | 2 +- tests/TestBayesModels.cc | 17 +++++++++++++++-- tests/TestBayesNode.cc | 3 ++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1226f6d..dfb8103 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Doctorado-ML/BayesNet) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea) -[![Coverage Badge](https://img.shields.io/badge/Coverage-98,5%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) +[![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet) [![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344) Bayesian Network Classifiers library diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 9bf98b4..26cd773 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -301,17 +301,30 @@ TEST_CASE("AODE voting-proba", "[Models]") REQUIRE(pred_proba[67][0] == Catch::Approx(0.702184).epsilon(raw.epsilon)); REQUIRE(clf.topological_order() == std::vector()); } -TEST_CASE("SPODELd dataset", "[Models]") +TEST_CASE("Ld models with dataset", "[Models]") { auto raw = RawDatasets("iris", false); auto clf = bayesnet::SPODELd(0); - // raw.dataset.to(torch::kFloat32); clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing); auto score = clf.score(raw.Xt, raw.yt); clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); auto scoret = clf.score(raw.Xt, raw.yt); REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon)); + auto clf2 = bayesnet::TANLd(); + clf2.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing); + auto score2 = clf2.score(raw.Xt, raw.yt); + clf2.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); + auto score2t = clf2.score(raw.Xt, raw.yt); + REQUIRE(score2 == Catch::Approx(0.97333f).epsilon(raw.epsilon)); + REQUIRE(score2t == Catch::Approx(0.97333f).epsilon(raw.epsilon)); + auto clf3 = bayesnet::KDBLd(2); + clf3.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing); + auto score3 = clf3.score(raw.Xt, raw.yt); + clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing); + auto score3t = clf3.score(raw.Xt, raw.yt); + REQUIRE(score3 == Catch::Approx(0.97333f).epsilon(raw.epsilon)); + REQUIRE(score3t == Catch::Approx(0.97333f).epsilon(raw.epsilon)); } TEST_CASE("KDB with hyperparameters", "[Models]") { diff --git a/tests/TestBayesNode.cc b/tests/TestBayesNode.cc index a1fd04f..1ed7133 100644 --- a/tests/TestBayesNode.cc +++ b/tests/TestBayesNode.cc @@ -188,7 +188,8 @@ TEST_CASE("Test operator =", "[Node]") REQUIRE(dimensions[0] == 2); // Number of states of the 
node REQUIRE(dimensions[1] == 3); // Number of states of the first parent // Create a copy of the node - auto node_copy = node; + bayesnet::Node node_copy("XX"); + node_copy = node; // Check that the copy has not any parents or children auto parents = node_copy.getParents(); auto children = node_copy.getChildren(); From aa77745e55deed9f496d21a5bcf125e829dff4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 17:28:27 +0200 Subject: [PATCH 07/11] Fix TANLd valid_hyperparameters --- bayesnet/classifiers/TANLd.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index 32bd7b8..b415b0f 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -8,7 +8,10 @@ #include namespace bayesnet { - TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} + TANLd::TANLd() : TAN(), Proposal(dataset, features, className) + { + validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal + } TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector& features_, const std::string& className_, map>& states_, const Smoothing_t smoothing) { checkInput(X_, y_); From e2a0c5f4a5e20af7a1add65190e8bc7c480a0e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 18:50:09 +0200 Subject: [PATCH 08/11] Add Notes to Proposal convergence --- bayesnet/classifiers/KDBLd.cc | 2 +- bayesnet/classifiers/Proposal.cc | 4 +++- bayesnet/classifiers/Proposal.h | 3 ++- bayesnet/classifiers/SPODELd.cc | 2 +- bayesnet/classifiers/TANLd.cc | 2 +- bayesnet/ensembles/AODELd.cc | 2 +- tests/TestBayesModels.cc | 7 ++++--- 7 files changed, 13 insertions(+), 9 deletions(-) diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 1b96bce..dbda394 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -8,7 +8,7 @@ #include namespace bayesnet { - KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) + KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className, KDB::notes) { validHyperparameters = validHyperparameters_ld; validHyperparameters.push_back("k"); diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index b634b6e..b3c7639 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -16,7 +16,7 @@ #include "TANLd.h" namespace bayesnet { - Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) + Proposal::Proposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_, std::vector& notes_) : pDataset(dataset_), pFeatures(features_), pClassName(className_), notes(notes_) { } void Proposal::setHyperparameters(nlohmann::json& hyperparameters) @@ -215,6 +215,8 @@ namespace bayesnet { if (convergence_params.verbose) { std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl; } + notes.push_back("Converged after " + std::to_string(iteration + 1) + " of " + + std::to_string(convergence_params.maxIterations) + " iterations"); break; } diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h index 9f23283..bb53776 100644 --- a/bayesnet/classifiers/Proposal.h +++ b/bayesnet/classifiers/Proposal.h @@ -18,7 +18,7 @@ namespace bayesnet { class Proposal { public: - Proposal(torch::Tensor& 
pDataset, std::vector& features_, std::string& className_); + Proposal(torch::Tensor& pDataset, std::vector& features_, std::string& className_, std::vector& notes); void setHyperparameters(nlohmann::json& hyperparameters_); protected: void checkInput(const torch::Tensor& X, const torch::Tensor& y); @@ -61,6 +61,7 @@ namespace bayesnet { }; private: std::vector factorize(const std::vector& labels_t); + std::vector& notes; // Notes during fit from BaseClassifier torch::Tensor& pDataset; // (n+1)xm tensor std::vector& pFeatures; std::string& pClassName; diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index 8cdbdec..0cffe63 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -7,7 +7,7 @@ #include "SPODELd.h" namespace bayesnet { - SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) + SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className, SPODE::notes) { validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal } diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index b415b0f..c9de329 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -8,7 +8,7 @@ #include namespace bayesnet { - TANLd::TANLd() : TAN(), Proposal(dataset, features, className) + TANLd::TANLd() : TAN(), Proposal(dataset, features, className, TAN::notes) { validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal } diff --git a/bayesnet/ensembles/AODELd.cc b/bayesnet/ensembles/AODELd.cc index 3dc80bf..4f0f0cd 100644 --- a/bayesnet/ensembles/AODELd.cc +++ b/bayesnet/ensembles/AODELd.cc @@ -7,7 +7,7 @@ #include "AODELd.h" namespace bayesnet { - AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) + AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className, Ensemble::notes) { validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal } diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 26cd773..4473c35 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -407,14 +407,15 @@ TEST_CASE("Check proposal checkInput", "[Models]") { class testProposal : public bayesnet::Proposal { public: - testProposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_) - : Proposal(dataset_, features_, className_) + testProposal(torch::Tensor& dataset_, std::vector& features_, std::string& className_, std::vector& notes_) + : Proposal(dataset_, features_, className_, notes_) { } void test_X_y(const torch::Tensor& X, const torch::Tensor& y) { checkInput(X, y); } }; auto raw = RawDatasets("iris", true); - auto clf = testProposal(raw.dataset, raw.features, raw.className); + std::vector notes; + auto clf = testProposal(raw.dataset, raw.features, raw.className, notes); torch::Tensor X = torch::randint(0, 3, { 10, 4 }); torch::Tensor y = torch::rand({ 10 }); INFO("Check X is not float"); From 3e0b790cfe98f03d2ff0101e4160dadbf8e1451a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 8 Jul 2025 18:57:57 +0200 Subject: [PATCH 09/11] Update Changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25bd515..96618c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to 
[Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [1.2.0] - 2025-06-30 +## [1.2.0] - 2025-07-08 ### Internal @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - *ld_proposed_cuts*: number of cut points to return. - *mdlp_min_length*: minimum length of a partition in MDLP algorithm to be evaluated for partition. - *mdlp_max_depth*: maximum level of recursion in MDLP algorithm. + - *max_iterations*: maximum number of iterations of discretization-build model loop. + - *verbose_convergence*: display status messages during the convergence process. - Remove vcpkg as a dependency manager, now the library is built with Conan package manager and CMake. - Add `build_type` option to the sample target in the Makefile to allow building in *Debug* or *Release* mode. Default is *Debug*. From 481c70230243da6d2d4f6c4ed71ba327fa4985cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 19 Jul 2025 22:12:27 +0200 Subject: [PATCH 10/11] Update libraries versions --- conanfile.py | 59 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/conanfile.py b/conanfile.py index d300869..d55e078 100644 --- a/conanfile.py +++ b/conanfile.py @@ -3,6 +3,7 @@ from conan import ConanFile from conan.tools.cmake import CMakeToolchain, CMake, cmake_layout, CMakeDeps from conan.tools.files import copy + class BayesNetConan(ConanFile): name = "bayesnet" settings = "os", "compiler", "build_type", "arch" @@ -10,26 +11,35 @@ class BayesNetConan(ConanFile): "shared": [True, False], "fPIC": [True, False], "enable_testing": [True, False], - "enable_coverage": [True, False] + "enable_coverage": [True, False], } default_options = { "shared": False, "fPIC": True, "enable_testing": False, - "enable_coverage": False + "enable_coverage": False, } # Sources are located in the same place as this recipe, copy them to the recipe - exports_sources = "CMakeLists.txt", "bayesnet/*", "config/*", "cmake/*", "docs/*", "tests/*", "bayesnetConfig.cmake.in" - + exports_sources = ( + "CMakeLists.txt", + "bayesnet/*", + "config/*", + "cmake/*", + "docs/*", + "tests/*", + "bayesnetConfig.cmake.in", + ) + def set_version(self) -> None: cmake = pathlib.Path(self.recipe_folder) / "CMakeLists.txt" - text = cmake.read_text(encoding="utf-8") + text = cmake.read_text(encoding="utf-8") # Accept either: project(foo VERSION 1.2.3) or set(foo_VERSION 1.2.3) match = re.search( r"""project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+)""", - text, re.IGNORECASE | re.VERBOSE + text, + re.IGNORECASE | re.VERBOSE, ) if match: self.version = match.group(1) @@ -40,26 +50,26 @@ class BayesNetConan(ConanFile): def config_options(self): if self.settings.os == "Windows": del self.options.fPIC - + def configure(self): if self.options.shared: self.options.rm_safe("fPIC") - + def requirements(self): # Core dependencies - self.requires("libtorch/2.7.0") + self.requires("libtorch/2.7.1") self.requires("nlohmann_json/3.11.3") - self.requires("folding/1.1.1") # Custom package - self.requires("fimdlp/2.1.0") # Custom package - + self.requires("folding/1.1.2") # Custom package + self.requires("fimdlp/2.1.1") # Custom package + def build_requirements(self): self.build_requires("cmake/[>=3.27]") - self.test_requires("arff-files/1.2.0") # Custom package + self.test_requires("arff-files/1.2.1") # Custom package self.test_requires("catch2/3.8.1") - + def layout(self): cmake_layout(self) - + def generate(self): deps = CMakeDeps(self) 
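The new `set_version` above makes `CMakeLists.txt` the single source of truth for the package version, extracted by regex rather than duplicated in the recipe. The same extraction sketched in C++ for consistency with the other examples (the pattern mirrors the recipe's; reading the file from disk is elided):

```cpp
#include <iostream>
#include <regex>
#include <string>

int main() {
    // Stand-in for the contents of CMakeLists.txt.
    const std::string cmake = "project(bayesnet VERSION 1.2.0 LANGUAGES CXX)";
    const std::regex re(R"(project\s*\([^\)]*VERSION\s+([0-9]+\.[0-9]+\.[0-9]+))",
                        std::regex::icase);
    std::smatch m;
    if (std::regex_search(cmake, m, re))
        std::cout << m[1] << std::endl; // prints: 1.2.0
}
```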
deps.generate() @@ -67,27 +77,32 @@ class BayesNetConan(ConanFile): tc.variables["ENABLE_TESTING"] = self.options.enable_testing tc.variables["CODE_COVERAGE"] = self.options.enable_coverage tc.generate() - + def build(self): cmake = CMake(self) cmake.configure() cmake.build() - + if self.options.enable_testing: # Run tests only if we're building with testing enabled self.run("ctest --output-on-failure", cwd=self.build_folder) - + def package(self): - copy(self, "LICENSE", src=self.source_folder, dst=os.path.join(self.package_folder, "licenses")) + copy( + self, + "LICENSE", + src=self.source_folder, + dst=os.path.join(self.package_folder, "licenses"), + ) cmake = CMake(self) cmake.install() - + def package_info(self): self.cpp_info.libs = ["bayesnet"] self.cpp_info.includedirs = ["include"] self.cpp_info.set_property("cmake_find_mode", "both") self.cpp_info.set_property("cmake_target_name", "bayesnet::bayesnet") - + # Add compiler flags that might be needed if self.settings.os == "Linux": - self.cpp_info.system_libs = ["pthread"] \ No newline at end of file + self.cpp_info.system_libs = ["pthread"] From 56d85b1a430db24f87ef7d45f2cc487d73454f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 19 Jul 2025 22:25:17 +0200 Subject: [PATCH 11/11] Update test libraries version number --- tests/TestModulesVersions.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/TestModulesVersions.cc b/tests/TestModulesVersions.cc index 3cd99be..413ab1f 100644 --- a/tests/TestModulesVersions.cc +++ b/tests/TestModulesVersions.cc @@ -16,10 +16,10 @@ #include "TestUtils.h" std::map modules = { - { "mdlp", "2.1.0" }, - { "Folding", "1.1.1" }, + { "mdlp", "2.1.1" }, + { "Folding", "1.1.2" }, { "json", "3.11" }, - { "ArffFiles", "1.2.0" } + { "ArffFiles", "1.2.1" } }; TEST_CASE("MDLP", "[Modules]")
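The expected-version map above mixes exact entries (`2.1.1`, `1.1.2`) with a prefix (`3.11` for nlohmann::json 3.11.3), which suggests a prefix-tolerant comparison in the assertions. A minimal sketch of such a check; `version_matches` is a hypothetical helper, not part of the test suite:

```cpp
#include <cassert>
#include <map>
#include <string>

// Hypothetical helper: does `actual` start with `expected`?
bool version_matches(const std::string& actual, const std::string& expected) {
    return actual.rfind(expected, 0) == 0;
}

int main() {
    std::map<std::string, std::string> expected = {
        { "mdlp", "2.1.1" }, { "Folding", "1.1.2" },
        { "json", "3.11" },  { "ArffFiles", "1.2.1" }
    };
    assert(version_matches("3.11.3", expected["json"]));  // prefix match
    assert(version_matches("2.1.1",  expected["mdlp"]));  // exact match
    return 0;
}
```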