diff --git a/Makefile b/Makefile
index e5fcfce..bad7fa9 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,14 @@ mansrcdir = docs/man3
 mandestdir = /usr/local/share/man
 sed_command_link = 's/e">LCOV -/e">Back to manual<\/a> LCOV -/g'
 sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
+# Detect the number of available processors (Linux, then GNU coreutils, then BSD/macOS)
+CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
+	|| nproc --all 2>/dev/null \
+	|| sysctl -n hw.ncpu)
+
+# Desired parallel job count: CPUs - 7, but never less than 1
+JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
+
 define ClearTests
 	@for t in $(test_targets); do \
@@ -36,6 +44,7 @@ define setup_target
 	@if [ -d $(2) ]; then rm -fr $(2); fi
 	@conan install . --build=missing -of $(2) -s build_type=$(1)
 	@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
+	@echo ">>> Will build using $(JOBS) parallel jobs"
 	@echo ">>> Done"
 endef
@@ -72,10 +81,10 @@ release: ## Setup release version using Conan
 	@$(call setup_target,"Release","$(f_release)","ENABLE_TESTING=OFF")
 
 buildd: ## Build the debug targets
-	cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS)
 
 buildr: ## Build the release targets
-	cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS)
 
 # Install targets
@@ -105,7 +114,7 @@ opt = ""
 test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
 	@echo ">>> Running BayesNet tests...";
 	@$(MAKE) clean-test
-	@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	@cmake --build $(f_debug) -t $(test_targets) --parallel $(JOBS)
 	@for t in $(test_targets); do \
 		echo ">>> Running $$t...";\
 		if [ -f $(f_debug)/tests/$$t ]; then \
diff --git a/bayesnet/classifiers/IterativeProposal.cc b/bayesnet/classifiers/IterativeProposal.cc
deleted file mode 100644
index 7973644..0000000
--- a/bayesnet/classifiers/IterativeProposal.cc
+++ /dev/null
@@ -1,151 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#include "IterativeProposal.h"
-#include <iostream>
-#include <limits>
-
-namespace bayesnet {
-
-    IterativeProposal::IterativeProposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_)
-        : Proposal(pDataset, features_, className_) {}
-
-    void IterativeProposal::setHyperparameters(const nlohmann::json& hyperparameters_) {
-        // First set base Proposal hyperparameters
-        Proposal::setHyperparameters(hyperparameters_);
-
-        // Then set IterativeProposal specific hyperparameters
-        if (hyperparameters_.contains("max_iterations")) {
-            convergence_params.maxIterations = hyperparameters_["max_iterations"];
-        }
-        if (hyperparameters_.contains("tolerance")) {
-            convergence_params.tolerance = hyperparameters_["tolerance"];
-        }
-        if (hyperparameters_.contains("convergence_metric")) {
-            convergence_params.convergenceMetric = hyperparameters_["convergence_metric"];
-        }
-        if (hyperparameters_.contains("verbose_convergence")) {
-            convergence_params.verbose = hyperparameters_["verbose_convergence"];
-        }
-    }
-
-    template <typename Classifier>
-    map<std::string, std::vector<int>> IterativeProposal::iterativeLocalDiscretization(
-        const torch::Tensor& y,
-        Classifier* classifier,
-        const torch::Tensor& dataset,
-        const std::vector<std::string>& features,
-        const std::string& className,
-        const map<std::string, std::vector<int>>& initialStates,
-        double smoothing
-    ) {
-        // Phase 1: Initial discretization (same as original)
-        auto currentStates = fit_local_discretization(y);
-
-        double previousValue = -std::numeric_limits<double>::infinity();
-        double currentValue = 0.0;
-
-        if (convergence_params.verbose) {
-            std::cout << "Starting iterative local discretization with "
-                      << convergence_params.maxIterations << " max iterations" << std::endl;
-        }
-
-        for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
-            if (convergence_params.verbose) {
-                std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl;
-            }
-
-            // Phase 2: Build model with current discretization
-            classifier->fit(dataset, features, className, currentStates, smoothing);
-
-            // Phase 3: Network-aware discretization refinement
-            auto newStates = localDiscretizationProposal(currentStates, classifier->getModel());
-
-            // Phase 4: Compute convergence metric
-            if (convergence_params.convergenceMetric == "likelihood") {
-                currentValue = computeLogLikelihood(classifier->getModel(), dataset);
-            } else if (convergence_params.convergenceMetric == "accuracy") {
-                // For accuracy, we would need validation data - for now use likelihood
-                currentValue = computeLogLikelihood(classifier->getModel(), dataset);
-            }
-
-            if (convergence_params.verbose) {
-                std::cout << "  " << convergence_params.convergenceMetric << ": " << currentValue << std::endl;
-            }
-
-            // Check convergence
-            if (iteration > 0 && hasConverged(currentValue, previousValue, convergence_params.convergenceMetric)) {
-                if (convergence_params.verbose) {
-                    std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl;
-                }
-                currentStates = newStates;
-                break;
-            }
-
-            // Update for next iteration
-            currentStates = newStates;
-            previousValue = currentValue;
-        }
-
-        return currentStates;
-    }
-
-    double IterativeProposal::computeLogLikelihood(const Network& model, const torch::Tensor& dataset) {
-        double logLikelihood = 0.0;
-        int n_samples = dataset.size(0);
-        int n_features = dataset.size(1);
-
-        for (int i = 0; i < n_samples; ++i) {
-            double sampleLogLikelihood = 0.0;
-
-            // Get class value for this sample
-            int classValue = dataset[i][n_features - 1].item<int>();
-
-            // Compute log-likelihood for each feature given its parents and class
-            for (const auto& node : model.getNodes()) {
-                if (node.getName() == model.getClassName()) {
-                    // For class node, add log P(class)
-                    auto classCounts = node.getCPT();
-                    double classProb = classCounts[classValue] / dataset.size(0);
-                    sampleLogLikelihood += std::log(std::max(classProb, 1e-10));
-                } else {
-                    // For feature nodes, add log P(feature | parents, class)
-                    int featureIdx = std::distance(model.getFeatures().begin(),
-                                                   std::find(model.getFeatures().begin(),
-                                                             model.getFeatures().end(),
-                                                             node.getName()));
-                    int featureValue = dataset[i][featureIdx].item<int>();
-
-                    // Simplified probability computation - in practice would need full CPT lookup
-                    double featureProb = 0.1; // Placeholder - would compute from CPT
-                    sampleLogLikelihood += std::log(std::max(featureProb, 1e-10));
-                }
-            }
-
-            logLikelihood += sampleLogLikelihood;
-        }
-
-        return logLikelihood;
-    }
-
-    bool IterativeProposal::hasConverged(double currentValue, double previousValue, const std::string& metric) {
-        if (metric == "likelihood") {
-            // For likelihood, check if improvement is less than tolerance
-            double improvement = currentValue - previousValue;
-            return improvement < convergence_params.tolerance;
-        } else if (metric == "accuracy") {
-            // For accuracy, check if change is less than tolerance
-            double change = std::abs(currentValue - previousValue);
-            return change < convergence_params.tolerance;
-        }
-        return false;
-    }
-
-    // Explicit template instantiation for common classifier types
-    template map<std::string, std::vector<int>> IterativeProposal::iterativeLocalDiscretization<Classifier>(
-        const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector<std::string>&,
-        const std::string&, const map<std::string, std::vector<int>>&, double);
-}
\ No newline at end of file
diff --git a/bayesnet/classifiers/IterativeProposal.h b/bayesnet/classifiers/IterativeProposal.h
deleted file mode 100644
index 4453c2c..0000000
--- a/bayesnet/classifiers/IterativeProposal.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#ifndef ITERATIVE_PROPOSAL_H
-#define ITERATIVE_PROPOSAL_H
-
-#include "Proposal.h"
-#include "bayesnet/network/Network.h"
-#include <nlohmann/json.hpp>
-
-namespace bayesnet {
-    class IterativeProposal : public Proposal {
-    public:
-        IterativeProposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
-        void setHyperparameters(const nlohmann::json& hyperparameters_);
-
-    protected:
-        template <typename Classifier>
-        map<std::string, std::vector<int>> iterativeLocalDiscretization(
-            const torch::Tensor& y,
-            Classifier* classifier,
-            const torch::Tensor& dataset,
-            const std::vector<std::string>& features,
-            const std::string& className,
-            const map<std::string, std::vector<int>>& initialStates,
-            double smoothing = 1.0
-        );
-
-        // Convergence parameters
-        struct {
-            int maxIterations = 10;
-            double tolerance = 1e-6;
-            std::string convergenceMetric = "likelihood"; // "likelihood" or "accuracy"
-            bool verbose = false;
-        } convergence_params;
-
-        nlohmann::json validHyperparameters_iter = {
-            "max_iterations", "tolerance", "convergence_metric", "verbose_convergence"
-        };
-
-    private:
-        double computeLogLikelihood(const Network& model, const torch::Tensor& dataset);
-        bool hasConverged(double currentValue, double previousValue, const std::string& metric);
-    };
-}
-
-#endif
\ No newline at end of file
diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc
index e112c1c..541e005 100644
--- a/bayesnet/classifiers/KDBLd.cc
+++ b/bayesnet/classifiers/KDBLd.cc
@@ -33,12 +33,13 @@ namespace bayesnet {
         className = className_;
         Xf = X_;
         y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
+
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization<KDB>(y, this, dataset, features, className, states_, smoothing);
+
+        // Final fit with converged discretization
         KDB::fit(dataset, features, className, states, smoothing);
-        states = localDiscretizationProposal(states, model);
+
         return *this;
     }
     torch::Tensor KDBLd::predict(torch::Tensor& X)
diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc
index 3ef8a78..aa0698d 100644
--- a/bayesnet/classifiers/Proposal.cc
+++ b/bayesnet/classifiers/Proposal.cc
@@ -5,6 +5,9 @@
 // ***************************************************************
 
 #include "Proposal.h"
+#include <iostream>
+#include <cmath>
+#include <limits>
 
 namespace bayesnet {
     Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
@@ -38,6 +41,15 @@ namespace bayesnet {
                 throw std::invalid_argument("Invalid discretization algorithm: " + algorithm.get<std::string>());
             }
         }
+        // Convergence parameters
+        if (hyperparameters.contains("max_iterations")) {
+            convergence_params.maxIterations = hyperparameters["max_iterations"];
+            hyperparameters.erase("max_iterations");
+        }
+        if (hyperparameters.contains("verbose_convergence")) {
+            convergence_params.verbose = hyperparameters["verbose_convergence"];
+            hyperparameters.erase("verbose_convergence");
+        }
         if (!hyperparameters.empty()) {
             throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
         }
@@ -163,4 +175,94 @@ namespace bayesnet {
         }
         return yy;
     }
+
+    template <typename Classifier>
+    map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization(
+        const torch::Tensor& y,
+        Classifier* classifier,
+        const torch::Tensor& dataset,
+        const std::vector<std::string>& features,
+        const std::string& className,
+        const map<std::string, std::vector<int>>& initialStates,
+        Smoothing_t smoothing
+    )
+    {
+        // Phase 1: Initial discretization (same as original)
+        auto currentStates = fit_local_discretization(y);
+        auto previousModel = Network();
+
+        if (convergence_params.verbose) {
+            std::cout << "Starting iterative local discretization with "
+                      << convergence_params.maxIterations << " max iterations" << std::endl;
+        }
+
+        for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
+            if (convergence_params.verbose) {
+                std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl;
+            }
+
+            // Phase 2: Build model with current discretization
+            classifier->fit(dataset, features, className, currentStates, smoothing);
+
+            // Phase 3: Network-aware discretization refinement
+            currentStates = localDiscretizationProposal(currentStates, classifier->model);
+
+            // Check convergence: stop once the learned structure no longer changes
+            if (iteration > 0 && previousModel == classifier->model) {
+                if (convergence_params.verbose) {
+                    std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl;
+                }
+                break;
+            }
+
+            // Update for next iteration
+            previousModel = classifier->model;
+        }
+
+        return currentStates;
+    }
+
+    double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset)
+    {
+        double logLikelihood = 0.0;
+        int n_samples = dataset.size(0);
+        int n_features = dataset.size(1);
+
+        for (int i = 0; i < n_samples; ++i) {
+            double sampleLogLikelihood = 0.0;
+
+            // Get class value for this sample
+            int classValue = dataset[i][n_features - 1].item<int>();
+
+            // Compute log-likelihood for each feature given its parents and class
+            for (const auto& node : model.getNodes()) {
+                if (node.first == model.getClassName()) {
+                    // For class node, add log P(class)
+                    auto classCounts = node.second->getCPT();
+                    double classProb = classCounts[classValue].item<double>() / dataset.size(0);
+                    sampleLogLikelihood += std::log(std::max(classProb, 1e-10));
+                } else {
+                    // For feature nodes, add log P(feature | parents, class)
+                    int featureIdx = std::distance(model.getFeatures().begin(),
+                                                   std::find(model.getFeatures().begin(),
+                                                             model.getFeatures().end(),
+                                                             node.first));
+                    int featureValue = dataset[i][featureIdx].item<int>();
+
+                    // Simplified probability computation - in practice would need full CPT lookup
+                    double featureProb = 0.1; // Placeholder - would compute from CPT
+                    sampleLogLikelihood += std::log(std::max(featureProb, 1e-10));
+                }
+            }
+
+            logLikelihood += sampleLogLikelihood;
+        }
+
+        return logLikelihood;
+    }
+
+    // Explicit template instantiation for common classifier types
+    // template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<Classifier>(
+    //     const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector<std::string>&,
+    //     const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
 }
diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h
index 6823a38..150508a 100644
--- a/bayesnet/classifiers/Proposal.h
+++ b/bayesnet/classifiers/Proposal.h
@@ -25,18 +25,43 @@ namespace bayesnet {
         torch::Tensor prepareX(torch::Tensor& X);
         map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
         map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
+
+        // Iterative discretization method
+        template <typename Classifier>
+        map<std::string, std::vector<int>> iterativeLocalDiscretization(
+            const torch::Tensor& y,
+            Classifier* classifier,
+            const torch::Tensor& dataset,
+            const std::vector<std::string>& features,
+            const std::string& className,
+            const map<std::string, std::vector<int>>& initialStates,
+            const Smoothing_t smoothing
+        );
+
         torch::Tensor Xf; // X continuous nxm tensor
         torch::Tensor y; // y discrete nx1 tensor
         map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
+
         // MDLP parameters
         struct {
             size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
             float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
             int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
         } ld_params;
-        nlohmann::json validHyperparameters_ld = { "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth" };
+
+        // Convergence parameters
+        struct {
+            int maxIterations = 10;
+            bool verbose = false;
+        } convergence_params;
+
+        nlohmann::json validHyperparameters_ld = {
+            "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth",
+            "max_iterations", "verbose_convergence"
+        };
     private:
         std::vector<int> factorize(const std::vector<std::string>& labels_t);
+        double computeLogLikelihood(Network& model, const torch::Tensor& dataset);
         torch::Tensor& pDataset; // (n+1)xm tensor
         std::vector<std::string>& pFeatures;
         std::string& pClassName;
diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc
index f9418da..d5a8dda 100644
--- a/bayesnet/classifiers/TANLd.cc
+++ b/bayesnet/classifiers/TANLd.cc
@@ -15,14 +15,14 @@ namespace bayesnet {
         className = className_;
         Xf = X_;
         y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
+
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization<TAN>(y, this, dataset, features, className, states_, smoothing);
+
+        // Final fit with converged discretization
         TAN::fit(dataset, features, className, states, smoothing);
-        states = localDiscretizationProposal(states, model);
+
         return *this;
-
     }
     torch::Tensor TANLd::predict(torch::Tensor& X)
     {
diff --git a/bayesnet/classifiers/TANLdi.cc b/bayesnet/classifiers/TANLdi.cc
deleted file mode 100644
index 4df5d5c..0000000
--- a/bayesnet/classifiers/TANLdi.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#include "TANLdi.h"
-
-namespace bayesnet {
-    TANLdi::TANLdi() : TAN(), IterativeProposal(dataset, features, className) {}
-
-    TANLdi& TANLdi::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
-    {
-        checkInput(X_, y_);
-        features = features_;
-        className = className_;
-        Xf = X_;
-        y = y_;
-
-        // Use iterative local discretization instead of the two-phase approach
-        states = iterativeLocalDiscretization<TAN>(y, this, dataset, features, className, states_, smoothing);
-
-        // Final fit with converged discretization
-        TAN::fit(dataset, features, className, states, smoothing);
-
-        return *this;
-    }
-
-    torch::Tensor TANLdi::predict(torch::Tensor& X)
-    {
-        auto Xt = prepareX(X);
-        return TAN::predict(Xt);
-    }
-
-    torch::Tensor TANLdi::predict_proba(torch::Tensor& X)
-    {
-        auto Xt = prepareX(X);
-        return TAN::predict_proba(Xt);
-    }
-
-    std::vector<std::string> TANLdi::graph(const std::string& name) const
-    {
-        return TAN::graph(name);
-    }
-}
\ No newline at end of file
diff --git a/bayesnet/classifiers/TANLdi.h b/bayesnet/classifiers/TANLdi.h
deleted file mode 100644
index f850a59..0000000
--- a/bayesnet/classifiers/TANLdi.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-
-#ifndef TANLDI_H
-#define TANLDI_H
-#include "TAN.h"
-#include "IterativeProposal.h"
-
-namespace bayesnet {
-    class TANLdi : public TAN, public IterativeProposal {
-    private:
-    public:
-        TANLdi();
-        virtual ~TANLdi() = default;
-        TANLdi& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
-        std::vector<std::string> graph(const std::string& name = "TANLdi") const override;
-        torch::Tensor predict(torch::Tensor& X) override;
-        torch::Tensor predict_proba(torch::Tensor& X) override;
-    };
-}
-#endif // !TANLDI_H
\ No newline at end of file
diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc
index 07048f3..6d41f2c 100644
--- a/bayesnet/network/Network.cc
+++ b/bayesnet/network/Network.cc
@@ -17,14 +17,90 @@ namespace bayesnet {
     Network::Network() : fitted{ false }, classNumStates{ 0 }
     {
     }
-    Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
-        fitted(other.fitted), samples(other.samples)
+    Network::Network(const Network& other)
+        : features(other.features), className(other.className), classNumStates(other.classNumStates),
+          fitted(other.fitted)
     {
-        if (samples.defined())
-            samples = samples.clone();
+        // Deep copy the samples tensor
+        if (other.samples.defined()) {
+            samples = other.samples.clone();
+        }
+
+        // First, create all nodes (without relationships)
         for (const auto& node : other.nodes) {
             nodes[node.first] = std::make_unique<Node>(*node.second);
         }
+
+        // Second, reconstruct the relationships between nodes
+        for (const auto& node : other.nodes) {
+            const std::string& nodeName = node.first;
+            Node* originalNode = node.second.get();
+            Node* newNode = nodes[nodeName].get();
+
+            // Reconstruct parent relationships
+            for (Node* parent : originalNode->getParents()) {
+                const std::string& parentName = parent->getName();
+                if (nodes.find(parentName) != nodes.end()) {
+                    newNode->addParent(nodes[parentName].get());
+                }
+            }
+
+            // Reconstruct child relationships
+            for (Node* child : originalNode->getChildren()) {
+                const std::string& childName = child->getName();
+                if (nodes.find(childName) != nodes.end()) {
+                    newNode->addChild(nodes[childName].get());
+                }
+            }
+        }
+    }
+
+    Network& Network::operator=(const Network& other)
+    {
+        if (this != &other) {
+            // Clear existing state
+            nodes.clear();
+            features = other.features;
+            className = other.className;
+            classNumStates = other.classNumStates;
+            fitted = other.fitted;
+
+            // Deep copy the samples tensor
+            if (other.samples.defined()) {
+                samples = other.samples.clone();
+            } else {
+                samples = torch::Tensor();
+            }
+
+            // First, create all nodes (without relationships)
+            for (const auto& node : other.nodes) {
+                nodes[node.first] = std::make_unique<Node>(*node.second);
+            }
+
+            // Second, reconstruct the relationships between nodes
+            for (const auto& node : other.nodes) {
+                const std::string& nodeName = node.first;
+                Node* originalNode = node.second.get();
+                Node* newNode = nodes[nodeName].get();
+
+                // Reconstruct parent relationships
+                for (Node* parent : originalNode->getParents()) {
+                    const std::string& parentName = parent->getName();
+                    if (nodes.find(parentName) != nodes.end()) {
+                        newNode->addParent(nodes[parentName].get());
+                    }
+                }
+
+                // Reconstruct child relationships
+                for (Node* child : originalNode->getChildren()) {
+                    const std::string& childName = child->getName();
+                    if (nodes.find(childName) != nodes.end()) {
+                        newNode->addChild(nodes[childName].get());
+                    }
+                }
+            }
+        }
+        return *this;
+    }
     void Network::initialize()
     {
@@ -503,4 +579,41 @@ namespace bayesnet {
         }
         return oss.str();
     }
+
+    bool Network::operator==(const Network& other) const
+    {
+        // Compare number of nodes
+        if (nodes.size() != other.nodes.size()) {
+            return false;
+        }
+
+        // Compare if all node names exist in both networks
+        for (const auto& node : nodes) {
+            if (other.nodes.find(node.first) == other.nodes.end()) {
+                return false;
+            }
+        }
+
+        // Compare edges (topology)
+        auto thisEdges = getEdges();
+        auto otherEdges = other.getEdges();
+
+        // Compare number of edges
+        if (thisEdges.size() != otherEdges.size()) {
+            return false;
+        }
+
+        // Sort both edge lists for comparison
+        std::sort(thisEdges.begin(), thisEdges.end());
+        std::sort(otherEdges.begin(), otherEdges.end());
+
+        // Compare each edge
+        for (size_t i = 0; i < thisEdges.size(); ++i) {
+            if (thisEdges[i] != otherEdges[i]) {
+                return false;
+            }
+        }
+
+        return true;
+    }
 }
diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h
index efee01e..ad7a940 100644
--- a/bayesnet/network/Network.h
+++ b/bayesnet/network/Network.h
@@ -17,7 +17,8 @@ namespace bayesnet {
     class Network {
     public:
         Network();
-        explicit Network(const Network&);
+        Network(const Network& other);
+        Network& operator=(const Network& other);
         ~Network() = default;
         torch::Tensor& getSamples();
         void addNode(const std::string&);
@@ -47,6 +48,7 @@ namespace bayesnet {
         void initialize();
         std::string dump_cpt() const;
        inline std::string version() { return { project_version.begin(), project_version.end() }; }
+        bool operator==(const Network& other) const;
     private:
         std::map<std::string, std::unique_ptr<Node>> nodes;
         bool fitted;
diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc
index b94b142..ef1a79d 100644
--- a/bayesnet/network/Node.cc
+++ b/bayesnet/network/Node.cc
@@ -13,6 +13,41 @@ namespace bayesnet {
         : name(name)
     {
     }
+
+    Node::Node(const Node& other)
+        : name(other.name), numStates(other.numStates), dimensions(other.dimensions)
+    {
+        // Deep copy the CPT tensor
+        if (other.cpTable.defined()) {
+            cpTable = other.cpTable.clone();
+        }
+        // Note: parent and children pointers are NOT copied here
+        // They will be reconstructed by the Network copy constructor
+        // to maintain proper object relationships
+    }
+
+    Node& Node::operator=(const Node& other)
+    {
+        if (this != &other) {
+            name = other.name;
+            numStates = other.numStates;
+            dimensions = other.dimensions;
+
+            // Deep copy the CPT tensor
+            if (other.cpTable.defined()) {
+                cpTable = other.cpTable.clone();
+            } else {
+                cpTable = torch::Tensor();
+            }
+
+            // Clear existing relationships
+            parents.clear();
+            children.clear();
+            // Note: parent and children pointers are NOT copied here
+            // They must be reconstructed to maintain proper object relationships
+        }
+        return *this;
+    }
     void Node::clear()
     {
         parents.clear();
diff --git a/bayesnet/network/Node.h b/bayesnet/network/Node.h
index b950d70..ef0eeed 100644
--- a/bayesnet/network/Node.h
+++ b/bayesnet/network/Node.h
@@ -14,6 +14,9 @@ namespace bayesnet {
     class Node {
     public:
         explicit Node(const std::string&);
+        Node(const Node& other);
+        Node& operator=(const Node& other);
+        ~Node() = default;
         void clear();
         void addParent(Node*);
         void addChild(Node*);
diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc
index 3f17d6a..c024f32 100644
--- a/tests/TestBayesNetwork.cc
+++ b/tests/TestBayesNetwork.cc
@@ -338,6 +338,188 @@ TEST_CASE("Test Bayesian Network", "[Network]")
         REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
         REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
     }
+    SECTION("Test assignment operator")
+    {
+        INFO("Test assignment operator");
+        // Create original network
+        auto net1 = bayesnet::Network();
+        buildModel(net1, raw.features, raw.className);
+        net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
+
+        // Create empty network and assign
+        auto net2 = bayesnet::Network();
+        net2.addNode("TempNode"); // Add something to make sure it gets cleared
+        net2 = net1;
+
+        // Verify they are equal
+        REQUIRE(net1.getFeatures() == net2.getFeatures());
+        REQUIRE(net1.getEdges() == net2.getEdges());
+        REQUIRE(net1.getNumEdges() == net2.getNumEdges());
+        REQUIRE(net1.getStates() == net2.getStates());
+        REQUIRE(net1.getClassName() == net2.getClassName());
+        REQUIRE(net1.getClassNumStates() == net2.getClassNumStates());
+        REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0));
+        REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1));
+        REQUIRE(net1.getNodes().size() == net2.getNodes().size());
+
+        // Verify topology equality
+        REQUIRE(net1 == net2);
+
+        // Verify they are separate objects by modifying one
+        net2.initialize();
+        net2.addNode("OnlyInNet2");
+        REQUIRE(net1.getNodes().size() != net2.getNodes().size());
+        REQUIRE_FALSE(net1 == net2);
+    }
+    SECTION("Test self assignment")
+    {
+        INFO("Test self assignment");
+        buildModel(net, raw.features, raw.className);
+        net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
+
+        int original_edges = net.getNumEdges();
+        int original_nodes = net.getNodes().size();
+
+        // Self assignment should not corrupt the network
+        net = net;
+
+        REQUIRE(net.getNumEdges() == original_edges);
+        REQUIRE(net.getNodes().size() == original_nodes);
+        REQUIRE(net.getFeatures() == raw.features);
+        REQUIRE(net.getClassName() == raw.className);
+    }
SECTION("Test operator== topology comparison") + { + INFO("Test operator== topology comparison"); + + // Test 1: Two identical networks + auto net1 = bayesnet::Network(); + auto net2 = bayesnet::Network(); + + net1.addNode("A"); + net1.addNode("B"); + net1.addNode("C"); + net1.addEdge("A", "B"); + net1.addEdge("B", "C"); + + net2.addNode("A"); + net2.addNode("B"); + net2.addNode("C"); + net2.addEdge("A", "B"); + net2.addEdge("B", "C"); + + REQUIRE(net1 == net2); + + // Test 2: Different nodes + auto net3 = bayesnet::Network(); + net3.addNode("A"); + net3.addNode("D"); // Different node + REQUIRE_FALSE(net1 == net3); + + // Test 3: Same nodes, different edges + auto net4 = bayesnet::Network(); + net4.addNode("A"); + net4.addNode("B"); + net4.addNode("C"); + net4.addEdge("A", "C"); // Different topology + net4.addEdge("B", "C"); + REQUIRE_FALSE(net1 == net4); + + // Test 4: Empty networks + auto net5 = bayesnet::Network(); + auto net6 = bayesnet::Network(); + REQUIRE(net5 == net6); + + // Test 5: Same topology, different edge order + auto net7 = bayesnet::Network(); + net7.addNode("A"); + net7.addNode("B"); + net7.addNode("C"); + net7.addEdge("B", "C"); // Add edges in different order + net7.addEdge("A", "B"); + REQUIRE(net1 == net7); // Should still be equal + } + SECTION("Test RAII compliance with smart pointers") + { + INFO("Test RAII compliance with smart pointers"); + + std::unique_ptr net1 = std::make_unique(); + buildModel(*net1, raw.features, raw.className); + net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing); + + // Test that copy constructor works with smart pointers + std::unique_ptr net2 = std::make_unique(*net1); + + REQUIRE(*net1 == *net2); + REQUIRE(net1->getNumEdges() == net2->getNumEdges()); + REQUIRE(net1->getNodes().size() == net2->getNodes().size()); + + // Destroy original + net1.reset(); + + // net2 should still be valid and functional + REQUIRE_NOTHROW(net2->addNode("NewNode")); + REQUIRE(net2->getNodes().count("NewNode") == 1); + + // Test predictions still work + std::vector> test = { {1, 2, 0, 1, 1} }; + REQUIRE_NOTHROW(net2->predict(test)); + } + SECTION("Test complex topology copy") + { + INFO("Test complex topology copy"); + + auto original = bayesnet::Network(); + + // Create a more complex network + original.addNode("Root"); + original.addNode("Child1"); + original.addNode("Child2"); + original.addNode("Grandchild1"); + original.addNode("Grandchild2"); + original.addNode("Grandchild3"); + + original.addEdge("Root", "Child1"); + original.addEdge("Root", "Child2"); + original.addEdge("Child1", "Grandchild1"); + original.addEdge("Child1", "Grandchild2"); + original.addEdge("Child2", "Grandchild3"); + + // Copy it + auto copy = original; + + // Verify topology is identical + REQUIRE(original == copy); + REQUIRE(original.getNodes().size() == copy.getNodes().size()); + REQUIRE(original.getNumEdges() == copy.getNumEdges()); + + // Verify edges are properly reconstructed + auto originalEdges = original.getEdges(); + auto copyEdges = copy.getEdges(); + REQUIRE(originalEdges.size() == copyEdges.size()); + + // Verify node relationships are properly copied + for (const auto& nodePair : original.getNodes()) { + const std::string& nodeName = nodePair.first; + auto* originalNode = nodePair.second.get(); + auto* copyNode = copy.getNodes().at(nodeName).get(); + + REQUIRE(originalNode->getParents().size() == copyNode->getParents().size()); + REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size()); + + // Verify 
+            // Verify parent names match
+            for (size_t i = 0; i < originalNode->getParents().size(); ++i) {
+                REQUIRE(originalNode->getParents()[i]->getName() ==
+                        copyNode->getParents()[i]->getName());
+            }
+
+            // Verify child names match
+            for (size_t i = 0; i < originalNode->getChildren().size(); ++i) {
+                REQUIRE(originalNode->getChildren()[i]->getName() ==
+                        copyNode->getChildren()[i]->getName());
+            }
+        }
+    }
 }
 
 TEST_CASE("Test and empty Node", "[Network]")
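
// ---------------------------------------------------------------------------
// Usage sketch (not part of the patch above): a minimal standalone example of
// the deep-copy and operator== semantics this patch introduces. It assumes
// only the Network API shown in the diff (addNode, addEdge, copy construction,
// operator==); the header path and main() scaffolding are illustrative.
// ---------------------------------------------------------------------------
#include <cassert>
#include "bayesnet/network/Network.h"

int main()
{
    bayesnet::Network net;
    net.addNode("A");
    net.addNode("B");
    net.addEdge("A", "B");

    bayesnet::Network copy = net;  // deep copy: nodes and edges are rebuilt

    assert(copy == net);           // identical topology compares equal

    copy.addNode("C");             // mutating the copy...
    assert(!(copy == net));        // ...leaves the original untouched

    return 0;
}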