remove unneeded files

Complete proposal
First approach with derived class
2025-07-07 00:38:00 +02:00 · 2025-07-07 00:37:16 +02:00 · 2025-07-06 18:49:05 +02:00 · 2025-07-04 12:19:58 +02:00 · 2025-07-04 11:56:55 +02:00 · 2025-07-03 09:55:05 +02:00
13 changed files with 765 additions and 74 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  - *mdlp_min_length*: minimum length of a partition in MDLP algorithm to be evaluated for partition.
  - *mdlp_max_depth*: maximum level of recursion in MDLP algorithm.
 - Remove vcpkg as a dependency manager, now the library is built with Conan package manager and CMake.
+- Add `build_type` option to the sample target in the Makefile to allow building in *Debug* or *Release* mode. Default is *Debug*.

 ## [1.1.1] - 2025-05-20

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,16 +18,18 @@ set(CMAKE_CXX_EXTENSIONS                  OFF)
 set(CMAKE_EXPORT_COMPILE_COMMANDS          ON)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
-if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
-endif()
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+
+# Report the configured build type; any value other than "Debug" (including an empty one) is reported as Release.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    message("Debug mode")
+else()
+    message("Release mode")
+endif()

 # Options
 # -------
 option(ENABLE_TESTING "Unit testing build"                       OFF)
-option(CODE_COVERAGE "Collect coverage from test library"        OFF)
-
    
 find_package(Torch CONFIG REQUIRED)
 if(NOT TARGET torch::torch)
@@ -63,23 +65,21 @@ target_link_libraries(bayesnet
    arff-files::arff-files
 )

+
+
 # Testing
 # -------
-if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-    MESSAGE("Debug mode")
-else(CMAKE_BUILD_TYPE STREQUAL "Debug")
-    MESSAGE("Release mode")
-endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
 if (ENABLE_TESTING)
    MESSAGE(STATUS "Testing enabled")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
+    if(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin")  # flag is not added when the system name matches Darwin
+        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
+    endif()
    find_package(Catch2 CONFIG REQUIRED)
    find_package(arff-files CONFIG REQUIRED)
    enable_testing()
    include(CTest)
    add_subdirectory(tests)
-else(ENABLE_TESTING)
-    
 endif (ENABLE_TESTING)

 # Installation
--- a/78
+++ b/78
@@ -1,6 +1,6 @@
 SHELL := /bin/bash
 .DEFAULT_GOAL := help
-.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean updatebadge doc doc-install init clean-test conan-debug conan-release conan-create conan-upload conan-clean conan-sample
+.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean updatebadge doc doc-install init clean-test debug release conan-create conan-upload conan-clean sample

 f_release = build_Release
 f_debug = build_Debug
@@ -17,6 +17,14 @@ mansrcdir = docs/man3
 mandestdir = /usr/local/share/man
 sed_command_link = 's/e">LCOV -/e"><a href="https:\/\/rmontanana.github.io\/bayesnet">Back to manual<\/a> LCOV -/g'
 sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
+# Detect the number of online processors, trying getconf, then nproc, then sysctl
+CPUS := $(shell getconf _NPROCESSORS_ONLN 2>/dev/null \
+                 || nproc --all 2>/dev/null \
+                 || sysctl -n hw.ncpu)
+
+# Parallel build jobs: CPUs minus 7, but never less than 1
+JOBS := $(shell n=$(CPUS); [ $${n} -gt 7 ] && echo $$((n-7)) || echo 1)
+

 define ClearTests
 	@for t in $(test_targets); do \
@@ -31,6 +39,15 @@ define ClearTests
 	fi ; 
 endef

+define setup_target
+	@echo ">>> Setup the project for $(1)..."
+	@if [ -d $(2) ]; then rm -fr $(2); fi
+	@conan install . --build=missing -of $(2) -s build_type=$(1)
+	@cmake -S . -B $(2) -DCMAKE_TOOLCHAIN_FILE=$(2)/build/$(1)/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=$(1) -D$(3)
+	@echo ">>> Will build using $(JOBS) parallel jobs"
+	@echo ">>> Done"
+endef
+
 setup: ## Install dependencies for tests and coverage
 	@if [ "$(shell uname)" = "Darwin" ]; then \
 		brew install gcovr; \
@@ -57,11 +74,17 @@ clean: ## Clean the project
 # Build targets
 # =============

+debug: ## Setup debug version using Conan
+	@$(call setup_target,"Debug","$(f_debug)","ENABLE_TESTING=ON")
+
+release: ## Setup release version using Conan
+	@$(call setup_target,"Release","$(f_release)","ENABLE_TESTING=OFF")
+
 buildd: ## Build the debug targets
-	cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	cmake --build $(f_debug) --config Debug -t $(app_targets) --parallel $(JOBS)

 buildr: ## Build the release targets
-	cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	cmake --build $(f_release) --config Release -t $(app_targets) --parallel $(JOBS)


 # Install targets
@@ -91,7 +114,7 @@ opt = ""
 test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
 	@echo ">>> Running BayesNet tests...";
 	@$(MAKE) clean-test
-	@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
+	@cmake --build $(f_debug) -t $(test_targets) --parallel $(JOBS)
 	@for t in $(test_targets); do \
 		echo ">>> Running $$t...";\
 		if [ -f $(f_debug)/tests/$$t ]; then \
@@ -160,6 +183,7 @@ doc: ## Generate documentation
 	@echo ">>> Done";

 diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
+	@echo ">>> Creating diagrams..."
 	@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
 	@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
 	@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
@@ -172,6 +196,7 @@ diagrams: ## Create an UML class diagram & dependency of the project (diagrams/B
 	$(MAKE) debug
 	cd $(f_debug) && cmake .. --graphviz=dependency.dot 
 	@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
+	@echo ">>> Done";

 docdir = ""
 doc-install: ## Install documentation
@@ -190,61 +215,28 @@ doc-install: ## Install documentation
 # Conan package manager targets
 # =============================

-debug:             ## Build debug version using Conan
-	@echo ">>> Building *Debug* BayesNet with Conan..."
-	@rm -rf $(f_debug)            # wipe previous tree
-	@conan install . \
-	            -s build_type=Debug \
-	            --build=missing \
-	            -of $(f_debug) \
-				--profile=debug
-	@cmake -S . -B $(f_debug) \
-	       -DCMAKE_BUILD_TYPE=Debug \
-	       -DENABLE_TESTING=ON \
-	       -DCODE_COVERAGE=ON \
-	       -DCMAKE_TOOLCHAIN_FILE=$(f_debug)/build/Debug/generators/conan_toolchain.cmake
-	@echo ">>> Done"
-
-release: ## Build release version using Conan
-	@echo ">>> Building Release BayesNet with Conan..."
-	@conan install . \
-	            -s build_type=Release \
-	            --build=missing \
-	            -of $(f_debug) \
-				--profile=release
-	@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
-	@mkdir $(f_release)
-	@conan install . -s build_type=Release --build=missing -of $(f_release)
-	@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$(f_release)/build/Release/generators/conan_toolchain.cmake
-	@echo ">>> Done"
-
 conan-create: ## Create Conan package
 	@echo ">>> Creating Conan package..."
-	@conan create . --build=missing -tf "" --profile=release
-	@conan create . --build=missing -tf "" --profile=debug -o "&:enable_coverage=False" -o "&:enable_testing=False"
-	@echo ">>> Done"
-
-profile ?= release
-remote ?= Cimmeria
-conan-upload: ## Upload package to Conan remote (profile=release remote=Cimmeria)
-	@echo ">>> Uploading to Conan remote $(remote) with profile $(profile)..."
-	@conan upload bayesnet/$(grep version conanfile.py | cut -d'"' -f2) -r $(remote) --confirm
+	@conan create . --build=missing -tf "" -s:a build_type=Release
+	@conan create . --build=missing -tf "" -s:a build_type=Debug -o "&:enable_coverage=False" -o "&:enable_testing=False"
 	@echo ">>> Done"

 conan-clean: ## Clean Conan cache and build folders
 	@echo ">>> Cleaning Conan cache and build folders..."
 	@conan remove "*" --confirm
+	@conan cache clean
 	@if test -d "$(f_release)" ; then rm -rf "$(f_release)"; fi
 	@if test -d "$(f_debug)" ; then rm -rf "$(f_debug)"; fi
 	@echo ">>> Done"

 fname = "tests/data/iris.arff"
 model = "TANLd"
+build_type = "Debug"
 sample: ## Build sample with Conan
 	@echo ">>> Building Sample with Conan...";
 	@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
-	@cd sample && conan install . --output-folder=build --build=missing
-	@cd sample && cmake -B build -S . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake && \
+	@cd sample && conan install . --output-folder=build --build=missing -s build_type=$(build_type) -o "&:enable_coverage=False" -o "&:enable_testing=False"
+	@cd sample && cmake -B build -S . -DCMAKE_BUILD_TYPE=$(build_type) -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake && \
 	cmake --build build -t bayesnet_sample
 	sample/build/bayesnet_sample $(fname) $(model)
 	@echo ">>> Done";
--- a/bayesnet/classifiers/KDBLd.cc
+++ b/bayesnet/classifiers/KDBLd.cc
@@ -33,12 +33,13 @@ namespace bayesnet {
        className = className_;
        Xf = X_;
        y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
+        
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
+        
+        // Final fit with converged discretization
        KDB::fit(dataset, features, className, states, smoothing);
-        states = localDiscretizationProposal(states, model);
+        
        return *this;
    }
    torch::Tensor KDBLd::predict(torch::Tensor& X)
--- a/bayesnet/classifiers/Proposal.cc
+++ b/bayesnet/classifiers/Proposal.cc
@@ -5,6 +5,9 @@
 // ***************************************************************

 #include "Proposal.h"
+#include <iostream>
+#include <cmath>
+#include <limits>

 namespace bayesnet {
    Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
@@ -38,6 +41,15 @@ namespace bayesnet {
                throw std::invalid_argument("Invalid discretization algorithm: " + algorithm.get<std::string>());
            }
        }
+        // Convergence parameters
+        if (hyperparameters.contains("max_iterations")) {
+            convergence_params.maxIterations = hyperparameters["max_iterations"];
+            hyperparameters.erase("max_iterations");
+        }
+        if (hyperparameters.contains("verbose_convergence")) {
+            convergence_params.verbose = hyperparameters["verbose_convergence"];
+            hyperparameters.erase("verbose_convergence");
+        }
        if (!hyperparameters.empty()) {
            throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
        }
@@ -163,4 +175,94 @@ namespace bayesnet {
        }
        return yy;
    }
+
+    template<typename Classifier>
+    map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization(
+        const torch::Tensor& y,
+        Classifier* classifier,
+        const torch::Tensor& dataset,
+        const std::vector<std::string>& features,
+        const std::string& className,
+        const map<std::string, std::vector<int>>& initialStates,
+        Smoothing_t smoothing
+    )
+    {
+        // Phase 1: Initial discretization (same as original); note that initialStates is never read in this function
+        auto currentStates = fit_local_discretization(y);
+        auto previousModel = Network();
+
+        if (convergence_params.verbose) {
+            std::cout << "Starting iterative local discretization with "
+                << convergence_params.maxIterations << " max iterations" << std::endl;
+        }
+
+        for (int iteration = 0; iteration < convergence_params.maxIterations; ++iteration) {
+            if (convergence_params.verbose) {
+                std::cout << "Iteration " << (iteration + 1) << "/" << convergence_params.maxIterations << std::endl;
+            }
+
+            // Phase 2: Build model with current discretization
+            classifier->fit(dataset, features, className, currentStates, smoothing);
+
+            // Phase 3: Network-aware discretization refinement
+            currentStates = localDiscretizationProposal(currentStates, classifier->model);
+
+            // Check convergence
+            if (iteration > 0 && previousModel == classifier->model) {
+                if (convergence_params.verbose) {
+                    std::cout << "Converged after " << (iteration + 1) << " iterations" << std::endl;
+                }
+                break;
+            }
+
+            // Update for next iteration
+            previousModel = classifier->model;
+        }
+
+        return currentStates;
+    }
+
+    double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset)
+    {
+        double logLikelihood = 0.0;
+        int n_samples = dataset.size(0);
+        int n_features = dataset.size(1);
+
+        for (int i = 0; i < n_samples; ++i) {
+            double sampleLogLikelihood = 0.0;
+
+            // Get class value for this sample
+            int classValue = dataset[i][n_features - 1].item<int>();
+
+            // Accumulate one log-probability term per node (feature terms are still a constant placeholder, see below)
+            for (const auto& node : model.getNodes()) {
+                if (node.first == model.getClassName()) {
+                    // For class node, add log P(class)
+                    auto classCounts = node.second->getCPT();
+                    double classProb = classCounts[classValue].item<double>() / dataset.size(0);
+                    sampleLogLikelihood += std::log(std::max(classProb, 1e-10));
+                } else {
+                    // For feature nodes, add log P(feature | parents, class)
+                    int featureIdx = std::distance(model.getFeatures().begin(),
+                        std::find(model.getFeatures().begin(),
+                            model.getFeatures().end(),
+                            node.first));
+                    int featureValue = dataset[i][featureIdx].item<int>();
+
+                    // Simplified probability computation - in practice would need full CPT lookup
+                    double featureProb = 0.1; // Placeholder - would compute from CPT
+                    sampleLogLikelihood += std::log(std::max(featureProb, 1e-10));
+                }
+            }
+
+            logLikelihood += sampleLogLikelihood;
+        }
+
+        return logLikelihood;
+    }
+
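+    // Explicit template instantiation (disabled). NOTE(review): the definition lives in this .cc, so the KDBLd/TANLd callers likely fail to link unless it is instantiated here for those types or moved to Proposal.h — confirm.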
+    // template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<Classifier>(
+    //     const torch::Tensor&, Classifier*, const torch::Tensor&, const std::vector<std::string>&,
+    //     const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
 }
--- a/bayesnet/classifiers/Proposal.h
+++ b/bayesnet/classifiers/Proposal.h
@@ -25,18 +25,43 @@ namespace bayesnet {
        torch::Tensor prepareX(torch::Tensor& X);
        map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
        map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
+
+        // Iterative discretization method
+        template<typename Classifier>
+        map<std::string, std::vector<int>> iterativeLocalDiscretization(
+            const torch::Tensor& y,
+            Classifier* classifier,
+            const torch::Tensor& dataset,
+            const std::vector<std::string>& features,
+            const std::string& className,
+            const map<std::string, std::vector<int>>& initialStates,
+            const Smoothing_t smoothing
+        );
+
        torch::Tensor Xf; // X continuous nxm tensor
        torch::Tensor y; // y discrete nx1 tensor
        map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
+
        // MDLP parameters
        struct {
            size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
            float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
            int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
        } ld_params;
-        nlohmann::json validHyperparameters_ld = { "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth" };
+
+        // Convergence parameters
+        struct {
+            int maxIterations = 10;
+            bool verbose = false;
+        } convergence_params;
+
+        nlohmann::json validHyperparameters_ld = {
+            "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth",
+            "max_iterations", "verbose_convergence"
+        };
    private:
        std::vector<int> factorize(const std::vector<std::string>& labels_t);
+        double computeLogLikelihood(Network& model, const torch::Tensor& dataset);
        torch::Tensor& pDataset; // (n+1)xm tensor
        std::vector<std::string>& pFeatures;
        std::string& pClassName;
--- a/bayesnet/classifiers/TANLd.cc
+++ b/bayesnet/classifiers/TANLd.cc
@@ -15,14 +15,14 @@ namespace bayesnet {
        className = className_;
        Xf = X_;
        y = y_;
-        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
-        states = fit_local_discretization(y);
-        // We have discretized the input data
-        // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
-        TAN::fit(dataset, features, className, states, smoothing);
-        states = localDiscretizationProposal(states, model);
-        return *this;
        
+        // Use iterative local discretization instead of the two-phase approach
+        states = iterativeLocalDiscretization(y, this, dataset, features, className, states_, smoothing);
+        
+        // Final fit with converged discretization
+        TAN::fit(dataset, features, className, states, smoothing);
+        
+        return *this;
    }
    torch::Tensor TANLd::predict(torch::Tensor& X)
    {
--- a/bayesnet/network/Network.cc
+++ b/bayesnet/network/Network.cc
@@ -17,14 +17,90 @@ namespace bayesnet {
    Network::Network() : fitted{ false }, classNumStates{ 0 }
    {
    }
-    Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
-        fitted(other.fitted), samples(other.samples)
+    Network::Network(const Network& other) 
+        : features(other.features), className(other.className), classNumStates(other.classNumStates),
+          fitted(other.fitted)
    {
-        if (samples.defined())
-            samples = samples.clone();
+        // Deep copy the samples tensor
+        if (other.samples.defined()) {
+            samples = other.samples.clone();
+        }
+        
+        // First, create all nodes (without relationships)
        for (const auto& node : other.nodes) {
            nodes[node.first] = std::make_unique<Node>(*node.second);
        }
+        
+        // Second, reconstruct the relationships between nodes
+        for (const auto& node : other.nodes) {
+            const std::string& nodeName = node.first;
+            Node* originalNode = node.second.get();
+            Node* newNode = nodes[nodeName].get();
+            
+            // Reconstruct parent relationships
+            for (Node* parent : originalNode->getParents()) {
+                const std::string& parentName = parent->getName();
+                if (nodes.find(parentName) != nodes.end()) {
+                    newNode->addParent(nodes[parentName].get());
+                }
+            }
+            
+            // Reconstruct child relationships
+            for (Node* child : originalNode->getChildren()) {
+                const std::string& childName = child->getName();
+                if (nodes.find(childName) != nodes.end()) {
+                    newNode->addChild(nodes[childName].get());
+                }
+            }
+        }
+    }
+    
+    Network& Network::operator=(const Network& other)
+    {
+        if (this != &other) {
+            // Clear existing state
+            nodes.clear();
+            features = other.features;
+            className = other.className;
+            classNumStates = other.classNumStates;
+            fitted = other.fitted;
+            
+            // Deep copy the samples tensor
+            if (other.samples.defined()) {
+                samples = other.samples.clone();
+            } else {
+                samples = torch::Tensor();
+            }
+            
+            // First, create all nodes (without relationships)
+            for (const auto& node : other.nodes) {
+                nodes[node.first] = std::make_unique<Node>(*node.second);
+            }
+            
+            // Second, reconstruct the relationships between nodes
+            for (const auto& node : other.nodes) {
+                const std::string& nodeName = node.first;
+                Node* originalNode = node.second.get();
+                Node* newNode = nodes[nodeName].get();
+                
+                // Reconstruct parent relationships
+                for (Node* parent : originalNode->getParents()) {
+                    const std::string& parentName = parent->getName();
+                    if (nodes.find(parentName) != nodes.end()) {
+                        newNode->addParent(nodes[parentName].get());
+                    }
+                }
+                
+                // Reconstruct child relationships
+                for (Node* child : originalNode->getChildren()) {
+                    const std::string& childName = child->getName();
+                    if (nodes.find(childName) != nodes.end()) {
+                        newNode->addChild(nodes[childName].get());
+                    }
+                }
+            }
+        }
+        return *this;
    }
    void Network::initialize()
    {
@@ -503,4 +579,41 @@ namespace bayesnet {
        }
        return oss.str();
    }
+    
+    bool Network::operator==(const Network& other) const
+    {
+        // Compare number of nodes
+        if (nodes.size() != other.nodes.size()) {
+            return false;
+        }
+        
+        // Compare if all node names exist in both networks
+        for (const auto& node : nodes) {
+            if (other.nodes.find(node.first) == other.nodes.end()) {
+                return false;
+            }
+        }
+        
+        // Compare edges (topology only: CPTs, samples and fitted state are never compared)
+        auto thisEdges = getEdges();
+        auto otherEdges = other.getEdges();
+        
+        // Compare number of edges
+        if (thisEdges.size() != otherEdges.size()) {
+            return false;
+        }
+        
+        // Sort both edge lists for comparison
+        std::sort(thisEdges.begin(), thisEdges.end());
+        std::sort(otherEdges.begin(), otherEdges.end());
+        
+        // Compare each edge
+        for (size_t i = 0; i < thisEdges.size(); ++i) {
+            if (thisEdges[i] != otherEdges[i]) {
+                return false;
+            }
+        }
+        
+        return true;
+    }
 }
--- a/bayesnet/network/Network.h
+++ b/bayesnet/network/Network.h
@@ -17,7 +17,8 @@ namespace bayesnet {
    class Network {
    public:
        Network();
-        explicit Network(const Network&);
+        Network(const Network& other);
+        Network& operator=(const Network& other);
        ~Network() = default;
        torch::Tensor& getSamples();
        void addNode(const std::string&);
@@ -47,6 +48,7 @@ namespace bayesnet {
        void initialize();
        std::string dump_cpt() const;
        inline std::string version() { return  { project_version.begin(), project_version.end() }; }
+        bool operator==(const Network& other) const;
    private:
        std::map<std::string, std::unique_ptr<Node>> nodes;
        bool fitted;
--- a/bayesnet/network/Node.cc
+++ b/bayesnet/network/Node.cc
@@ -13,6 +13,41 @@ namespace bayesnet {
        : name(name)
    {
    }
+    
+    Node::Node(const Node& other)
+        : name(other.name), numStates(other.numStates), dimensions(other.dimensions)
+    {
+        // Deep copy the CPT tensor
+        if (other.cpTable.defined()) {
+            cpTable = other.cpTable.clone();
+        }
+        // Note: parent and children pointers are NOT copied here
+        // They will be reconstructed by the Network copy constructor
+        // to maintain proper object relationships
+    }
+    
+    Node& Node::operator=(const Node& other)
+    {
+        if (this != &other) {
+            name = other.name;
+            numStates = other.numStates;
+            dimensions = other.dimensions;
+            
+            // Deep copy the CPT tensor
+            if (other.cpTable.defined()) {
+                cpTable = other.cpTable.clone();
+            } else {
+                cpTable = torch::Tensor();
+            }
+            
+            // Clear existing relationships
+            parents.clear();
+            children.clear();
+            // Note: parent and children pointers are NOT copied here
+            // They must be reconstructed to maintain proper object relationships
+        }
+        return *this;
+    }
    void Node::clear()
    {
        parents.clear();
--- a/bayesnet/network/Node.h
+++ b/bayesnet/network/Node.h
@@ -14,6 +14,9 @@ namespace bayesnet {
    class Node {
    public:
        explicit Node(const std::string&);
+        Node(const Node& other);
+        Node& operator=(const Node& other);
+        ~Node() = default;
        void clear();
        void addParent(Node*);
        void addChild(Node*);
--- a/local_discretization_analysis.md
+++ b/local_discretization_analysis.md
@@ -0,0 +1,235 @@
+# Local Discretization Analysis - BayesNet Library
+
+## Overview
+
+This document analyzes the local discretization implementation in the BayesNet library, specifically focusing on the `Proposal.cc` implementation, and evaluates the feasibility of implementing an iterative discretization approach.
+
+## Current Local Discretization Implementation
+
+### Core Architecture
+
+The local discretization functionality is implemented through a **Proposal class** (`bayesnet/classifiers/Proposal.h`) that serves as a mixin/base class for creating "Ld" (Local Discretization) variants of existing classifiers.
+
+### Key Components
+
+#### 1. The Proposal Class
+- **Purpose**: Handles continuous data by applying local discretization using discretization algorithms
+- **Dependencies**: Uses the `fimdlp` library for discretization algorithms
+- **Supported Algorithms**:
+  - **MDLP** (Minimum Description Length Principle) - Default
+  - **BINQ** - Quantile-based binning
+  - **BINU** - Uniform binning
+
+#### 2. Local Discretization Variants
+
+The codebase implements Ld variants using multiple inheritance:
+
+**Individual Classifiers:**
+- `TANLd` - Tree Augmented Naive Bayes with Local Discretization
+- `KDBLd` - K-Dependence Bayesian classifier with Local Discretization
+- `SPODELd` - Super-Parent One-Dependence Estimator with Local Discretization
+
+**Ensemble Classifiers:**
+- `AODELd` - Averaged One-Dependence Estimator with Local Discretization
+
+### Implementation Pattern
+
+All Ld variants follow a consistent pattern using **multiple inheritance**:
+
+```cpp
+class TANLd : public TAN, public Proposal {
+    // Inherits from both the base classifier and Proposal
+};
+```
+
+### Two-Phase Discretization Process
+
+#### Phase 1: Initial Discretization (`fit_local_discretization`)
+- Each continuous feature is discretized independently using the chosen algorithm
+- Creates initial discrete dataset
+- Uses only class labels for discretization decisions
+
+#### Phase 2: Network-Aware Refinement (`localDiscretizationProposal`)
+- After building the initial Bayesian network structure
+- Features are re-discretized considering their parent nodes in the network
+- Uses topological ordering to ensure proper dependency handling
+- Creates more informed discretization boundaries based on network relationships
+
+### Hyperparameter Support
+
+The Proposal class supports several configurable hyperparameters:
+- `ld_algorithm`: Choice of discretization algorithm (MDLP, BINQ, BINU)
+- `ld_proposed_cuts`: Number of proposed cuts for discretization
+- `mdlp_min_length`: Minimum interval length for MDLP
+- `mdlp_max_depth`: Maximum depth for MDLP tree
+
+## Current Implementation Strengths
+
+1. **Sophisticated Approach**: Considers network structure in discretization decisions
+2. **Modular Design**: Clean separation through Proposal class mixin
+3. **Multiple Algorithm Support**: Flexible discretization strategies
+4. **Proper Dependency Handling**: Topological ordering ensures correct processing
+5. **Well-Integrated**: Seamless integration with existing classifier architecture
+
+## Areas for Improvement
+
+### Code Quality Issues
+
+1. **Dead Code**: Line 161 in `Proposal.cc` contains unused variable `allDigits`
+2. **Performance Issues**: 
+   - String concatenation in tight loop (lines 82-84) using `+=` operator
+   - Memory allocations could be optimized
+   - Tensor operations could be batched better
+3. **Error Handling**: Could be more robust with better exception handling
+
+### Algorithm Clarity
+
+1. **Logic Clarity**: The `upgrade` flag logic could be more descriptive
+2. **Variable Naming**: Some variables need more descriptive names
+3. **Documentation**: Better inline documentation of the two-phase process
+4. **Method Complexity**: `localDiscretizationProposal` method is quite long and complex
+
+### Suggested Code Improvements
+
+```cpp
+// Instead of string concatenation in loop:
+for (auto idx : indices) {
+    for (int i = 0; i < Xf.size(1); ++i) {
+        yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
+    }
+}
+
+// Consider building each sample's key once with a stream (loops swapped so every value is still appended):
+for (int i = 0; i < Xf.size(1); ++i) {
+    std::ostringstream ss;
+    ss << yJoinParents[i];
+    for (auto idx : indices) {
+        ss << pDataset.index({ idx, i }).item<int>();
+    }
+    yJoinParents[i] = ss.str();
+}
+```
+
+## Iterative Discretization Proposal
+
+### Concept
+
+Implement an iterative process: discretize → build model → re-discretize → rebuild model → repeat until convergence.
+
+### Feasibility Assessment
+
+**Highly Feasible** - The current implementation already provides a solid foundation with its two-phase approach, making extension straightforward.
+
+### Proposed Implementation Strategy
+
+```cpp
+class IterativeProposal : public Proposal {
+public:
+    // Stopping rules for the iterative discretize -> fit -> re-discretize loop.
+    struct ConvergenceParams {
+        int max_iterations = 10;                     // upper bound on refinement rounds
+        double tolerance = 1e-6;                     // not read by the checks sketched here
+        bool check_network_structure = true;         // stop only if the edges are unchanged
+        bool check_discretization_stability = true;  // stop only if the states are unchanged
+    };
+
+private:
+    // Settings consulted by the loop below and by hasConverged().
+    ConvergenceParams convergenceParams;
+
+    // Alternates model fitting and local discretization until hasConverged()
+    // reports stability or max_iterations rounds have been performed.
+    // Returns the most recent discretization states.
+    map<string, vector<int>> iterativeLocalDiscretization(const torch::Tensor& y) {
+        auto states = fit_local_discretization(y);  // Initial discretization
+        Network previousModel, currentModel;
+        int iteration = 0;
+
+        do {
+            previousModel = currentModel;
+
+            // Build model with current discretization
+            const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
+            currentModel.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
+
+            // Apply local discretization based on current model
+            auto newStates = localDiscretizationProposal(states, currentModel);
+
+            // Decide on convergence before overwriting `states`, then always
+            // keep the newest discretization so the returned value is never
+            // stale when the stability check is disabled.
+            const bool converged = hasConverged(previousModel, currentModel, states, newStates);
+            states = newStates;
+            if (converged) {
+                break;
+            }
+
+            iteration++;
+
+        } while (iteration < convergenceParams.max_iterations);
+
+        return states;
+    }
+
+    // Combines the enabled convergence criteria; a disabled criterion counts
+    // as satisfied. With both flags off the loop stops after its first round.
+    bool hasConverged(const Network& prev, const Network& curr,
+                     const map<string, vector<int>>& oldStates,
+                     const map<string, vector<int>>& newStates) {
+        if (convergenceParams.check_network_structure &&
+            !checkNetworkStructureConvergence(prev, curr)) {
+            return false;
+        }
+        if (convergenceParams.check_discretization_stability &&
+            !checkDiscretizationStability(oldStates, newStates)) {
+            return false;
+        }
+        return true;
+    }
+};
+```
+
+### Convergence Criteria Options
+
+1. **Network Structure Comparison**: Compare edge sets between iterations
+   ```cpp
+   bool checkNetworkStructureConvergence(const Network& prev, const Network& curr) {
+       // The structure counts as stable when both networks report equal edge lists
+       const auto& previousEdges = prev.getEdges();
+       const auto& currentEdges = curr.getEdges();
+       return previousEdges == currentEdges;
+   }
+   ```
+
+2. **Discretization Stability**: Check if cut points change significantly
+   ```cpp
+   // Returns true only when both maps hold exactly the same features with
+   // exactly the same states. A feature present on one side only means
+   // "not stable" rather than an out_of_range exception.
+   bool checkDiscretizationStability(const map<string, vector<int>>& oldStates,
+                                    const map<string, vector<int>>& newStates) {
+       return oldStates == newStates;
+   }
+   ```
+
+3. **Performance Metrics**: Monitor accuracy/likelihood convergence
+4. **Maximum Iterations**: Prevent infinite loops
+
+### Expected Benefits
+
+1. **Better Discretization Quality**: Each iteration refines boundaries based on learned dependencies
+2. **Improved Model Accuracy**: More informed discretization leads to better classification
+3. **Adaptive Process**: Automatically finds optimal discretization-model combination
+4. **Principled Approach**: Theoretically sound iterative refinement
+5. **Reduced Manual Tuning**: Less need for hyperparameter optimization
+
+### Implementation Considerations
+
+1. **Convergence Detection**: Need robust criteria to detect when to stop
+2. **Performance Impact**: Multiple iterations increase computational cost
+3. **Overfitting Prevention**: May need regularization to avoid over-discretization
+4. **Stability Guarantees**: Ensure the process doesn't oscillate indefinitely
+5. **Memory Management**: Handle multiple model instances efficiently
+
+### Integration Strategy
+
+1. **Backward Compatibility**: Keep existing two-phase approach as default
+2. **Optional Feature**: Add iterative mode as configurable option
+3. **Hyperparameter Extension**: Add convergence-related parameters
+4. **Testing Framework**: Comprehensive testing on standard datasets
+
+## Conclusion
+
+The current local discretization implementation in BayesNet is well-designed and functional, providing a solid foundation for the proposed iterative enhancement. The iterative approach would significantly improve the quality of discretization by creating a feedback loop between model structure and discretization decisions.
+
+The implementation is highly feasible given the existing architecture, and the expected benefits justify the additional computational complexity. The key to success will be implementing robust convergence criteria and maintaining the modularity of the current design.
+
+## Recommendations
+
+1. **Immediate Improvements**: Fix code quality issues and optimize performance bottlenecks
+2. **Iterative Implementation**: Develop the iterative approach as an optional enhancement
+3. **Comprehensive Testing**: Validate improvements on standard benchmark datasets
+4. **Documentation**: Enhance inline documentation and user guides
+5. **Performance Profiling**: Monitor computational overhead and optimize where necessary
--- a/tests/TestBayesNetwork.cc
+++ b/tests/TestBayesNetwork.cc
@@ -338,6 +338,188 @@ TEST_CASE("Test Bayesian Network", "[Network]")
        REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
        REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
    }
+    SECTION("Test assignment operator")
+    {
+        INFO("Test assignment operator");
+        // Source network: built and fitted on the raw dataset
+        bayesnet::Network source{};
+        buildModel(source, raw.features, raw.className);
+        source.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
+
+        // Destination starts with an unrelated node so the assignment has
+        // pre-existing content to replace
+        bayesnet::Network target{};
+        target.addNode("TempNode");
+        target = source;
+
+        // Each accessor must report the same value on both networks
+        REQUIRE(source.getFeatures() == target.getFeatures());
+        REQUIRE(source.getEdges() == target.getEdges());
+        REQUIRE(source.getNumEdges() == target.getNumEdges());
+        REQUIRE(source.getStates() == target.getStates());
+        REQUIRE(source.getClassName() == target.getClassName());
+        REQUIRE(source.getClassNumStates() == target.getClassNumStates());
+        REQUIRE(source.getSamples().size(0) == target.getSamples().size(0));
+        REQUIRE(source.getSamples().size(1) == target.getSamples().size(1));
+        REQUIRE(source.getNodes().size() == target.getNodes().size());
+
+        // operator== must agree as well
+        REQUIRE(source == target);
+
+        // Changing the destination must leave the source untouched
+        target.initialize();
+        target.addNode("OnlyInNet2");
+        REQUIRE(source.getNodes().size() != target.getNodes().size());
+        REQUIRE_FALSE(source == target);
+    }
+    SECTION("Test self assignment")
+    {
+        INFO("Test self assignment");
+        buildModel(net, raw.features, raw.className);
+        net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
+
+        // Keep the types the accessors return, so the comparisons below do
+        // not narrow a container size to int or mix signed and unsigned values
+        const auto original_edges = net.getNumEdges();
+        const auto original_nodes = net.getNodes().size();
+
+        // Self assignment should not corrupt the network. It goes through a
+        // reference so compilers do not flag it as an accidental `x = x`.
+        auto& same_net = net;
+        net = same_net;
+
+        REQUIRE(net.getNumEdges() == original_edges);
+        REQUIRE(net.getNodes().size() == original_nodes);
+        REQUIRE(net.getFeatures() == raw.features);
+        REQUIRE(net.getClassName() == raw.className);
+    }
+    SECTION("Test operator== topology comparison")
+    {
+        INFO("Test operator== topology comparison");
+
+        // Case 1: the same chain A -> B -> C built twice
+        bayesnet::Network chain{};
+        bayesnet::Network chainTwin{};
+
+        chain.addNode("A");
+        chain.addNode("B");
+        chain.addNode("C");
+        chain.addEdge("A", "B");
+        chain.addEdge("B", "C");
+
+        chainTwin.addNode("A");
+        chainTwin.addNode("B");
+        chainTwin.addNode("C");
+        chainTwin.addEdge("A", "B");
+        chainTwin.addEdge("B", "C");
+
+        REQUIRE(chain == chainTwin);
+
+        // Case 2: a different set of nodes
+        bayesnet::Network otherNodes{};
+        otherNodes.addNode("A");
+        otherNodes.addNode("D"); // not present in the chain
+        REQUIRE_FALSE(chain == otherNodes);
+
+        // Case 3: identical nodes wired differently
+        bayesnet::Network otherEdges{};
+        otherEdges.addNode("A");
+        otherEdges.addNode("B");
+        otherEdges.addNode("C");
+        otherEdges.addEdge("A", "C"); // A points at C instead of B
+        otherEdges.addEdge("B", "C");
+        REQUIRE_FALSE(chain == otherEdges);
+
+        // Case 4: two networks with nothing in them
+        bayesnet::Network emptyLeft{};
+        bayesnet::Network emptyRight{};
+        REQUIRE(emptyLeft == emptyRight);
+
+        // Case 5: the chain again, with its edges inserted in reverse order
+        bayesnet::Network reordered{};
+        reordered.addNode("A");
+        reordered.addNode("B");
+        reordered.addNode("C");
+        reordered.addEdge("B", "C");
+        reordered.addEdge("A", "B");
+        REQUIRE(chain == reordered); // insertion order must not matter
+    }
+    SECTION("Test RAII compliance with smart pointers")
+    {
+        INFO("Test RAII compliance with smart pointers");
+
+        auto owner = std::make_unique<bayesnet::Network>();
+        buildModel(*owner, raw.features, raw.className);
+        owner->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
+
+        // Copy-construct a second heap-allocated network from the first
+        auto duplicate = std::make_unique<bayesnet::Network>(*owner);
+
+        REQUIRE(*owner == *duplicate);
+        REQUIRE(owner->getNumEdges() == duplicate->getNumEdges());
+        REQUIRE(owner->getNodes().size() == duplicate->getNodes().size());
+
+        // Release the original network
+        owner.reset();
+
+        // The copy must remain usable after the original is gone
+        REQUIRE_NOTHROW(duplicate->addNode("NewNode"));
+        REQUIRE(duplicate->getNodes().count("NewNode") == 1);
+
+        // Prediction on the copy must not throw
+        // NOTE(review): a single row of five values is passed; confirm this
+        // matches the sample layout predict() expects for this dataset
+        std::vector<std::vector<int>> sample = { {1, 2, 0, 1, 1} };
+        REQUIRE_NOTHROW(duplicate->predict(sample));
+    }
+    SECTION("Test complex topology copy")
+    {
+        INFO("Test complex topology copy");
+
+        bayesnet::Network source{};
+
+        // Three-level tree: one root, two children, three grandchildren
+        source.addNode("Root");
+        source.addNode("Child1");
+        source.addNode("Child2");
+        source.addNode("Grandchild1");
+        source.addNode("Grandchild2");
+        source.addNode("Grandchild3");
+
+        source.addEdge("Root", "Child1");
+        source.addEdge("Root", "Child2");
+        source.addEdge("Child1", "Grandchild1");
+        source.addEdge("Child1", "Grandchild2");
+        source.addEdge("Child2", "Grandchild3");
+
+        // Copy-construct a second network from it
+        bayesnet::Network clone(source);
+
+        // Both networks must compare equal and agree on their counts
+        REQUIRE(source == clone);
+        REQUIRE(source.getNodes().size() == clone.getNodes().size());
+        REQUIRE(source.getNumEdges() == clone.getNumEdges());
+
+        // The edge lists must have the same length
+        REQUIRE(source.getEdges().size() == clone.getEdges().size());
+
+        // Per node: same number of parents and children, with the same names
+        // at the same positions
+        for (const auto& entry : source.getNodes()) {
+            const std::string& name = entry.first;
+            auto* sourceNode = entry.second.get();
+            auto* cloneNode = clone.getNodes().at(name).get();
+
+            REQUIRE(sourceNode->getParents().size() == cloneNode->getParents().size());
+            REQUIRE(sourceNode->getChildren().size() == cloneNode->getChildren().size());
+
+            // Parent names, position by position
+            const size_t parentCount = sourceNode->getParents().size();
+            for (size_t p = 0; p < parentCount; ++p) {
+                REQUIRE(sourceNode->getParents()[p]->getName() ==
+                       cloneNode->getParents()[p]->getName());
+            }
+
+            // Child names, position by position
+            const size_t childCount = sourceNode->getChildren().size();
+            for (size_t c = 0; c < childCount; ++c) {
+                REQUIRE(sourceNode->getChildren()[c]->getName() ==
+                       cloneNode->getChildren()[c]->getName());
+            }
+        }
+    }

 }
 TEST_CASE("Test and empty Node", "[Network]")
Author	SHA1	Message	Date
Ricardo Montañana Gómez	0ce7f664b4	remove unneeded files	2025-07-07 00:38:00 +02:00
Ricardo Montañana Gómez	62fa85a1b3	Complete proposal	2025-07-07 00:37:16 +02:00
Ricardo Montañana Gómez	97894cc49c	First approach with derived class	2025-07-06 18:49:05 +02:00
Ricardo Montañana Gómez	090172c6c5	Add Claude local discretization analysis	2025-07-04 12:19:58 +02:00
Ricardo Montañana Gómez	3048244a27	Add cache clean to conan-clean	2025-07-04 11:56:55 +02:00
Ricardo Montañana Gómez	c142ff2c4a	Compact Makefile and remove unneeded in CMakeLists	2025-07-03 09:55:05 +02:00
Ricardo Montañana	a5841000d3	Change optimization flag in Release	2025-07-02 13:56:54 +02:00
Ricardo Montañana Gómez	e7e80cfa9c	Update CHANGELOG	2025-07-02 00:52:53 +02:00
Ricardo Montañana Gómez	1d58cea276	Add build_type option to sample target in Makefile	2025-07-02 00:51:31 +02:00
Ricardo Montañana Gómez	189d314990	Fix Conan debug build Fix smell issues in markdown and python	2025-07-02 00:44:24 +02:00
Ricardo Montañana	28be43db02	Update sample target in Makefile	2025-07-01 18:42:20 +02:00
Ricardo Montañana	55a24fbaf0	Update optimization flag	2025-07-01 16:49:04 +02:00
Ricardo Montañana Gómez	3b170324f4	Merge pull request 'conan' (#38 ) from conan into main Reviewed-on: #38	2025-07-01 14:33:50 +00:00