diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24e4b89..1fcd2f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,14 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
-## [1.1.1] - 2025-05-08
+## [1.1.1] - 2025-05-20
 
 ### Internal
 
 - Fix the vcpkg configuration in building the library.
 - Fix the sample app to use the vcpkg configuration.
 - Add predict_proba method to all Ld classifiers.
-- Optimize the computeCPT method in the Node class with libtorch vectorized operations and remove the for loop.
+- Refactor the computeCPT method in the Node class with libtorch vectorized operations.
+- Refactor the sample to use local discretization models.
 
 ## [1.1.0] - 2025-04-27
diff --git a/Makefile b/Makefile
index f4c0292..2f21973 100644
--- a/Makefile
+++ b/Makefile
@@ -111,12 +111,13 @@ release: ## Build a Release version of the project
 	@echo ">>> Done";
 
 fname = "tests/data/iris.arff"
+model = "TANLd"
 sample: ## Build sample
 	@echo ">>> Building Sample...";
 	@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
 	@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake && \
 	cmake --build build -t bayesnet_sample
-	sample/build/bayesnet_sample $(fname)
+	sample/build/bayesnet_sample $(fname) $(model)
 	@echo ">>> Done";
 
 fname = "tests/data/iris.arff"
diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc
index 651d3c2..1029247 100644
--- a/bayesnet/classifiers/Proposal.cc
+++ b/bayesnet/classifiers/Proposal.cc
@@ -23,6 +23,7 @@ namespace bayesnet {
             throw std::invalid_argument("y must be an integer tensor");
         }
     }
+    // Fit method for single classifier
     map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
     {
         // order of local discretization is important. no good 0, 1, 2...
diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc
index a912261..c68b7d9 100644
--- a/bayesnet/classifiers/SPODELd.cc
+++ b/bayesnet/classifiers/SPODELd.cc
@@ -45,7 +45,6 @@ namespace bayesnet {
     }
     torch::Tensor SPODELd::predict_proba(torch::Tensor& X)
     {
-        std::cout << "Debug: SPODELd::predict_proba" << std::endl;
         auto Xt = prepareX(X);
         return SPODE::predict_proba(Xt);
     }
diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc
index a66fc8a..b94b142 100644
--- a/bayesnet/network/Node.cc
+++ b/bayesnet/network/Node.cc
@@ -5,6 +5,7 @@
 // ***************************************************************
 
 #include "Node.h"
+#include <unordered_map>
 
 namespace bayesnet {
 
@@ -94,43 +95,34 @@ namespace bayesnet {
     {
         dimensions.clear();
         dimensions.reserve(parents.size() + 1);
-        // Get dimensions of the CPT
         dimensions.push_back(numStates);
         for (const auto& parent : parents) {
             dimensions.push_back(parent->getNumStates());
         }
-        // Create a tensor initialized with smoothing
         cpTable = torch::full(dimensions, smoothing, torch::kDouble);
-        // Create a map for quick feature index lookup
-        std::unordered_map<std::string, int> cachedFeatureIndexMap;
-        bool featureIndexMapReady = false;
-        // Build featureIndexMap if not ready
-        if (!featureIndexMapReady) {
-            cachedFeatureIndexMap.clear();
-            for (size_t i = 0; i < features.size(); ++i) {
-                cachedFeatureIndexMap[features[i]] = i;
-            }
-            featureIndexMapReady = true;
+
+        // Build feature index map
+        std::unordered_map<std::string, int> featureIndexMap;
+        for (size_t i = 0; i < features.size(); ++i) {
+            featureIndexMap[features[i]] = i;
         }
-        const auto& featureIndexMap = cachedFeatureIndexMap;
+
+        // Gather indices for node and parents
         std::vector<int64_t> all_indices;
-        all_indices.push_back(featureIndexMap.at(name));
+        all_indices.push_back(featureIndexMap[name]);
         for (const auto& parent : parents) {
-            all_indices.push_back(featureIndexMap.at(parent->getName()));
+            all_indices.push_back(featureIndexMap[parent->getName()]);
         }
+        // Extract relevant columns: shape (num_features, num_samples)
         auto indices_tensor = dataset.index_select(0, torch::tensor(all_indices, torch::kLong));
-        // Transpose to (num_samples, num_features)
-        indices_tensor = indices_tensor.transpose(0, 1).to(torch::kLong);
-        // Flatten CPT for easier indexing
-        auto flat_cpt = cpTable.flatten();
-        // Compute strides for flattening multi-dim indices
+        indices_tensor = indices_tensor.transpose(0, 1).to(torch::kLong); // (num_samples, num_features)
+
+        // Manual flattening of indices
         std::vector<int64_t> strides(all_indices.size(), 1);
         for (int i = strides.size() - 2; i >= 0; --i) {
             strides[i] = strides[i + 1] * cpTable.size(i + 1);
         }
-        // Compute flat indices for each sample
         auto indices_tensor_cpu = indices_tensor.cpu();
         auto indices_accessor = indices_tensor_cpu.accessor<int64_t, 2>();
         std::vector<int64_t> flat_indices(indices_tensor.size(0));
@@ -141,13 +133,15 @@ namespace bayesnet {
             }
             flat_indices[i] = idx;
         }
+        // Accumulate weights into flat CPT
+        auto flat_cpt = cpTable.flatten();
         auto flat_indices_tensor = torch::from_blob(flat_indices.data(), { (int64_t)flat_indices.size() }, torch::kLong).clone();
         flat_cpt.index_add_(0, flat_indices_tensor, weights.cpu());
         cpTable = flat_cpt.view(cpTable.sizes());
+        // Normalize the counts (dividing each row by the sum of the row)
         cpTable /= cpTable.sum(0, true);
-        return;
     }
 
     double Node::getFactorValue(std::map<std::string, int>& evidence)
     {
diff --git a/sample/sample.cc b/sample/sample.cc
index 27d520c..96f60dc 100644
--- a/sample/sample.cc
+++ b/sample/sample.cc
@@ -69,8 +69,8 @@ std::tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string>
 int main(int argc, char* argv[])
 {
-    if (argc < 2) {
-        std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
+    if (argc < 3) {
+        std::cerr << "Usage: " << argv[0] << " <file_name> <model_name>" << std::endl;
         return 1;
     }
     std::string file_name = argv[1];
@@ -79,6 +79,11 @@ int main(int argc, char* argv[])
     };
     if (models.find(model_name) == models.end()) {
         std::cerr << "Model not found: " << model_name << std::endl;
+        std::cerr << "Available models: ";
+        for (const auto& model : models) {
+            std::cerr << model.first << " ";
+        }
+        std::cerr << std::endl;
         return 1;
     }
     auto clf = models[model_name];
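
Note on the Node.cc hunks: the refactor replaces per-sample CPT updates with a single `index_add_` on a flattened tensor, using row-major strides to map each sample's (node, parents) state tuple to one flat offset. The sketch below shows the same technique in isolation; the 3-state node, 2-state parent, dataset, and uniform weights are toy values made up for this example, not taken from the library.

```cpp
// Standalone sketch (not part of the patch): vectorized CPT counting in the
// style of Node::computeCPT. All data below are invented toy values.
#include <torch/torch.h>
#include <iostream>
#include <vector>

int main()
{
    // Toy problem: node with 3 states, one parent with 2 states, 5 samples.
    // Row 0 holds the node's values, row 1 the parent's values.
    auto dataset = torch::tensor({ {0, 1, 2, 1, 0},
                                   {0, 0, 1, 1, 1} }, torch::kLong);
    auto weights = torch::ones({ 5 }, torch::kDouble);
    double smoothing = 1.0; // Laplace-style prior count

    std::vector<int64_t> dims = { 3, 2 }; // (node states, parent states)
    auto cpTable = torch::full(dims, smoothing, torch::kDouble);

    // (num_samples, num_features) layout, mirroring the patched code
    auto indices = dataset.transpose(0, 1);

    // Row-major strides: a (state, parent) pair maps to one flat offset
    std::vector<int64_t> strides(dims.size(), 1);
    for (int i = (int)strides.size() - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * cpTable.size(i + 1);
    }

    // Flatten each sample's multi-dimensional index
    auto acc = indices.accessor<int64_t, 2>();
    std::vector<int64_t> flat_indices(indices.size(0));
    for (int64_t i = 0; i < indices.size(0); ++i) {
        int64_t idx = 0;
        for (size_t j = 0; j < strides.size(); ++j) {
            idx += acc[i][j] * strides[j];
        }
        flat_indices[i] = idx;
    }

    // One index_add_ accumulates all weighted counts at once
    auto flat_cpt = cpTable.flatten();
    auto flat_idx = torch::from_blob(flat_indices.data(),
                                     { (int64_t)flat_indices.size() }, torch::kLong).clone();
    flat_cpt.index_add_(0, flat_idx, weights);
    cpTable = flat_cpt.view(cpTable.sizes());

    // Normalize over dim 0 so each parent configuration yields P(node | parent)
    cpTable /= cpTable.sum(0, true);
    std::cout << cpTable << std::endl;
}
```

The final division matches the patch's "Normalize the counts" step: summing over dimension 0 with keepdim makes every parent configuration's slice sum to one, giving P(node | parents).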
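On the build side, `model` is an ordinary Make variable like `fname`, so the defaults baked into the `sample` target can be overridden at invocation time (e.g. `make sample model=TANLd fname=tests/data/iris.arff`), and passing a name that is not a key of the sample's `models` map now prints the available models before exiting.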