From dd98cf159de61df5c03a4e3a46f09bd4463fa326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
Date: Thu, 13 Feb 2025 01:17:37 +0100
Subject: [PATCH] ComputeCPT Optimization

---
 bayesnet/network/Node.cc | 42 +++++++++++++++++++++++-----------------
 sample/CMakeLists.txt    |  2 +-
 sample/sample.cc         | 16 ++++++++++++++-
 3 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc
index b62e275..1b2381f 100644
--- a/bayesnet/network/Node.cc
+++ b/bayesnet/network/Node.cc
@@ -93,36 +93,42 @@ namespace bayesnet {
     void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
     {
         dimensions.clear();
+        dimensions.reserve(parents.size() + 1);
         // Get dimensions of the CPT
         dimensions.push_back(numStates);
-        transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
-        // Create a tensor of zeros with the dimensions of the CPT
-        cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
-        // Fill table with counts
-        auto pos = find(features.begin(), features.end(), name);
-        if (pos == features.end()) {
-            throw std::logic_error("Feature " + name + " not found in dataset");
+        for (const auto& parent : parents) {
+            dimensions.push_back(parent->getNumStates());
+        }
+        //transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
+        // Create a tensor initialized with smoothing
+        cpTable = torch::full(dimensions, smoothing, torch::kDouble);
+        // Create a map for quick feature index lookup
+        std::unordered_map<std::string, int> featureIndexMap;
+        for (size_t i = 0; i < features.size(); ++i) {
+            featureIndexMap[features[i]] = i;
+        }
+        // Fill table with counts
+        // Get the index of this node's feature
+        int name_index = featureIndexMap[name];
+        // Get parent indices in dataset
+        std::vector<int> parent_indices;
+        parent_indices.reserve(parents.size());
+        for (const auto& parent : parents) {
+            parent_indices.push_back(featureIndexMap[parent->getName()]);
         }
-        int name_index = pos - features.begin();
         c10::List<c10::optional<at::Tensor>> coordinates;
         for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
             coordinates.clear();
             auto sample = dataset.index({ "...", n_sample });
             coordinates.push_back(sample[name_index]);
-            for (auto parent : parents) {
-                pos = find(features.begin(), features.end(), parent->getName());
-                if (pos == features.end()) {
-                    throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
-                }
-                int parent_index = pos - features.begin();
-                coordinates.push_back(sample[parent_index]);
+            for (size_t i = 0; i < parent_indices.size(); ++i) {
+                coordinates.push_back(sample[parent_indices[i]]);
             }
             // Increment the count of the corresponding coordinate
             cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
         }
-        // Normalize the counts
-        // Divide each row by the sum of the row
-        cpTable = cpTable / cpTable.sum(0);
+        // Normalize the counts (dividing each row by the sum of the row)
+        cpTable /= cpTable.sum(0, true);
     }
     double Node::getFactorValue(std::map<std::string, int>& evidence)
     {
diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt
index 3799e89..fbcfdcc 100644
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -18,7 +18,7 @@ include_directories(
     ../tests/lib/Files
     lib/json/include
     /usr/local/include
-    ${FImdlp_INCLUDE_DIRS}
+    /usr/local/include/fimdlp/
 )

 add_executable(bayesnet_sample sample.cc)
diff --git a/sample/sample.cc b/sample/sample.cc
index 478ff85..421381d 100644
--- a/sample/sample.cc
+++ b/sample/sample.cc
@@ -60,7 +60,21 @@ int main(int argc, char* argv[])
     auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
     std::cout << "Library version: " << clf.getVersion() << std::endl;
     tie(X, y, features, className, states) = loadDataset(file_name, true);
-    clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
+    torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
+    torch::Tensor dataset;
+    try {
+        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
+        dataset = torch::cat({ X, yresized }, 0);
+    }
+    catch (const std::exception& e) {
+        std::stringstream oss;
+        oss << "* Error in X and y dimensions *\n";
+        oss << "X dimensions: " << dataset.sizes() << "\n";
+        oss << "y dimensions: " << y.sizes();
+        throw std::runtime_error(oss.str());
+    }
+    //Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
+    clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
     auto score = clf.score(X, y);
     std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
     return 0;
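
Note (illustration only, not part of the patch): the core of this optimization is replacing the per-sample std::find over the feature list with an std::unordered_map built once before the counting loop, so each name lookup costs O(1) instead of a linear scan per sample and per parent. Below is a minimal standalone sketch of that lookup strategy; the function buildFeatureIndex and the example feature names are hypothetical and do not exist in the BayesNet library.

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Build a feature-name -> column-index map once, so later lookups are O(1)
// hash accesses instead of a linear std::find over the feature list.
std::unordered_map<std::string, int> buildFeatureIndex(const std::vector<std::string>& features)
{
    std::unordered_map<std::string, int> index;
    index.reserve(features.size());
    for (size_t i = 0; i < features.size(); ++i) {
        index[features[i]] = static_cast<int>(i);
    }
    return index;
}

int main()
{
    // Hypothetical feature list, analogous to the 'features' vector passed to computeCPT.
    std::vector<std::string> features = { "sepallength", "sepalwidth", "petallength", "petalwidth" };
    auto featureIndexMap = buildFeatureIndex(features);
    // Resolve indices once, outside any per-sample loop
    // (as the patch does for the node itself and for each parent).
    std::cout << "petalwidth -> column " << featureIndexMap.at("petalwidth") << std::endl;
    return 0;
}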