Compare commits

...

2 Commits

Author SHA1 Message Date
81fd7df7f0 Update CHANGELOG 2025-02-13 01:18:43 +01:00
dd98cf159d ComputeCPT Optimization 2025-02-13 01:17:37 +01:00
4 changed files with 44 additions and 20 deletions

View File

@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add a new hyperparameter to the SPODE class, *parent*, to set the root node of the model. If no value is set the root parameter of the constructor is used.
- Add a new hyperparameter to the TAN class, *parent*, to set the root node of the model. If not set the first feature is used as root.
### Internal
- Optimize ComputeCPT method in the Node class.
### Changed
- Hyperparameter *maxTolerance* in the BoostAODE class is now in [1, 6] range (it was in [1, 4] range before).

View File

@@ -93,36 +93,42 @@ namespace bayesnet {
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
{
    dimensions.clear();
dimensions.reserve(parents.size() + 1);
    // Get dimensions of the CPT
    dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); for (const auto& parent : parents) {
// Create a tensor of zeros with the dimensions of the CPT dimensions.push_back(parent->getNumStates());
cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing; }
// Fill table with counts //transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
auto pos = find(features.begin(), features.end(), name); // Create a tensor initialized with smoothing
if (pos == features.end()) { cpTable = torch::full(dimensions, smoothing, torch::kDouble);
throw std::logic_error("Feature " + name + " not found in dataset"); // Create a map for quick feature index lookup
std::unordered_map<std::string, int> featureIndexMap;
for (size_t i = 0; i < features.size(); ++i) {
featureIndexMap[features[i]] = i;
}
// Fill table with counts
// Get the index of this node's feature
int name_index = featureIndexMap[name];
// Get parent indices in dataset
std::vector<int> parent_indices;
parent_indices.reserve(parents.size());
for (const auto& parent : parents) {
parent_indices.push_back(featureIndexMap[parent->getName()]);
} }
int name_index = pos - features.begin();
    c10::List<c10::optional<at::Tensor>> coordinates;
    for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
        coordinates.clear();
        auto sample = dataset.index({ "...", n_sample });
        coordinates.push_back(sample[name_index]);
for (auto parent : parents) { for (size_t i = 0; i < parent_indices.size(); ++i) {
pos = find(features.begin(), features.end(), parent->getName()); coordinates.push_back(sample[parent_indices[i]]);
if (pos == features.end()) {
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(sample[parent_index]);
} }
// Increment the count of the corresponding coordinate // Increment the count of the corresponding coordinate
        cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
    }
// Normalize the counts // Normalize the counts (dividing each row by the sum of the row)
// Divide each row by the sum of the row cpTable /= cpTable.sum(0, true);
cpTable = cpTable / cpTable.sum(0);
}
double Node::getFactorValue(std::map<std::string, int>& evidence)
{

View File

@@ -18,7 +18,7 @@ include_directories(
    ../tests/lib/Files
    lib/json/include
    /usr/local/include
${FImdlp_INCLUDE_DIRS} /usr/local/include/fimdlp/
)
add_executable(bayesnet_sample sample.cc)

View File

@@ -60,7 +60,21 @@ int main(int argc, char* argv[])
    auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
    std::cout << "Library version: " << clf.getVersion() << std::endl;
    tie(X, y, features, className, states) = loadDataset(file_name, true);
clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE); torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
torch::Tensor dataset;
try {
auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
dataset = torch::cat({ X, yresized }, 0);
}
catch (const std::exception& e) {
std::stringstream oss;
oss << "* Error in X and y dimensions *\n";
oss << "X dimensions: " << dataset.sizes() << "\n";
oss << "y dimensions: " << y.sizes();
throw std::runtime_error(oss.str());
}
//Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
    auto score = clf.score(X, y);
    std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
    return 0;