BayesNet/Proposal.cc.gcov.html at 22b742f0685e13e19706c3217aaec44820747995

Files

Add some tests to reach 99%

2024-05-06 17:56:00 +02:00

20 KiB

Raw Blame History

LCOV - code coverage report

Current view:	top level - bayesnet/classifiers - Proposal.cc (source / functions)		Coverage	Total	Hit
Test:	BayesNet Coverage Report	Lines:	97.7 %	86	84
Test Date:	2024-05-06 17:54:04	Functions:	100.0 %	8	8
Legend:	Lines: hit not hit

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include <ArffFiles.h>
       8              : #include "Proposal.h"
       9              : 
      10              : namespace bayesnet {
      11          424 :     Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
      12          200 :     Proposal::~Proposal()
      13              :     {
      14         1896 :         for (auto& [key, value] : discretizers) {
      15         1696 :             delete value;
      16              :         }
      17          200 :     }
      18          228 :     void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
      19              :     {
      20          228 :         if (!torch::is_floating_point(X)) {
      21            0 :             throw std::invalid_argument("X must be a floating point tensor");
      22              :         }
      23          228 :         if (torch::is_floating_point(y)) {
      24            0 :             throw std::invalid_argument("y must be an integer tensor");
      25              :         }
      26          228 :     }
      27          212 :     map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
      28              :     {
      29              :         // order of local discretization is important. no good 0, 1, 2...
      30              :         // although we rediscretize features after the local discretization of every feature
      31          212 :         auto order = model.topological_sort();
      32          212 :         auto& nodes = model.getNodes();
      33          212 :         map<std::string, std::vector<int>> states = oldStates;
      34          212 :         std::vector<int> indicesToReDiscretize;
      35          212 :         bool upgrade = false; // Flag to check if we need to upgrade the model
      36         1776 :         for (auto feature : order) {
      37         1564 :             auto nodeParents = nodes[feature]->getParents();
      38         1564 :             if (nodeParents.size() < 2) continue; // Only has class as parent
      39         1324 :             upgrade = true;
      40         1324 :             int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
      41         1324 :             indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
      42         1324 :             std::vector<std::string> parents;
      43         4020 :             transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
      44              :             // Remove class as parent as it will be added later
      45         1324 :             parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
      46              :             // Get the indices of the parents
      47         1324 :             std::vector<int> indices;
      48         1324 :             indices.push_back(-1); // Add class index
      49         2696 :             transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
      50              :             // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
      51         1324 :             std::vector<std::string> yJoinParents(Xf.size(1));
      52         4020 :             for (auto idx : indices) {
      53       958640 :                 for (int i = 0; i < Xf.size(1); ++i) {
      54      2867832 :                     yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
      55              :                 }
      56              :             }
      57         1324 :             auto arff = ArffFiles();
      58         1324 :             auto yxv = arff.factorize(yJoinParents);
      59         2648 :             auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
      60         1324 :             auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
      61         1324 :             discretizers[feature]->fit(xvf, yxv);
      62         1804 :         }
      63          212 :         if (upgrade) {
      64              :             // Discretize again X (only the affected indices) with the new fitted discretizers
      65         1536 :             for (auto index : indicesToReDiscretize) {
      66         2648 :                 auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
      67         1324 :                 auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
      68         5296 :                 pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
      69         1324 :                 auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
      70         1324 :                 iota(xStates.begin(), xStates.end(), 0);
      71              :                 //Update new states of the feature/node
      72         1324 :                 states[pFeatures[index]] = xStates;
      73         1324 :             }
      74          212 :             const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
      75          212 :             model.fit(pDataset, weights, pFeatures, pClassName, states);
      76          212 :         }
      77          424 :         return states;
      78       960128 :     }
      79          232 :     map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
      80              :     {
      81              :         // Discretize the continuous input data and build pDataset (Classifier::dataset)
      82          232 :         int m = Xf.size(1);
      83          232 :         int n = Xf.size(0);
      84          232 :         map<std::string, std::vector<int>> states;
      85          232 :         pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
      86          232 :         auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
      87              :         // discretize input data by feature(row)
      88         1944 :         for (auto i = 0; i < pFeatures.size(); ++i) {
      89         1712 :             auto* discretizer = new mdlp::CPPFImdlp();
      90         3424 :             auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
      91         1712 :             auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
      92         1712 :             discretizer->fit(Xt, yv);
      93         6848 :             pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
      94         1712 :             auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
      95         1712 :             iota(xStates.begin(), xStates.end(), 0);
      96         1712 :             states[pFeatures[i]] = xStates;
      97         1712 :             discretizers[pFeatures[i]] = discretizer;
      98         1712 :         }
      99          232 :         int n_classes = torch::max(y).item<int>() + 1;
     100          232 :         auto yStates = std::vector<int>(n_classes);
     101          232 :         iota(yStates.begin(), yStates.end(), 0);
     102          232 :         states[pClassName] = yStates;
     103          696 :         pDataset.index_put_({ n, "..." }, y);
     104          464 :         return states;
     105         3888 :     }
     106          168 :     torch::Tensor Proposal::prepareX(torch::Tensor& X)
     107              :     {
     108          168 :         auto Xtd = torch::zeros_like(X, torch::kInt32);
     109         1376 :         for (int i = 0; i < X.size(0); ++i) {
     110         1208 :             auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
     111         1208 :             auto Xd = discretizers[pFeatures[i]]->transform(Xt);
     112         3624 :             Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
     113         1208 :         }
     114          336 :         return Xtd;
     115         1376 :     }
     116              : }

Generated by: LCOV version 2.0-1

</html>

20 KiB Raw Blame History

20 KiB

Raw Blame History