20 KiB
20 KiB
<html lang="en">
<head>
</head>
</html>
LCOV - code coverage report | ||||||||||||||||||||||
![]() | ||||||||||||||||||||||
|
||||||||||||||||||||||
![]() |
Line data Source code 1 : // *************************************************************** 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez 3 : // SPDX-FileType: SOURCE 4 : // SPDX-License-Identifier: MIT 5 : // *************************************************************** 6 : 7 : #include <ArffFiles.h> 8 : #include "Proposal.h" 9 : 10 : namespace bayesnet { 11 1166 : Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {} 12 550 : Proposal::~Proposal() 13 : { 14 5214 : for (auto& [key, value] : discretizers) { 15 4664 : delete value; 16 : } 17 550 : } 18 627 : void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y) 19 : { 20 627 : if (!torch::is_floating_point(X)) { 21 0 : throw std::invalid_argument("X must be a floating point tensor"); 22 : } 23 627 : if (torch::is_floating_point(y)) { 24 0 : throw std::invalid_argument("y must be an integer tensor"); 25 : } 26 627 : } 27 583 : map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model) 28 : { 29 : // order of local discretization is important. no good 0, 1, 2... 30 : // although we rediscretize features after the local discretization of every feature 31 583 : auto order = model.topological_sort(); 32 583 : auto& nodes = model.getNodes(); 33 583 : map<std::string, std::vector<int>> states = oldStates; 34 583 : std::vector<int> indicesToReDiscretize; 35 583 : bool upgrade = false; // Flag to check if we need to upgrade the model 36 4884 : for (auto feature : order) { 37 4301 : auto nodeParents = nodes[feature]->getParents(); 38 4301 : if (nodeParents.size() < 2) continue; // Only has class as parent 39 3641 : upgrade = true; 40 3641 : int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin(); 41 3641 : indicesToReDiscretize.push_back(index); // We need to re-discretize this feature 42 3641 : std::vector<std::string> parents; 43 11055 : transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); }); 44 : // Remove class as parent as it will be added later 45 3641 : parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end()); 46 : // Get the indices of the parents 47 3641 : std::vector<int> indices; 48 3641 : indices.push_back(-1); // Add class index 49 7414 : transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); }); 50 : // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y) 51 3641 : std::vector<std::string> yJoinParents(Xf.size(1)); 52 11055 : for (auto idx : indices) { 53 2636260 : for (int i = 0; i < Xf.size(1); ++i) { 54 7886538 : yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>()); 55 : } 56 : } 57 3641 : auto arff = ArffFiles(); 58 3641 : auto yxv = arff.factorize(yJoinParents); 59 7282 : auto xvf_ptr = Xf.index({ index }).data_ptr<float>(); 60 3641 : auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1)); 61 3641 : discretizers[feature]->fit(xvf, yxv); 62 4961 : } 63 583 : if (upgrade) { 64 : // Discretize again X (only the affected indices) with the new fitted discretizers 65 4224 : for (auto index : indicesToReDiscretize) { 66 7282 : auto Xt_ptr = Xf.index({ index }).data_ptr<float>(); 67 3641 : auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); 68 14564 : pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt))); 69 3641 : auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1); 70 3641 : iota(xStates.begin(), xStates.end(), 0); 71 : //Update new states of the feature/node 72 3641 : states[pFeatures[index]] = xStates; 73 3641 : } 74 583 : const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); 75 583 : model.fit(pDataset, weights, pFeatures, pClassName, states); 76 583 : } 77 1166 : return states; 78 2640352 : } 79 638 : map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y) 80 : { 81 : // Discretize the continuous input data and build pDataset (Classifier::dataset) 82 638 : int m = Xf.size(1); 83 638 : int n = Xf.size(0); 84 638 : map<std::string, std::vector<int>> states; 85 638 : pDataset = torch::zeros({ n + 1, m }, torch::kInt32); 86 638 : auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0)); 87 : // discretize input data by feature(row) 88 5346 : for (auto i = 0; i < pFeatures.size(); ++i) { 89 4708 : auto* discretizer = new mdlp::CPPFImdlp(); 90 9416 : auto Xt_ptr = Xf.index({ i }).data_ptr<float>(); 91 4708 : auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1)); 92 4708 : discretizer->fit(Xt, yv); 93 18832 : pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt))); 94 4708 : auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1); 95 4708 : iota(xStates.begin(), xStates.end(), 0); 96 4708 : states[pFeatures[i]] = xStates; 97 4708 : discretizers[pFeatures[i]] = discretizer; 98 4708 : } 99 638 : int n_classes = torch::max(y).item<int>() + 1; 100 638 : auto yStates = std::vector<int>(n_classes); 101 638 : iota(yStates.begin(), yStates.end(), 0); 102 638 : states[pClassName] = yStates; 103 1914 : pDataset.index_put_({ n, "..." }, y); 104 1276 : return states; 105 10692 : } 106 462 : torch::Tensor Proposal::prepareX(torch::Tensor& X) 107 : { 108 462 : auto Xtd = torch::zeros_like(X, torch::kInt32); 109 3784 : for (int i = 0; i < X.size(0); ++i) { 110 3322 : auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1)); 111 3322 : auto Xd = discretizers[pFeatures[i]]->transform(Xt); 112 9966 : Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32)); 113 3322 : } 114 462 : return Xtd; 115 3322 : } 116 : } |
![]() |
Generated by: LCOV version 2.0-1 |
</html>