From c568ba111df24af21734ebc9967a80b2622324f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Fri, 4 Aug 2023 13:05:12 +0200
Subject: [PATCH] Add Proposal class

---
Reviewer note: the file contents below were revised during review, so the blob
ids on the "index" lines still refer to the contents originally submitted.

 src/BayesNet/Proposal.cc | 129 +++++++++++++++++++++++++++++++++++++++
 src/BayesNet/Proposal.h  |  33 ++++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 src/BayesNet/Proposal.cc
 create mode 100644 src/BayesNet/Proposal.h

diff --git a/src/BayesNet/Proposal.cc b/src/BayesNet/Proposal.cc
new file mode 100644
index 0000000..8e18a2b
--- /dev/null
+++ b/src/BayesNet/Proposal.cc
@@ -0,0 +1,129 @@
+#include "Proposal.h"
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <stdexcept>
+#include "ArffFiles.h"
+
+namespace bayesnet {
+    namespace {
+        // Copies one row of the continuous dataset into a vector the discretizer can consume.
+        // Assumes X is 2D and stores 32-bit floats -- TODO confirm against the code that fills Xf.
+        vector<float> rowAsVector(const torch::Tensor& X, int64_t row)
+        {
+            auto values = X.index({ row }).contiguous();
+            auto first = values.data_ptr<float>();
+            return vector<float>(first, first + values.size(0));
+        }
+        // Position of name inside features; throws instead of returning a past-the-end index.
+        int indexOf(const vector<string>& features, const string& name)
+        {
+            auto position = find(features.begin(), features.end(), name);
+            if (position == features.end()) {
+                throw invalid_argument("Proposal: " + name + " is not one of the features");
+            }
+            return static_cast<int>(position - features.begin());
+        }
+    }
+    // Xv_ and yv_ are kept by reference: fit_local_discretization() overwrites both and
+    // localDiscretizationProposal() rewrites rows of Xv_.
+    Proposal::Proposal(vector<vector<int>>& Xv_, vector<int>& yv_) : Xv(Xv_), yv(yv_) {}
+    // Releases the discretizers allocated by fit_local_discretization().
+    Proposal::~Proposal()
+    {
+        for (auto& entry : discretizers) {
+            delete entry.second;
+        }
+    }
+    /**
+     * Re-fits the discretizer of every feature that has other features as parents in model.
+     * The target used for the fit is the class joined with the current discrete values of
+     * those parents. Afterwards the affected rows of Xv are discretized again and their
+     * entries in states are refreshed.
+     * Reads Xf, Xv, yv and discretizers, so fit_local_discretization() has to run first.
+     * @throws invalid_argument if a node to discretize or one of its parents is not in features
+     * @throws out_of_range if a feature has no fitted discretizer or the data sizes disagree
+     */
+    void Proposal::localDiscretizationProposal(Network& model, vector<string>& features, string className, map<string, vector<int>>& states)
+    {
+        // order of local discretization is important. no good 0, 1, 2...
+        auto order = model.topological_sort();
+        auto& nodes = model.getNodes();
+        vector<int> indicesToReDiscretize;
+        const int64_t n_samples = Xf.size(1);
+        for (const auto& feature : order) {
+            if (feature == className) continue; // The class itself is never discretized
+            // Collect the parents, leaving the class out as it is always part of the target
+            vector<string> parents;
+            for (const auto& parent : nodes[feature]->getParents()) {
+                if (parent->getName() != className) parents.push_back(parent->getName());
+            }
+            if (parents.empty()) continue; // Only has class as parent
+            const int index = indexOf(features, feature);
+            // Get the indices of the parents
+            vector<int> indices;
+            for (const auto& parent : parents) {
+                indices.push_back(indexOf(features, parent));
+            }
+            // Now we fit the discretizer of the feature conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
+            vector<string> yJoinParents;
+            transform(yv.begin(), yv.end(), back_inserter(yJoinParents), [](const auto& p) {return to_string(p); });
+            for (auto idx : indices) {
+                for (int64_t i = 0; i < n_samples; ++i) {
+                    // The separator keeps combinations apart: without it class 1 + parent 12
+                    // and class 11 + parent 2 would both produce the label "112"
+                    auto& label = yJoinParents.at(i);
+                    label += ',';
+                    label += to_string(Xv.at(idx).at(i));
+                }
+            }
+            auto arff = ArffFiles();
+            auto yxv = arff.factorize(yJoinParents);
+            auto xvf = rowAsVector(Xf, index);
+            discretizers.at(feature)->fit(xvf, yxv);
+            indicesToReDiscretize.push_back(index);
+        }
+        // Discretize again X (only the affected indices) with the new fitted discretizers
+        for (auto index : indicesToReDiscretize) {
+            auto* discretizer = discretizers.at(features[index]);
+            auto Xt = rowAsVector(Xf, index);
+            Xv.at(index) = discretizer->transform(Xt);
+            vector<int> xStates(discretizer->getCutPoints().size() + 1);
+            iota(xStates.begin(), xStates.end(), 0);
+            states[features[index]] = xStates;
+        }
+    }
+    /**
+     * Fits one discretizer per feature using row i of Xf and the class labels in y.
+     * Rebuilds Xv with the discretized rows, copies y into yv and fills states with the
+     * values 0..k of every feature (k = number of cut points) and of the class.
+     * Assumes y is a 1D tensor of 32-bit integers -- TODO confirm against the callers.
+     */
+    void Proposal::fit_local_discretization(vector<string>& features, string className, map<string, vector<int>>& states, torch::Tensor& y)
+    {
+        Xv = vector<vector<int>>();
+        yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
+        // discretize input data by feature(row)
+        const int n_features = static_cast<int>(features.size());
+        for (int i = 0; i < n_features; ++i) {
+            // Owned locally until it is stored, so nothing leaks if fit or transform throws
+            auto discretizer = make_unique<mdlp::CPPFImdlp>();
+            auto Xt = rowAsVector(Xf, i);
+            discretizer->fit(Xt, yv);
+            Xv.push_back(discretizer->transform(Xt));
+            vector<int> xStates(discretizer->getCutPoints().size() + 1);
+            iota(xStates.begin(), xStates.end(), 0);
+            states[features[i]] = xStates;
+            // Free the discretizer of a previous fit before replacing it
+            auto& slot = discretizers[features[i]];
+            delete slot;
+            slot = discretizer.release();
+        }
+        int n_classes = torch::max(y).item<int>() + 1;
+        auto yStates = vector<int>(n_classes);
+        iota(yStates.begin(), yStates.end(), 0);
+        states[className] = yStates;
+    }
+}
diff --git a/src/BayesNet/Proposal.h b/src/BayesNet/Proposal.h
new file mode 100644
index 0000000..f92d9d2
--- /dev/null
+++ b/src/BayesNet/Proposal.h
@@ -0,0 +1,33 @@
+#ifndef PROPOSAL_H
+#define PROPOSAL_H
+#include <string>
+#include <map>
+#include <vector>
+#include <torch/torch.h>
+#include "Network.h"
+#include "CPPFImdlp.h"
+
+namespace bayesnet {
+    /**
+     * Local discretization of continuous features guided by the structure of a Network.
+     * All operations are protected, so the class is meant to be used as a base class.
+     * Copying is disabled because the object owns the discretizers it creates.
+     */
+    class Proposal {
+    public:
+        Proposal(vector<vector<int>>& Xv_, vector<int>& yv_);
+        virtual ~Proposal();
+        Proposal(const Proposal&) = delete;
+        Proposal& operator=(const Proposal&) = delete;
+    protected:
+        void localDiscretizationProposal(Network& model, vector<string>& features, string className, map<string, vector<int>>& states);
+        void fit_local_discretization(vector<string>& features, string className, map<string, vector<int>>& states, torch::Tensor& y);
+        torch::Tensor Xf; // X continuous nxm tensor
+        map<string, mdlp::CPPFImdlp*> discretizers; // Fitted discretizer of every feature, owned by this object
+    private:
+        vector<vector<int>>& Xv; // X discrete nxm vector
+        vector<int>& yv;
+    };
+}
+
+#endif