Restart proposal
@@ -118,21 +118,17 @@ namespace bayesnet {
         }
         return states;
     }
-    map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states)
+    map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states_)
     {
         // Discretize the continuous input data and build pDataset (Classifier::dataset)
-        // We expect to have in states for numeric features an empty vector and for discretized features a vector of states
         int m = Xf.size(1);
         int n = Xf.size(0);
+        map<std::string, std::vector<int>> states;
         pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
         auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
         // discretize input data by feature(row)
         std::unique_ptr<mdlp::Discretizer> discretizer;
         for (auto i = 0; i < pFeatures.size(); ++i) {
-            auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
-            auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
-            if (states[pFeatures[i]].empty()) {
-                // If the feature is numeric, we discretize it
             if (discretizationType == discretization_t::BINQ) {
                 discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
             } else if (discretizationType == discretization_t::BINU) {
@@ -140,15 +136,13 @@ namespace bayesnet {
             } else { // Default is MDLP
                 discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
             }
-            pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->fit_transform(Xt, yv)));
-            int n_states = discretizer->getCutPoints().size() + 1;
-            auto xStates = std::vector<int>(n_states);
+            auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
+            auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
+            discretizer->fit(Xt, yv);
+            pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
+            auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
             iota(xStates.begin(), xStates.end(), 0);
             states[pFeatures[i]] = xStates;
-            } else {
-                // If the feature is categorical, we just copy it
-                pDataset.index_put_({ i, "..." }, Xf[i].to(torch::kInt32));
-            }
             discretizers[pFeatures[i]] = std::move(discretizer);
         }
         int n_classes = torch::max(y).item<int>() + 1;
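
For reference, below is a minimal standalone sketch of the per-feature flow the new loop follows: build a discretizer, fit it on the raw feature values and labels, transform the feature, then enumerate the states as cut points + 1. It only mirrors the calls visible in this diff; the fimdlp header path, hyperparameter values, and sample data are assumptions and are not part of the commit.

    // Sketch only: the <fimdlp/CPPFImdlp.h> header path and the hyperparameter
    // values are assumptions; the calls themselves (fit, transform, getCutPoints)
    // are the ones used in the new loop body above.
    #include <fimdlp/CPPFImdlp.h>
    #include <cstdio>
    #include <memory>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<float> Xt = { 5.1f, 4.9f, 6.3f, 5.8f, 7.1f, 6.9f }; // one continuous feature
        std::vector<int> yv = { 0, 0, 1, 1, 2, 2 };                     // class labels

        // Default branch of the diff: MDLP with (min_length, max_depth, proposed_cuts).
        auto discretizer = std::make_unique<mdlp::CPPFImdlp>(3, 10, 0.0f);
        discretizer->fit(Xt, yv);                 // learn cut points from (Xt, yv)
        auto binned = discretizer->transform(Xt); // map each value to its bin index
        auto n_states = discretizer->getCutPoints().size() + 1;

        // xStates = {0, 1, ..., n_states - 1}, as built with iota() in the loop.
        std::vector<int> xStates(n_states);
        std::iota(xStates.begin(), xStates.end(), 0);
        std::printf("states: %zu, first bin: %d\n", n_states, binned.front());
        return 0;
    }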