Add predict_proba to Ld classifiers

2025-05-12 19:47:04 +02:00
parent 8a02a3a5cb
commit b11620bbe8
12 changed files with 116 additions and 44 deletions
--- a/sample/sample.cc
+++ b/sample/sample.cc
@@ -4,9 +4,22 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************

+#include <map>
+#include <string>
 #include <ArffFiles/ArffFiles.hpp>
 #include <fimdlp/CPPFImdlp.h>
-#include <bayesnet/ensembles/XBAODE.h>
+#include <bayesnet/classifiers/TANLd.h>
+#include <bayesnet/classifiers/KDBLd.h>
+#include <bayesnet/ensembles/AODELd.h>
+
+/// Converts a row-major matrix of floats into a 2-D kFloat32 tensor.
+///
+/// Row i of the result holds the values of matrix[i]; the column count is
+/// taken from the first row.
+///
+/// @param matrix Rows to copy into the tensor.
+/// @return A tensor of shape (matrix.size(), matrix[0].size()), or an empty
+///         (0, 0) tensor when the matrix has no rows.
+torch::Tensor matrix2tensor(const std::vector<std::vector<float>>& matrix)
+{
+    // Without this guard matrix[0] below would be read on an empty vector.
+    if (matrix.empty()) {
+        return torch::empty({ 0, 0 }, torch::kFloat32);
+    }
+    // Hoist the sizes once so the loop compares signed with signed.
+    const int rows = static_cast<int>(matrix.size());
+    const int cols = static_cast<int>(matrix[0].size());
+    auto tensor = torch::empty({ rows, cols }, torch::kFloat32);
+    for (int i = 0; i < rows; ++i) {
+        // "..." selects every column of row i.
+        tensor.index_put_({ i, "..." }, torch::tensor(matrix[i], torch::kFloat32));
+    }
+    return tensor;
+}

 std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
 {
@@ -19,32 +32,40 @@ std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, m
    }
    return Xd;
 }
-tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
+std::tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string> loadArff(const std::string& name, bool class_last) // loads an ARFF file and returns its raw (undiscretized) data as tensors
 {
    auto handler = ArffFiles();
    handler.load(name, class_last);
    // Get Dataset X, y
-    std::vector<mdlp::samples_t>& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
+    std::vector<mdlp::samples_t> X = handler.getX(); // held by value now, not as a reference into handler
+    mdlp::labels_t y = handler.getY(); // held by value now, not as a reference into handler
    std::vector<std::string> features;
    auto attributes = handler.getAttributes();
    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    torch::Tensor Xd;
-    auto states = map<std::string, std::vector<int>>();
-    auto Xr = discretizeDataset(X, y);
-    Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
-    for (int i = 0; i < features.size(); ++i) {
-        states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-        auto item = states.at(features[i]);
-        iota(begin(item), end(item), 0);
-        Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
-    }
-    states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
-    iota(begin(states.at(className)), end(states.at(className)), 0);
-    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
+    auto Xt = matrix2tensor(X); // float32 tensor, one row per element of X; no discretization is applied here
+    auto yt = torch::tensor(y, torch::kInt32); // labels converted to an int32 tensor
+    return { Xt, yt, features, handler.getClassName() }; // (X tensor, y tensor, first element of each attribute pair, class name)
 }
+// tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
+// {
+//     auto [X, y, features, className] = loadArff(name, class_last);
+//     // Discretize the dataset
+//     torch::Tensor Xd;
+//     auto states = map<std::string, std::vector<int>>();
+//     // Fill the class states
+//     states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
+//     iota(begin(states.at(className)), end(states.at(className)), 0);
+//     auto Xr = discretizeDataset(X, y);
+//     Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
+//     for (int i = 0; i < features.size(); ++i) {
+//         states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
+//         auto item = states.at(features[i]);
+//         iota(begin(item), end(item), 0);
+//         Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
+//     }
+//     auto yt = torch::tensor(y, torch::kInt32);
+//     return { Xd, yt, features, className, states };
+// }

 int main(int argc, char* argv[])
 {
@@ -53,29 +74,42 @@ int main(int argc, char* argv[])
        return 1;
    }
    std::string file_name = argv[1];
-    torch::Tensor X, y;
-    std::vector<std::string> features;
-    std::string className;
-    map<std::string, std::vector<int>> states;
-    auto clf = bayesnet::XBAODE(); // false for not using voting in predict
-    std::cout << "Library version: " << clf.getVersion() << std::endl;
-    tie(X, y, features, className, states) = loadDataset(file_name, true);
-    torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
-    torch::Tensor dataset;
-    try {
-        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
-        dataset = torch::cat({ X, yresized }, 0);
+    std::string model_name = argv[2];
+    std::map<std::string, bayesnet::Classifier*> models{ {"TANLd", new bayesnet::TANLd()}, {"KDBLd", new bayesnet::KDBLd(2)}, {"AODELd", new bayesnet::AODELd() }
+    };
+    if (models.find(model_name) == models.end()) {
+        std::cerr << "Model not found: " << model_name << std::endl;
+        return 1;
    }
-    catch (const std::exception& e) {
-        std::stringstream oss;
-        oss << "* Error in X and y dimensions *\n";
-        oss << "X dimensions: " << dataset.sizes() << "\n";
-        oss << "y dimensions: " << y.sizes();
-        throw std::runtime_error(oss.str());
+    auto clf = models[model_name];
+    std::cout << "Library version: " << clf->getVersion() << std::endl;
+    // auto [X, y, features, className, states] = loadDataset(file_name, true);
+    auto [Xt, yt, features, className] = loadArff(file_name, true);
+    std::map<std::string, std::vector<int>> states;
+    // int m = Xt.size(1);
+    // auto weights = torch::full({ m }, 1 / m, torch::kDouble);
+    // auto dataset = buildDataset(Xv, yv);
+    // try {
+    //     auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
+    //     dataset = torch::cat({ X, yresized }, 0);
+    // }
+    // catch (const std::exception& e) {
+    //     std::stringstream oss;
+    //     oss << "* Error in X and y dimensions *\n";
+    //     oss << "X dimensions: " << dataset.sizes() << "\n";
+    //     oss << "y dimensions: " << y.sizes();
+    //     throw std::runtime_error(oss.str());
+    // }
+    clf->fit(Xt, yt, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
+    auto total = yt.size(0);
+    auto y_proba = clf->predict_proba(Xt);
+    auto y_pred = y_proba.argmax(1);
+    auto accuracy_value = (y_pred == yt).sum().item<float>() / total;
+    auto score = clf->score(Xt, yt);
+    std::cout << "File: " << file_name << " Model: " << model_name << " score: " << score << " Computed accuracy: " << accuracy_value << std::endl;
+    for (const auto clf : models) {
+        delete clf.second;
    }
-    clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
-    auto score = clf.score(X, y);
-    std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
    return 0;
 }