# Patch summary
#   * KDBLd, SPODELd and TANLd gain a predict_proba(torch::Tensor&) override that runs
#     prepareX(X) and forwards the result to the base-class predict_proba.
#   * The sample program loads an ARFF file into a float32 tensor (matrix2tensor/loadArff),
#     selects a classifier by name from argv[2], fits it, and prints score() next to an
#     accuracy computed from predict_proba().argmax(1).
#   * sample/CMakeLists.txt adds the BAYESNET_VCPKG_CONFIG option (vcpkg package vs. a
#     locally built library); sample/vcpkg.json swaps "bayesnet" for "nlohmann-json".
#
# Revisions made to the added lines in this version of the patch (hunk headers updated):
#   * sample.cc main: argc is validated before argv[2] is read; the model is looked up once
#     via find(); the classifiers allocated with new are released on the "Model not found"
#     return as well as on the normal return; the cleanup loop no longer copies map entries
#     or shadows `clf`.
#   * sample.cc matrix2tensor: returns an empty tensor for an empty matrix instead of
#     indexing matrix[0].
#   * CMakeLists.txt: "BayesNet: ${bayesnet}" is printed only in the branch that sets it.
#
# NOTE(review): this text was recovered from a whitespace-collapsed copy. Indentation and
#   blank context lines are reconstructed from the hunk counts, and anything that was inside
#   angle brackets (#include targets, template arguments, item<...>) is missing and was left
#   as found. The two "-/+" pairs that look identical (Proposal.cc `delete value;`,
#   CMake `bayesnet::bayesnet`) were presumably whitespace-only changes.
# NOTE(review): `(y_pred == yt).sum().item() / total` - if the stripped template argument is
#   an integer type this is an integer division; confirm it is a floating-point type.
# NOTE(review): vcpkg.json drops "bayesnet" while BAYESNET_VCPKG_CONFIG defaults to ON and
#   then calls find_package(bayesnet CONFIG REQUIRED); confirm the package is provided
#   some other way.
# NOTE(review): ${Platform_SOURCE_DIR} is not defined anywhere in this patch; verify it
#   exists in the sample project. BayesNet_LIBRARIES is set but not used in the lines shown.
# NOTE(review): the changelog says "all Ld classifiers"; this patch touches KDBLd, SPODELd
#   and TANLd only - confirm AODELd (used by the sample) already provides predict_proba.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f3ff0f..0077223 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Fix the vcpkg configuration in building the library.
 - Fix the sample app to use the vcpkg configuration.
+- Add predict_proba method to all Ld classifiers.
 
 ## [1.1.0] - 2025-04-27
 
diff --git a/README.md b/README.md
index 936d67f..e7372ab 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
 [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Doctorado-ML/BayesNet)
 ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea)
 [![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet)
 [![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344)
diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc
index a285da1..0decd1b 100644
--- a/bayesnet/classifiers/KDBLd.cc
+++ b/bayesnet/classifiers/KDBLd.cc
@@ -28,6 +28,11 @@ namespace bayesnet {
         auto Xt = prepareX(X);
         return KDB::predict(Xt);
     }
+    torch::Tensor KDBLd::predict_proba(torch::Tensor& X)
+    {
+        auto Xt = prepareX(X);
+        return KDB::predict_proba(Xt);
+    }
     std::vector KDBLd::graph(const std::string& name) const
     {
         return KDB::graph(name);
diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h
index 77b9eec..6bdce0b 100644
--- a/bayesnet/classifiers/KDBLd.h
+++ b/bayesnet/classifiers/KDBLd.h
@@ -18,6 +18,7 @@ namespace bayesnet {
         KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override;
         std::vector graph(const std::string& name = "KDB") const override;
         torch::Tensor predict(torch::Tensor& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
         static inline std::string version() { return "0.0.1"; };
     };
 }
diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc
index 846cb92..651d3c2 100644
--- a/bayesnet/classifiers/Proposal.cc
+++ b/bayesnet/classifiers/Proposal.cc
@@ -11,7 +11,7 @@ namespace bayesnet {
     Proposal::~Proposal()
     {
         for (auto& [key, value] : discretizers) {
-            delete value;
+            delete value;
         }
     }
     void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc
index d733253..c68b7d9 100644
--- a/bayesnet/classifiers/SPODELd.cc
+++ b/bayesnet/classifiers/SPODELd.cc
@@ -43,6 +43,11 @@ namespace bayesnet {
         auto Xt = prepareX(X);
         return SPODE::predict(Xt);
     }
+    torch::Tensor SPODELd::predict_proba(torch::Tensor& X)
+    {
+        auto Xt = prepareX(X);
+        return SPODE::predict_proba(Xt);
+    }
     std::vector SPODELd::graph(const std::string& name) const
     {
         return SPODE::graph(name);
diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h
index b92d24c..faa3a48 100644
--- a/bayesnet/classifiers/SPODELd.h
+++ b/bayesnet/classifiers/SPODELd.h
@@ -19,6 +19,7 @@ namespace bayesnet {
         SPODELd& commonFit(const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing);
         std::vector graph(const std::string& name = "SPODELd") const override;
         torch::Tensor predict(torch::Tensor& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
         static inline std::string version() { return "0.0.1"; };
     };
 }
diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc
index 6e7d443..f9418da 100644
--- a/bayesnet/classifiers/TANLd.cc
+++ b/bayesnet/classifiers/TANLd.cc
@@ -29,6 +29,11 @@ namespace bayesnet {
         auto Xt = prepareX(X);
         return TAN::predict(Xt);
     }
+    torch::Tensor TANLd::predict_proba(torch::Tensor& X)
+    {
+        auto Xt = prepareX(X);
+        return TAN::predict_proba(Xt);
+    }
     std::vector TANLd::graph(const std::string& name) const
     {
         return TAN::graph(name);
diff --git a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h
index d05a9c3..a904235 100644
--- a/bayesnet/classifiers/TANLd.h
+++ b/bayesnet/classifiers/TANLd.h
@@ -18,6 +18,7 @@ namespace bayesnet {
         TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, map>& states, const Smoothing_t smoothing) override;
         std::vector graph(const std::string& name = "TANLd") const override;
         torch::Tensor predict(torch::Tensor& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
     };
 }
 #endif // !TANLD_H
\ No newline at end of file
diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt
index 82c8a60..1d93da3 100644
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@@ -10,13 +10,32 @@ find_package(Torch CONFIG REQUIRED)
 find_package(fimdlp CONFIG REQUIRED)
 find_package(folding CONFIG REQUIRED)
 find_package(arff-files CONFIG REQUIRED)
-find_package(bayesnet CONFIG REQUIRED)
+find_package(nlohmann_json CONFIG REQUIRED)
+
+option(BAYESNET_VCPKG_CONFIG "Use vcpkg config for BayesNet" ON)
+
+if (BAYESNET_VCPKG_CONFIG)
+    message(STATUS "Using BayesNet vcpkg config")
+    find_package(bayesnet CONFIG REQUIRED)
+    set(BayesNet_LIBRARIES bayesnet::bayesnet)
+else(BAYESNET_VCPKG_CONFIG)
+    message(STATUS "Using BayesNet local library config")
+    find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${Platform_SOURCE_DIR}/../lib/lib REQUIRED)
+    find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${Platform_SOURCE_DIR}/../lib/include)
+    # ${bayesnet} is only set by find_library above, so report it in this branch only.
+    message(STATUS "BayesNet: ${bayesnet}")
+    add_library(bayesnet::bayesnet UNKNOWN IMPORTED)
+    set_target_properties(bayesnet::bayesnet PROPERTIES
+        IMPORTED_LOCATION ${bayesnet}
+        INTERFACE_INCLUDE_DIRECTORIES ${Bayesnet_INCLUDE_DIRS}
+    )
+endif(BAYESNET_VCPKG_CONFIG)
 
 add_executable(bayesnet_sample sample.cc)
 target_link_libraries(bayesnet_sample PRIVATE
     fimdlp::fimdlp
     arff-files::arff-files
     "${TORCH_LIBRARIES}"
-    bayesnet::bayesnet
+    bayesnet::bayesnet
     folding::folding
 )
diff --git a/sample/sample.cc b/sample/sample.cc
index 5ae5b41..27d520c 100644
--- a/sample/sample.cc
+++ b/sample/sample.cc
@@ -4,9 +4,27 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 
+#include
+#include
 #include
 #include
-#include
+#include
+#include
+#include
+
+// Copies `matrix` row by row into a float32 tensor shaped (rows, length of first row).
+// An empty matrix yields an empty 0 x 0 tensor, because matrix[0] would not exist.
+torch::Tensor matrix2tensor(const std::vector>& matrix)
+{
+    if (matrix.empty()) {
+        return torch::empty({ 0, 0 }, torch::kFloat32);
+    }
+    auto tensor = torch::empty({ static_cast(matrix.size()), static_cast(matrix[0].size()) }, torch::kFloat32);
+    for (int i = 0; i < matrix.size(); ++i) {
+        tensor.index_put_({ i, "..." }, torch::tensor(matrix[i], torch::kFloat32));
+    }
+    return tensor;
+}
 
 std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y)
 {
@@ -19,32 +37,40 @@ std::vector discretizeDataset(std::vector& X, m
     }
     return Xd;
 }
-tuple, std::string, map>> loadDataset(const std::string& name, bool class_last)
+std::tuple, std::string> loadArff(const std::string& name, bool class_last)
 {
     auto handler = ArffFiles();
     handler.load(name, class_last);
     // Get Dataset X, y
-    std::vector& X = handler.getX();
-    mdlp::labels_t& y = handler.getY();
-    // Get className & Features
-    auto className = handler.getClassName();
+    std::vector X = handler.getX();
+    mdlp::labels_t y = handler.getY();
     std::vector features;
     auto attributes = handler.getAttributes();
     transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    torch::Tensor Xd;
-    auto states = map>();
-    auto Xr = discretizeDataset(X, y);
-    Xd = torch::zeros({ static_cast(Xr.size()), static_cast(Xr[0].size()) }, torch::kInt32);
-    for (int i = 0; i < features.size(); ++i) {
-        states[features[i]] = std::vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
-        auto item = states.at(features[i]);
-        iota(begin(item), end(item), 0);
-        Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
-    }
-    states[className] = std::vector(*max_element(y.begin(), y.end()) + 1);
-    iota(begin(states.at(className)), end(states.at(className)), 0);
-    return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
+    auto Xt = matrix2tensor(X);
+    auto yt = torch::tensor(y, torch::kInt32);
+    return { Xt, yt, features, handler.getClassName() };
 }
+// tuple, std::string, map>> loadDataset(const std::string& name, bool class_last)
+// {
+//     auto [X, y, features, className] = loadArff(name, class_last);
+//     // Discretize the dataset
+//     torch::Tensor Xd;
+//     auto states = map>();
+//     // Fill the class states
+//     states[className] = std::vector(*max_element(y.begin(), y.end()) + 1);
+//     iota(begin(states.at(className)), end(states.at(className)), 0);
+//     auto Xr = discretizeDataset(X, y);
+//     Xd = torch::zeros({ static_cast(Xr.size()), static_cast(Xr[0].size()) }, torch::kInt32);
+//     for (int i = 0; i < features.size(); ++i) {
+//         states[features[i]] = std::vector(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
+//         auto item = states.at(features[i]);
+//         iota(begin(item), end(item), 0);
+//         Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
+//     }
+//     auto yt = torch::tensor(y, torch::kInt32);
+//     return { Xd, yt, features, className, states };
+// }
 
 int main(int argc, char* argv[])
 {
@@ -53,29 +79,53 @@ int main(int argc, char* argv[])
         return 1;
     }
     std::string file_name = argv[1];
-    torch::Tensor X, y;
-    std::vector features;
-    std::string className;
-    map> states;
-    auto clf = bayesnet::XBAODE(); // false for not using voting in predict
-    std::cout << "Library version: " << clf.getVersion() << std::endl;
-    tie(X, y, features, className, states) = loadDataset(file_name, true);
-    torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
-    torch::Tensor dataset;
-    try {
-        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
-        dataset = torch::cat({ X, yresized }, 0);
+    // The model name is read from argv[2], so make sure that argument was supplied.
+    if (argc < 3) {
+        std::cerr << "Missing model name argument (expected TANLd, KDBLd or AODELd)" << std::endl;
+        return 1;
+    }
+    std::string model_name = argv[2];
+    std::map models{ {"TANLd", new bayesnet::TANLd()}, {"KDBLd", new bayesnet::KDBLd(2)}, {"AODELd", new bayesnet::AODELd() }
+    };
+    // Deletes every classifier allocated above; called on both return paths below.
+    auto release_models = [&models]() {
+        for (const auto& entry : models) {
+            delete entry.second;
+        }
+    };
+    auto model_entry = models.find(model_name);
+    if (model_entry == models.end()) {
+        std::cerr << "Model not found: " << model_name << std::endl;
+        release_models();
+        return 1;
     }
-    catch (const std::exception& e) {
-        std::stringstream oss;
-        oss << "* Error in X and y dimensions *\n";
-        oss << "X dimensions: " << dataset.sizes() << "\n";
-        oss << "y dimensions: " << y.sizes();
-        throw std::runtime_error(oss.str());
-    }
-    clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
-    auto score = clf.score(X, y);
-    std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
+    auto clf = model_entry->second;
+    std::cout << "Library version: " << clf->getVersion() << std::endl;
+    // auto [X, y, features, className, states] = loadDataset(file_name, true);
+    auto [Xt, yt, features, className] = loadArff(file_name, true);
+    std::map> states;
+    // int m = Xt.size(1);
+    // auto weights = torch::full({ m }, 1 / m, torch::kDouble);
+    // auto dataset = buildDataset(Xv, yv);
+    // try {
+    //     auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
+    //     dataset = torch::cat({ X, yresized }, 0);
+    // }
+    // catch (const std::exception& e) {
+    //     std::stringstream oss;
+    //     oss << "* Error in X and y dimensions *\n";
+    //     oss << "X dimensions: " << dataset.sizes() << "\n";
+    //     oss << "y dimensions: " << y.sizes();
+    //     throw std::runtime_error(oss.str());
+    // }
+    clf->fit(Xt, yt, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
+    auto total = yt.size(0);
+    auto y_proba = clf->predict_proba(Xt);
+    auto y_pred = y_proba.argmax(1);
+    auto accuracy_value = (y_pred == yt).sum().item() / total;
+    auto score = clf->score(Xt, yt);
+    std::cout << "File: " << file_name << " Model: " << model_name << " score: " << score << " Computed accuracy: " << accuracy_value << std::endl;
+    release_models();
     return 0;
 }
 
diff --git a/sample/vcpkg.json b/sample/vcpkg.json
index d8ef389..e9102ea 100644
--- a/sample/vcpkg.json
+++ b/sample/vcpkg.json
@@ -6,7 +6,7 @@
     "fimdlp",
     "libtorch-bin",
     "folding",
-    "bayesnet"
+    "nlohmann-json"
   ],
   "overrides": [
     {