Compare commits

..

2 Commits

6 changed files with 42 additions and 25 deletions

View File

@@ -5,7 +5,21 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [1.2.2] - 2025-08-19
### Fixed
- Fixed an issue with local discretization that was discretizing all features wether they were numeric or categorical.
## [1.2.1] - 2025-07-19
### Internal
- Update Libtorch to version 2.7.1
- Update libraries versions:
- mdlp: 2.1.1
- Folding: 1.1.2
- ArffFiles: 1.2.1
## [1.2.0] - 2025-07-08

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.27)
project(bayesnet
VERSION 1.2.0
VERSION 1.2.2
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX

View File

@@ -118,31 +118,37 @@ namespace bayesnet {
}
return states;
}
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states)
{
// Discretize the continuous input data and build pDataset (Classifier::dataset)
// We expect to have in states for numeric features an empty vector and for discretized features a vector of states
int m = Xf.size(1);
int n = Xf.size(0);
map<std::string, std::vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)
std::unique_ptr<mdlp::Discretizer> discretizer;
for (auto i = 0; i < pFeatures.size(); ++i) {
if (discretizationType == discretization_t::BINQ) {
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
} else if (discretizationType == discretization_t::BINU) {
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::UNIFORM);
} else { // Default is MDLP
discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
}
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
if (states[pFeatures[i]].empty()) {
// If the feature is numeric, we discretize it
if (discretizationType == discretization_t::BINQ) {
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
} else if (discretizationType == discretization_t::BINU) {
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::UNIFORM);
} else { // Default is MDLP
discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
}
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->fit_transform(Xt, yv)));
int n_states = discretizer->getCutPoints().size() + 1;
auto xStates = std::vector<int>(n_states);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
} else {
// If the feature is categorical, we just copy it
pDataset.index_put_({ i, "..." }, Xf[i].to(torch::kInt32));
}
discretizers[pFeatures[i]] = std::move(discretizer);
}
int n_classes = torch::max(y).item<int>() + 1;
@@ -190,7 +196,7 @@ namespace bayesnet {
)
{
// Phase 1: Initial discretization (same as original)
auto currentStates = fit_local_discretization(y);
auto currentStates = fit_local_discretization(y, initialStates);
auto previousModel = Network();
if (convergence_params.verbose) {

View File

@@ -23,9 +23,8 @@ namespace bayesnet {
protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X);
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
// fit_local_discretization is only called by aodeld
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states);
// Iterative discretization method
template<typename Classifier>
map<std::string, std::vector<int>> iterativeLocalDiscretization(
@@ -37,18 +36,15 @@ namespace bayesnet {
const map<std::string, std::vector<int>>& initialStates,
const Smoothing_t smoothing
);
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
// MDLP parameters
struct {
size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
} ld_params;
// Convergence parameters
struct {
int maxIterations = 10;
@@ -60,6 +56,7 @@ namespace bayesnet {
"max_iterations", "verbose_convergence"
};
private:
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
std::vector<int> factorize(const std::vector<std::string>& labels_t);
std::vector<std::string>& notes; // Notes during fit from BaseClassifier
torch::Tensor& pDataset; // (n+1)xm tensor

View File

@@ -19,7 +19,7 @@ namespace bayesnet {
Xf = X_;
y = y_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
states = fit_local_discretization(y, states_);
// We have discretized the input data
// 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
// calls buildModel to initialize the base models

View File

@@ -20,7 +20,7 @@
#include "bayesnet/ensembles/AODELd.h"
#include "bayesnet/ensembles/BoostAODE.h"
const std::string ACTUAL_VERSION = "1.2.0";
const std::string ACTUAL_VERSION = "1.2.1";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{