Compare commits
2 Commits
17ee6a909a
...
main
Author | SHA1 | Date | |
---|---|---|---|
9f9369269a
|
|||
89142f8997
|
16
CHANGELOG.md
16
CHANGELOG.md
@@ -5,7 +5,21 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [1.2.2] - 2025-08-19
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where local discretization was discretizing all features, whether they were numeric or categorical.
|
||||
|
||||
## [1.2.1] - 2025-07-19
|
||||
|
||||
### Internal
|
||||
|
||||
- Update Libtorch to version 2.7.1
|
||||
- Update libraries versions:
|
||||
- mdlp: 2.1.1
|
||||
- Folding: 1.1.2
|
||||
- ArffFiles: 1.2.1
|
||||
|
||||
## [1.2.0] - 2025-07-08
|
||||
|
||||
|
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.27)
|
||||
|
||||
project(bayesnet
|
||||
VERSION 1.2.0
|
||||
VERSION 1.2.2
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
|
@@ -118,31 +118,37 @@ namespace bayesnet {
|
||||
}
|
||||
return states;
|
||||
}
|
||||
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
|
||||
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states)
|
||||
{
|
||||
// Discretize the continuous input data and build pDataset (Classifier::dataset)
|
||||
// In states we expect an empty vector for each numeric feature and a vector of states for each already discretized feature
|
||||
int m = Xf.size(1);
|
||||
int n = Xf.size(0);
|
||||
map<std::string, std::vector<int>> states;
|
||||
pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
|
||||
auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
|
||||
// discretize input data by feature(row)
|
||||
std::unique_ptr<mdlp::Discretizer> discretizer;
|
||||
for (auto i = 0; i < pFeatures.size(); ++i) {
|
||||
if (discretizationType == discretization_t::BINQ) {
|
||||
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
|
||||
} else if (discretizationType == discretization_t::BINU) {
|
||||
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::UNIFORM);
|
||||
} else { // Default is MDLP
|
||||
discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
|
||||
}
|
||||
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
|
||||
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
|
||||
discretizer->fit(Xt, yv);
|
||||
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
|
||||
auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
|
||||
iota(xStates.begin(), xStates.end(), 0);
|
||||
states[pFeatures[i]] = xStates;
|
||||
if (states[pFeatures[i]].empty()) {
|
||||
// If the feature is numeric, we discretize it
|
||||
if (discretizationType == discretization_t::BINQ) {
|
||||
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
|
||||
} else if (discretizationType == discretization_t::BINU) {
|
||||
discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::UNIFORM);
|
||||
} else { // Default is MDLP
|
||||
discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
|
||||
}
|
||||
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->fit_transform(Xt, yv)));
|
||||
int n_states = discretizer->getCutPoints().size() + 1;
|
||||
auto xStates = std::vector<int>(n_states);
|
||||
iota(xStates.begin(), xStates.end(), 0);
|
||||
states[pFeatures[i]] = xStates;
|
||||
} else {
|
||||
// If the feature is categorical, we just copy it
|
||||
pDataset.index_put_({ i, "..." }, Xf[i].to(torch::kInt32));
|
||||
}
|
||||
discretizers[pFeatures[i]] = std::move(discretizer);
|
||||
}
|
||||
int n_classes = torch::max(y).item<int>() + 1;
|
||||
@@ -190,7 +196,7 @@ namespace bayesnet {
|
||||
)
|
||||
{
|
||||
// Phase 1: Initial discretization (same as original)
|
||||
auto currentStates = fit_local_discretization(y);
|
||||
auto currentStates = fit_local_discretization(y, initialStates);
|
||||
auto previousModel = Network();
|
||||
|
||||
if (convergence_params.verbose) {
|
||||
|
@@ -23,9 +23,8 @@ namespace bayesnet {
|
||||
protected:
|
||||
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
|
||||
torch::Tensor prepareX(torch::Tensor& X);
|
||||
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
|
||||
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
|
||||
|
||||
// fit_local_discretization is only called by aodeld
|
||||
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y, map<std::string, std::vector<int>> states);
|
||||
// Iterative discretization method
|
||||
template<typename Classifier>
|
||||
map<std::string, std::vector<int>> iterativeLocalDiscretization(
|
||||
@@ -37,18 +36,15 @@ namespace bayesnet {
|
||||
const map<std::string, std::vector<int>>& initialStates,
|
||||
const Smoothing_t smoothing
|
||||
);
|
||||
|
||||
torch::Tensor Xf; // X continuous nxm tensor
|
||||
torch::Tensor y; // y discrete nx1 tensor
|
||||
map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
|
||||
|
||||
// MDLP parameters
|
||||
struct {
|
||||
size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
|
||||
float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
|
||||
int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
|
||||
} ld_params;
|
||||
|
||||
// Convergence parameters
|
||||
struct {
|
||||
int maxIterations = 10;
|
||||
@@ -60,6 +56,7 @@ namespace bayesnet {
|
||||
"max_iterations", "verbose_convergence"
|
||||
};
|
||||
private:
|
||||
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
|
||||
std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
||||
std::vector<std::string>& notes; // Notes during fit from BaseClassifier
|
||||
torch::Tensor& pDataset; // (n+1)xm tensor
|
||||
|
@@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
Xf = X_;
|
||||
y = y_;
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
states = fit_local_discretization(y);
|
||||
states = fit_local_discretization(y, states_);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
|
||||
// calls buildModel to initialize the base models
|
||||
|
@@ -20,7 +20,7 @@
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.2.0";
|
||||
const std::string ACTUAL_VERSION = "1.2.1";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
|
Reference in New Issue
Block a user