Add new hyperparameters to the Ld classifiers

- *ld_algorithm*: algorithm to use for local discretization, with the following options: "MDLP", "BINQ", "BINU".
- *ld_proposed_cuts*: number of cut points to return.
- *mdlp_min_length*: minimum length a partition must have in the MDLP algorithm to be evaluated for further splitting.
- *mdlp_max_depth*: maximum recursion depth of the MDLP algorithm.
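A minimal usage sketch of the new hyperparameters (assumptions: the header path and the Smoothing_t value follow this repository's conventions, and X is a float tensor of shape (n_features, n_samples) with y an int tensor of labels, as in the fit() signatures shown in the diffs below):

    #include <bayesnet/classifiers/KDBLd.h>
    #include <nlohmann/json.hpp>

    void train(torch::Tensor& X, torch::Tensor& y,
               std::vector<std::string>& features, std::string& className,
               std::map<std::string, std::vector<int>>& states)
    {
        auto clf = bayesnet::KDBLd(2); // k = 2
        clf.setHyperparameters(nlohmann::json{
            { "ld_algorithm", "BINQ" }, // "MDLP" (default), "BINQ" or "BINU"
            { "ld_proposed_cuts", 4 },  // number of cut points to return
            { "mdlp_min_length", 3 },   // only used by the MDLP algorithm
            { "mdlp_max_depth", 10 }    // only used by the MDLP algorithm
        });
        clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
    }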
CHANGELOG.md
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.2.0] - 2025-06-30
+
+### Internal
+
+- Add docs generation to CMakeLists.txt.
+- Add new hyperparameters to the Ld classifiers:
+  - *ld_algorithm*: algorithm to use for local discretization, with the following options: "MDLP", "BINQ", "BINU".
+  - *ld_proposed_cuts*: number of cut points to return.
+  - *mdlp_min_length*: minimum length a partition must have in the MDLP algorithm to be evaluated for further splitting.
+  - *mdlp_max_depth*: maximum recursion depth of the MDLP algorithm.
+
 ## [1.1.1] - 2025-05-20
 
 ### Internal
KDB.h
@@ -10,17 +10,16 @@
 #include "Classifier.h"
 namespace bayesnet {
     class KDB : public Classifier {
-    private:
-        int k;
-        float theta;
-    protected:
-        void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
-        void buildModel(const torch::Tensor& weights) override;
     public:
         explicit KDB(int k, float theta = 0.03);
         virtual ~KDB() = default;
         void setHyperparameters(const nlohmann::json& hyperparameters_) override;
         std::vector<std::string> graph(const std::string& name = "KDB") const override;
+    protected:
+        int k;
+        float theta;
+        void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
+        void buildModel(const torch::Tensor& weights) override;
     };
 }
 #endif
KDBLd.cc
@@ -7,7 +7,25 @@
 #include "KDBLd.h"
 
 namespace bayesnet {
-    KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
+    KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className)
+    {
+        validHyperparameters = validHyperparameters_ld;
+        validHyperparameters.push_back("k");
+        validHyperparameters.push_back("theta");
+    }
+    void KDBLd::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        auto hyperparameters = hyperparameters_;
+        if (hyperparameters.contains("k")) {
+            k = hyperparameters["k"];
+            hyperparameters.erase("k");
+        }
+        if (hyperparameters.contains("theta")) {
+            theta = hyperparameters["theta"];
+            hyperparameters.erase("theta");
+        }
+        Proposal::setHyperparameters(hyperparameters);
+    }
     KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
     {
         checkInput(X_, y_);
KDBLd.h
@@ -11,12 +11,12 @@
 namespace bayesnet {
     class KDBLd : public KDB, public Proposal {
-    private:
     public:
         explicit KDBLd(int k);
         virtual ~KDBLd() = default;
         KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
         std::vector<std::string> graph(const std::string& name = "KDB") const override;
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
         torch::Tensor predict(torch::Tensor& X) override;
         torch::Tensor predict_proba(torch::Tensor& X) override;
         static inline std::string version() { return "0.0.1"; };
Proposal.cc
@@ -7,13 +7,42 @@
 #include "Proposal.h"
 
 namespace bayesnet {
-    Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
-    Proposal::~Proposal()
-    {
-        for (auto& [key, value] : discretizers) {
-            delete value;
-        }
-    }
+    Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
+    {
+    }
+    void Proposal::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        auto hyperparameters = hyperparameters_;
+        if (hyperparameters.contains("ld_proposed_cuts")) {
+            ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"];
+            hyperparameters.erase("ld_proposed_cuts");
+        }
+        if (hyperparameters.contains("mdlp_max_depth")) {
+            ld_params.max_depth = hyperparameters["mdlp_max_depth"];
+            hyperparameters.erase("mdlp_max_depth");
+        }
+        if (hyperparameters.contains("mdlp_min_length")) {
+            ld_params.min_length = hyperparameters["mdlp_min_length"];
+            hyperparameters.erase("mdlp_min_length");
+        }
+        if (hyperparameters.contains("ld_algorithm")) {
+            auto algorithm = hyperparameters["ld_algorithm"];
+            hyperparameters.erase("ld_algorithm");
+            if (algorithm == "MDLP") {
+                discretizationType = discretization_t::MDLP;
+            } else if (algorithm == "BINQ") {
+                discretizationType = discretization_t::BINQ;
+            } else if (algorithm == "BINU") {
+                discretizationType = discretization_t::BINU;
+            } else {
+                throw std::invalid_argument("Invalid discretization algorithm: " + algorithm.get<std::string>());
+            }
+        }
+        if (!hyperparameters.empty()) {
+            throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
+        }
+    }
 
     void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
     {
         if (!torch::is_floating_point(X)) {
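Each setHyperparameters level in this chain consumes the keys it owns, erases them, and forwards the remainder; whatever survives the last level is rejected. A hedged sketch of the observable contract (the "typo" key is illustrative):

    nlohmann::json hp = { { "theta", 0.1 }, { "ld_algorithm", "MDLP" }, { "typo", 1 } };
    bayesnet::KDBLd clf(2);
    try {
        clf.setHyperparameters(hp); // "theta" consumed by KDBLd, "ld_algorithm" by Proposal
    } catch (const std::invalid_argument& e) {
        // "typo" reaches the end of the chain unconsumed:
        // "Invalid hyperparameters for Proposal: {"typo":1}"
    }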
Proposal.cc
@@ -84,8 +113,15 @@ namespace bayesnet {
         pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
         auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
         // discretize input data by feature(row)
+        std::unique_ptr<mdlp::Discretizer> discretizer;
         for (auto i = 0; i < pFeatures.size(); ++i) {
-            auto* discretizer = new mdlp::CPPFImdlp();
+            if (discretizationType == discretization_t::BINQ) {
+                discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::QUANTILE);
+            } else if (discretizationType == discretization_t::BINU) {
+                discretizer = std::make_unique<mdlp::BinDisc>(ld_params.proposed_cuts, mdlp::strategy_t::UNIFORM);
+            } else { // Default is MDLP
+                discretizer = std::make_unique<mdlp::CPPFImdlp>(ld_params.min_length, ld_params.max_depth, ld_params.proposed_cuts);
+            }
             auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
             auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
             discretizer->fit(Xt, yv);
@@ -93,7 +129,7 @@ namespace bayesnet {
             auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
             iota(xStates.begin(), xStates.end(), 0);
             states[pFeatures[i]] = xStates;
-            discretizers[pFeatures[i]] = discretizer;
+            discretizers[pFeatures[i]] = std::move(discretizer);
         }
         int n_classes = torch::max(y).item<int>() + 1;
         auto yStates = std::vector<int>(n_classes);
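The three discretizers selected above share fimdlp's Discretizer interface, which is what keeps the loop body algorithm-agnostic. A standalone sketch of that interface (fit() and getCutPoints() appear in this diff; transform() is assumed from the same fimdlp 2.x base class):

    #include <fimdlp/CPPFImdlp.h>
    #include <fimdlp/BinDisc.h>

    // Discretize one feature with equal-frequency binning (the "BINQ" path);
    // swap QUANTILE for UNIFORM to get the "BINU" path.
    std::vector<int> discretize(std::vector<float>& X, std::vector<int>& y)
    {
        mdlp::BinDisc binq(4, mdlp::strategy_t::QUANTILE); // 4 proposed cuts
        binq.fit(X, y);           // labels are ignored by unsupervised binning
        return binq.transform(X); // bin index for every input value
    }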
Proposal.h
@@ -10,14 +10,16 @@
 #include <map>
 #include <torch/torch.h>
 #include <fimdlp/CPPFImdlp.h>
+#include <fimdlp/BinDisc.h>
 #include "bayesnet/network/Network.h"
+#include <nlohmann/json.hpp>
 #include "Classifier.h"
 
 namespace bayesnet {
     class Proposal {
     public:
         Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
-        virtual ~Proposal();
+        void setHyperparameters(const nlohmann::json& hyperparameters_);
     protected:
         void checkInput(const torch::Tensor& X, const torch::Tensor& y);
         torch::Tensor prepareX(torch::Tensor& X);
@@ -25,12 +27,24 @@ namespace bayesnet {
         map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
         torch::Tensor Xf; // X continuous nxm tensor
         torch::Tensor y; // y discrete nx1 tensor
-        map<std::string, mdlp::CPPFImdlp*> discretizers;
+        map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
+        // MDLP parameters
+        struct {
+            size_t min_length = 3; // Minimum length of the interval to consider it in mdlp
+            float proposed_cuts = 0.0; // Proposed cuts for the Discretization algorithm
+            int max_depth = std::numeric_limits<int>::max(); // Maximum depth of the MDLP tree
+        } ld_params;
+        nlohmann::json validHyperparameters_ld = { "ld_algorithm", "ld_proposed_cuts", "mdlp_min_length", "mdlp_max_depth" };
     private:
         std::vector<int> factorize(const std::vector<std::string>& labels_t);
         torch::Tensor& pDataset; // (n+1)xm tensor
         std::vector<std::string>& pFeatures;
         std::string& pClassName;
+        enum class discretization_t {
+            MDLP,
+            BINQ,
+            BINU
+        } discretizationType = discretization_t::MDLP; // Default discretization type
     };
 }
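Switching the map's values from raw CPPFImdlp pointers to std::unique_ptr<mdlp::Discretizer> is what makes the hand-written ~Proposal() removed in Proposal.cc above unnecessary. A minimal illustration of the idiom (the feature name and constructor arguments are placeholders):

    std::map<std::string, std::unique_ptr<mdlp::Discretizer>> discretizers;
    discretizers["petal_width"] =
        std::make_unique<mdlp::CPPFImdlp>(3, 10, 0.0f); // min_length, max_depth, proposed_cuts
    // When the map is destroyed, each unique_ptr deletes its Discretizer;
    // no explicit loop calling `delete` is needed.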
SPODELd.cc
@@ -7,7 +7,11 @@
 #include "SPODELd.h"
 
 namespace bayesnet {
-    SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
+    SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className)
+    {
+        validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal
+    }
 
     SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
     {
         checkInput(X_, y_);
AODELd.cc
@@ -9,6 +9,7 @@
 namespace bayesnet {
     AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
     {
+        validHyperparameters = validHyperparameters_ld; // Inherits the valid hyperparameters from Proposal
     }
     AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
     {
@@ -31,6 +32,7 @@ namespace bayesnet {
         models.clear();
         for (int i = 0; i < features.size(); ++i) {
             models.push_back(std::make_unique<SPODELd>(i));
+            models.back()->setHyperparameters(hyperparameters);
         }
         n_models = models.size();
         significanceModels = std::vector<double>(n_models, 1.0);
AODELd.h
@@ -20,6 +20,8 @@ namespace bayesnet {
     protected:
         void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
         void buildModel(const torch::Tensor& weights) override;
+    private:
+        nlohmann::json hyperparameters = {}; // Hyperparameters for the model
     };
 }
 #endif // !AODELD_H
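With the stored hyperparameters member above, AODELd can replay the ensemble-level settings on every SPODELd it builds (models.back()->setHyperparameters(hyperparameters) in buildModel). A hedged usage sketch, assuming the member is filled by an AODELd::setHyperparameters override that this capture does not show:

    bayesnet::AODELd ensemble(true); // predict_voting = true
    ensemble.setHyperparameters(nlohmann::json{
        { "ld_algorithm", "BINU" },
        { "ld_proposed_cuts", 5 }
    });
    // fit() -> buildModel(): every SPODELd member is constructed and then
    // receives the same local-discretization hyperparameters.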
@@ -18,7 +18,7 @@
     std::map<std::string, std::string> modules = {
         { "mdlp", "2.0.1" },
         { "Folding", "1.1.1" },
-        { "json", "3.12" },
+        { "json", "3.11" },
         { "ArffFiles", "1.1.0" }
     };