Reformat code and update version

This commit is contained in:
2023-04-25 10:48:59 +02:00
parent 22997f5d69
commit a1f26a257c
8 changed files with 199 additions and 171 deletions

View File

@@ -7,16 +7,18 @@
namespace mdlp {
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
max_depth(max_depth_),
proposed_cuts(proposed) {
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
max_depth(max_depth_),
proposed_cuts(proposed)
{
}
CPPFImdlp::CPPFImdlp() = default;
CPPFImdlp::~CPPFImdlp() = default;
size_t CPPFImdlp::compute_max_num_cut_points() const {
size_t CPPFImdlp::compute_max_num_cut_points() const
{
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
if (proposed_cuts == 0) {
return numeric_limits<size_t>::max();
@@ -29,7 +31,8 @@ namespace mdlp {
return static_cast<size_t>(proposed_cuts);
}
void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
{
X = X_;
y = y_;
num_cut_points = compute_max_num_cut_points();
@@ -59,7 +62,8 @@ namespace mdlp {
}
}
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
{
size_t n;
size_t m;
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
@@ -88,10 +92,11 @@ namespace mdlp {
// Decide which values to use
cut = cut + (backWall ? m + 1 : -n);
actual = X[indices[cut]];
return {(actual + previous) / 2, cut};
return { (actual + previous) / 2, cut };
}
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
{
size_t cut;
pair<precision_t, size_t> result;
// Check if the interval length and the depth are Ok
@@ -110,7 +115,8 @@ namespace mdlp {
}
}
size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
{
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
E(A, TA; S) is minimal amongst all the candidate cut points. */
size_t candidate = numeric_limits<size_t>::max();
@@ -143,7 +149,8 @@ namespace mdlp {
return candidate;
}
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
{
int k;
int k1;
int k2;
@@ -161,13 +168,14 @@ namespace mdlp {
ent2 = metrics.entropy(cut, end);
ig = metrics.informationGain(start, cut, end);
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
precision_t term = 1 / N * (log2(N - 1) + delta);
return ig > term;
}
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
{
indices_t idx(X_.size());
iota(idx.begin(), idx.end(), 0);
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
@@ -175,11 +183,12 @@ namespace mdlp {
return y_[i1] < y_[i2];
else
return X_[i1] < X_[i2];
});
});
return idx;
}
void CPPFImdlp::resizeCutPoints() {
void CPPFImdlp::resizeCutPoints()
{
//Compute entropy of each of the whole cutpoint set and discards the biggest value
precision_t maxEntropy = 0;
precision_t entropy;