mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 15:35:55 +00:00
Refactor github build action
This commit is contained in:
@@ -7,18 +7,16 @@
|
|||||||
|
|
||||||
namespace mdlp {
|
namespace mdlp {
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
|
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
||||||
max_depth(max_depth_),
|
max_depth(max_depth_),
|
||||||
proposed_cuts(proposed)
|
proposed_cuts(proposed) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CPPFImdlp::CPPFImdlp() = default;
|
CPPFImdlp::CPPFImdlp() = default;
|
||||||
|
|
||||||
CPPFImdlp::~CPPFImdlp() = default;
|
CPPFImdlp::~CPPFImdlp() = default;
|
||||||
|
|
||||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
size_t CPPFImdlp::compute_max_num_cut_points() const {
|
||||||
{
|
|
||||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||||
if (proposed_cuts == 0) {
|
if (proposed_cuts == 0) {
|
||||||
return numeric_limits<size_t>::max();
|
return numeric_limits<size_t>::max();
|
||||||
@@ -31,8 +29,7 @@ namespace mdlp {
|
|||||||
return static_cast<size_t>(proposed_cuts);
|
return static_cast<size_t>(proposed_cuts);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
|
||||||
{
|
|
||||||
X = X_;
|
X = X_;
|
||||||
y = y_;
|
y = y_;
|
||||||
num_cut_points = compute_max_num_cut_points();
|
num_cut_points = compute_max_num_cut_points();
|
||||||
@@ -55,13 +52,12 @@ namespace mdlp {
|
|||||||
computeCutPoints(0, X.size(), 1);
|
computeCutPoints(0, X.size(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
|
||||||
{
|
|
||||||
size_t n;
|
size_t n;
|
||||||
size_t m;
|
size_t m;
|
||||||
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||||
bool backWall; // true if duplicates reach begining of the interval
|
bool backWall; // true if duplicates reach beginning of the interval
|
||||||
precision_t previous;
|
precision_t previous;
|
||||||
precision_t actual;
|
precision_t actual;
|
||||||
precision_t next;
|
precision_t next;
|
||||||
@@ -85,11 +81,10 @@ namespace mdlp {
|
|||||||
// Decide which values to use
|
// Decide which values to use
|
||||||
cut = cut + (backWall ? m + 1 : -n);
|
cut = cut + (backWall ? m + 1 : -n);
|
||||||
actual = X[indices[cut]];
|
actual = X[indices[cut]];
|
||||||
return { (actual + previous) / 2, cut };
|
return {(actual + previous) / 2, cut};
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
|
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
|
||||||
{
|
|
||||||
size_t cut;
|
size_t cut;
|
||||||
pair<precision_t, size_t> result;
|
pair<precision_t, size_t> result;
|
||||||
if (cutPoints.size() == num_cut_points)
|
if (cutPoints.size() == num_cut_points)
|
||||||
@@ -110,8 +105,7 @@ namespace mdlp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
|
||||||
{
|
|
||||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||||
size_t candidate = numeric_limits<size_t>::max();
|
size_t candidate = numeric_limits<size_t>::max();
|
||||||
@@ -144,8 +138,7 @@ namespace mdlp {
|
|||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
|
||||||
{
|
|
||||||
int k;
|
int k;
|
||||||
int k1;
|
int k1;
|
||||||
int k2;
|
int k2;
|
||||||
@@ -163,14 +156,13 @@ namespace mdlp {
|
|||||||
ent2 = metrics.entropy(cut, end);
|
ent2 = metrics.entropy(cut, end);
|
||||||
ig = metrics.informationGain(start, cut, end);
|
ig = metrics.informationGain(start, cut, end);
|
||||||
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
|
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
|
||||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||||
return ig > term;
|
return ig > term;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
|
||||||
{
|
|
||||||
indices_t idx(X_.size());
|
indices_t idx(X_.size());
|
||||||
iota(idx.begin(), idx.end(), 0);
|
iota(idx.begin(), idx.end(), 0);
|
||||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||||
@@ -178,18 +170,16 @@ namespace mdlp {
|
|||||||
return y_[i1] < y_[i2];
|
return y_[i1] < y_[i2];
|
||||||
else
|
else
|
||||||
return X_[i1] < X_[i2];
|
return X_[i1] < X_[i2];
|
||||||
});
|
});
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
cutPoints_t CPPFImdlp::getCutPoints()
|
cutPoints_t CPPFImdlp::getCutPoints() {
|
||||||
{
|
|
||||||
sort(cutPoints.begin(), cutPoints.end());
|
sort(cutPoints.begin(), cutPoints.end());
|
||||||
return cutPoints;
|
return cutPoints;
|
||||||
}
|
}
|
||||||
|
|
||||||
int CPPFImdlp::get_depth() const
|
int CPPFImdlp::get_depth() const {
|
||||||
{
|
|
||||||
return depth;
|
return depth;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,4 +1,3 @@
|
|||||||
# GoogleTest requires at least C++14
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
include(FetchContent)
|
include(FetchContent)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user