Refactor GitHub build action

This commit is contained in:
2023-04-01 17:53:00 +02:00
parent 42e83b3d26
commit da41a9317d
2 changed files with 16 additions and 27 deletions

View File

@@ -7,18 +7,16 @@
namespace mdlp { namespace mdlp {
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_), CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
max_depth(max_depth_), max_depth(max_depth_),
proposed_cuts(proposed) proposed_cuts(proposed) {
{
} }
CPPFImdlp::CPPFImdlp() = default; CPPFImdlp::CPPFImdlp() = default;
CPPFImdlp::~CPPFImdlp() = default; CPPFImdlp::~CPPFImdlp() = default;
size_t CPPFImdlp::compute_max_num_cut_points() const size_t CPPFImdlp::compute_max_num_cut_points() const {
{
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
if (proposed_cuts == 0) { if (proposed_cuts == 0) {
return numeric_limits<size_t>::max(); return numeric_limits<size_t>::max();
@@ -31,8 +29,7 @@ namespace mdlp {
return static_cast<size_t>(proposed_cuts); return static_cast<size_t>(proposed_cuts);
} }
void CPPFImdlp::fit(samples_t& X_, labels_t& y_) void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
{
X = X_; X = X_;
y = y_; y = y_;
num_cut_points = compute_max_num_cut_points(); num_cut_points = compute_max_num_cut_points();
@@ -55,13 +52,12 @@ namespace mdlp {
computeCutPoints(0, X.size(), 1); computeCutPoints(0, X.size(), 1);
} }
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
{
size_t n; size_t n;
size_t m; size_t m;
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut; size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
size_t idxNext = cut + 1 < end ? cut + 1 : cut; size_t idxNext = cut + 1 < end ? cut + 1 : cut;
bool backWall; // true if duplicates reach begining of the interval bool backWall; // true if duplicates reach beginning of the interval
precision_t previous; precision_t previous;
precision_t actual; precision_t actual;
precision_t next; precision_t next;
@@ -85,11 +81,10 @@ namespace mdlp {
// Decide which values to use // Decide which values to use
cut = cut + (backWall ? m + 1 : -n); cut = cut + (backWall ? m + 1 : -n);
actual = X[indices[cut]]; actual = X[indices[cut]];
return { (actual + previous) / 2, cut }; return {(actual + previous) / 2, cut};
} }
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
{
size_t cut; size_t cut;
pair<precision_t, size_t> result; pair<precision_t, size_t> result;
if (cutPoints.size() == num_cut_points) if (cutPoints.size() == num_cut_points)
@@ -110,8 +105,7 @@ namespace mdlp {
} }
} }
size_t CPPFImdlp::getCandidate(size_t start, size_t end) size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
{
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
E(A, TA; S) is minimal amongst all the candidate cut points. */ E(A, TA; S) is minimal amongst all the candidate cut points. */
size_t candidate = numeric_limits<size_t>::max(); size_t candidate = numeric_limits<size_t>::max();
@@ -144,8 +138,7 @@ namespace mdlp {
return candidate; return candidate;
} }
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
{
int k; int k;
int k1; int k1;
int k2; int k2;
@@ -163,14 +156,13 @@ namespace mdlp {
ent2 = metrics.entropy(cut, end); ent2 = metrics.entropy(cut, end);
ig = metrics.informationGain(start, cut, end); ig = metrics.informationGain(start, cut, end);
delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) - delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2)); (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
precision_t term = 1 / N * (log2(N - 1) + delta); precision_t term = 1 / N * (log2(N - 1) + delta);
return ig > term; return ig > term;
} }
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_) indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) {
{
indices_t idx(X_.size()); indices_t idx(X_.size());
iota(idx.begin(), idx.end(), 0); iota(idx.begin(), idx.end(), 0);
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) { stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
@@ -178,18 +170,16 @@ namespace mdlp {
return y_[i1] < y_[i2]; return y_[i1] < y_[i2];
else else
return X_[i1] < X_[i2]; return X_[i1] < X_[i2];
}); });
return idx; return idx;
} }
cutPoints_t CPPFImdlp::getCutPoints() cutPoints_t CPPFImdlp::getCutPoints() {
{
sort(cutPoints.begin(), cutPoints.end()); sort(cutPoints.begin(), cutPoints.end());
return cutPoints; return cutPoints;
} }
int CPPFImdlp::get_depth() const int CPPFImdlp::get_depth() const {
{
return depth; return depth;
} }
} }

View File

@@ -1,4 +1,3 @@
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
include(FetchContent) include(FetchContent)