Remove unneeded loop in sortIndices

Add some static casts
2025-08-16 07:55:58 +00:00 · 2023-03-19 19:13:37 +01:00
parent f0845c5bd1
commit cfade7a556
9 changed files with 210 additions and 146 deletions
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -5,20 +5,22 @@
 #include <limits>
 #include "CPPFImdlp.h"
 #include "Metrics.h"
+
 namespace mdlp {

-    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
-        max_depth(max_depth_), proposed_cuts(proposed)
-    {
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
+                                                                               max_depth(max_depth_),
+                                                                               proposed_cuts(proposed) { // empty body: the three arguments are only stored; proposed == 0 later means "no limit" in compute_max_num_cut_points
    }
+
    CPPFImdlp::CPPFImdlp() = default;
+
    CPPFImdlp::~CPPFImdlp() = default;

-    size_t CPPFImdlp::compute_max_num_cut_points() const
-    {
+    size_t CPPFImdlp::compute_max_num_cut_points() const {
        // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
        if (proposed_cuts == 0) {
-            return  numeric_limits<size_t>::max();
+            return numeric_limits<size_t>::max();
        }
        if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
            throw invalid_argument("wrong proposed num_cuts value");
@@ -28,8 +30,7 @@ namespace mdlp {
        return static_cast<size_t>(proposed_cuts);
    }

-    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
-    {
+    void CPPFImdlp::fit(samples_t &X_, labels_t &y_) {
        X = X_;
        y = y_;
        num_cut_points = compute_max_num_cut_points();
@@ -52,12 +53,15 @@ namespace mdlp {
        computeCutPoints(0, X.size(), 1);
    }

-    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
-    {
-        size_t n, m, idxPrev = cut - 1 >= start ? cut - 1 : cut;
+    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) {
+        size_t n;
+        size_t m;
+        size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
        size_t idxNext = cut + 1 < end ? cut + 1 : cut;
        bool backWall; // true if duplicates reach begining of the interval
-        precision_t previous, actual, next;
+        precision_t previous;
+        precision_t actual;
+        precision_t next;
        previous = X[indices[idxPrev]];
        actual = X[indices[cut]];
        next = X[indices[idxNext]];
@@ -78,11 +82,10 @@ namespace mdlp {
        // Decide which values to use
        cut = cut + (backWall ? m + 1 : -n);
        actual = X[indices[cut]];
-        return { (actual + previous) / 2, cut };
+        return {(actual + previous) / 2, cut};
    }

-    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
-    {
+    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_) {
        size_t cut;
        pair<precision_t, size_t> result;
        if (cutPoints.size() == num_cut_points)
@@ -103,13 +106,15 @@ namespace mdlp {
        }
    }

-    size_t CPPFImdlp::getCandidate(size_t start, size_t end)
-    {
+    size_t CPPFImdlp::getCandidate(size_t start, size_t end) {
        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
        E(A, TA; S) is minimal amongst all the candidate cut points. */
-        size_t candidate = numeric_limits<size_t>::max(), elements = end - start;
+        size_t candidate = numeric_limits<size_t>::max();
+        size_t elements = end - start;
        bool sameValues = true;
-        precision_t entropy_left, entropy_right, minEntropy;
+        precision_t entropy_left;
+        precision_t entropy_right;
+        precision_t minEntropy;
        // Check if all the values of the variable in the interval are the same
        for (size_t idx = start + 1; idx < end; idx++) {
            if (X[indices[idx]] != X[indices[start]]) {
@@ -134,11 +139,15 @@ namespace mdlp {
        return candidate;
    }

-    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
-    {
-        int k, k1, k2;
-        precision_t ig, delta;
-        precision_t ent, ent1, ent2;
+    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end) {
+        int k;
+        int k1;
+        int k2;
+        precision_t ig;
+        precision_t delta;
+        precision_t ent;
+        precision_t ent1;
+        precision_t ent2;
        auto N = precision_t(end - start);
        k = metrics.computeNumClasses(start, end);
        k1 = metrics.computeNumClasses(start, cut);
@@ -148,33 +157,30 @@ namespace mdlp {
        ent2 = metrics.entropy(cut, end);
        ig = metrics.informationGain(start, cut, end);
        delta = static_cast<float>(log2(pow(3, precision_t(k)) - 2) -
-            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
+                                   (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
        precision_t term = 1 / N * (log2(N - 1) + delta);
        return ig > term;
    }

    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
-    indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
-    {
+    indices_t CPPFImdlp::sortIndices(samples_t &X_, labels_t &y_) { // returns the indices 0..X_.size()-1 ordered by ascending X_, ties broken by ascending y_
        indices_t idx(X_.size());
        iota(idx.begin(), idx.end(), 0);
-        for (size_t i = 0; i < X_.size(); i++)
-            stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
+        stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) { // a single pass is enough: the removed outer loop only re-sorted an already sorted idx with the same comparator
            if (X_[i1] == X_[i2])
                return y_[i1] < y_[i2];
            else
                return X_[i1] < X_[i2];
-                });
+        });
        return idx;
    }

-    cutPoints_t CPPFImdlp::getCutPoints()
-    {
+    cutPoints_t CPPFImdlp::getCutPoints() { // not const: sorts cutPoints in place, then returns it
        sort(cutPoints.begin(), cutPoints.end());
        return cutPoints;
    }
-    int CPPFImdlp::get_depth() const
-    {
+
+    int CPPFImdlp::get_depth() const { // read-only accessor: returns the current value of depth
        return depth;
    }
 }