From 5679d607e5a4ab121e718c5e9921594baa44b0e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Thu, 6 Jul 2023 16:06:52 +0200
Subject: [PATCH] Add transform method to discretize values using CutPoints

---
Reviewer notes (ignored by git am):
- transform() now clears discretizedData before filling it; previously a
  second call appended to the labels produced by the first call.
- NOTE(review): line breaks, indentation and blank context lines were
  reconstructed from a whitespace-collapsed copy of this patch, and the
  template arguments in the context lines `static_cast(maxEntropyIdx)` and
  `numeric_limits::max()` appear to have been stripped; confirm the context
  against the target tree before applying. The index hashes below are those
  of the original commit.

 CPPFImdlp.cpp | 25 ++++++++++++++++++++++++-
 CPPFImdlp.h   |  2 ++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp
index 92f3353..8e5f27b 100644
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@@ -7,7 +7,7 @@
 
 namespace mdlp {
 
-    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
         max_depth(max_depth_),
         proposed_cuts(proposed)
     {
@@ -37,6 +37,7 @@ namespace mdlp {
         y = y_;
         num_cut_points = compute_max_num_cut_points();
         depth = 0;
+        discretizedData.clear();
         cutPoints.clear();
         if (X.size() != y.size()) {
             throw invalid_argument("X and y must have the same size");
@@ -208,4 +209,26 @@ namespace mdlp {
         }
         cutPoints.erase(cutPoints.begin() + static_cast(maxEntropyIdx));
     }
+    /**
+     * Discretize values using the cut points currently held in cutPoints.
+     *
+     * Each value is mapped to the number of cut points that are <= it (the
+     * offset returned by upper_bound), which requires cutPoints to be sorted
+     * in ascending order.
+     *
+     * @param data values to discretize
+     * @return reference to the internal label buffer, one label per value;
+     *         the buffer is overwritten by the next call to transform()
+     */
+    labels_t& CPPFImdlp::transform(const samples_t& data)
+    {
+        // Reset first so repeated calls do not append to earlier results.
+        discretizedData.clear();
+        discretizedData.reserve(data.size());
+        for (const precision_t& item : data) {
+            auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
+            discretizedData.push_back(static_cast<labels_t::value_type>(upper - cutPoints.begin()));
+        }
+        return discretizedData;
+    }
 }
diff --git a/CPPFImdlp.h b/CPPFImdlp.h
index b6066c4..1fb0cab 100644
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@@ -20,6 +20,7 @@ namespace mdlp {
         Metrics metrics = Metrics(y, indices);
         cutPoints_t cutPoints;
         size_t num_cut_points = numeric_limits::max();
+        labels_t discretizedData = labels_t();
 
         static indices_t sortIndices(samples_t&, labels_t&);
 
@@ -36,6 +37,7 @@ namespace mdlp {
         ~CPPFImdlp();
         void fit(samples_t&, labels_t&);
         inline cutPoints_t getCutPoints() const { return cutPoints; };
+        labels_t& transform(const samples_t&);
         inline int get_depth() const { return depth; };
         static inline string version() { return "1.1.2"; };
     };