mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-16 07:55:58 +00:00
Add transform method to discretize values using CutPoints
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed): min_length(min_length_),
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed)
|
||||
{
|
||||
@@ -37,6 +37,7 @@ namespace mdlp {
|
||||
y = y_;
|
||||
num_cut_points = compute_max_num_cut_points();
|
||||
depth = 0;
|
||||
discretizedData.clear();
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
@@ -208,4 +209,13 @@ namespace mdlp {
|
||||
}
|
||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||
}
|
||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
||||
{
|
||||
discretizedData.reserve(data.size());
|
||||
for (const precision_t& item : data) {
|
||||
auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
||||
discretizedData.push_back(upper - cutPoints.begin());
|
||||
}
|
||||
return discretizedData;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user