Discretizer (#8)

* Add better check in testKBins.py

* Add Discretizer base class for both discretizers

* Refactor constructor initialization order
This commit is contained in:
Ricardo Montañana Gómez
2024-06-05 17:53:08 +02:00
committed by GitHub
parent f258fc220f
commit 638bb2a59e
12 changed files with 294 additions and 154 deletions

View File

@@ -6,16 +6,14 @@
namespace mdlp {
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
// Parameterized constructor: default-constructs the Discretizer base and
// initializes min_length, max_depth and proposed_cuts from the arguments.
// The body is intentionally empty; all work happens in the initializer list.
// NOTE(review): `proposed` presumably bounds the number of cut points (as a
// count or as a fraction of the samples) — confirm against
// compute_max_num_cut_points().
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) :
Discretizer(),
min_length(min_length_),
max_depth(max_depth_),
proposed_cuts(proposed)
{
}
// Default constructor, defaulted out of line: the compiler supplies the body.
CPPFImdlp::CPPFImdlp() = default;
// Destructor, defaulted out of line: the compiler supplies the body.
CPPFImdlp::~CPPFImdlp() = default;
size_t CPPFImdlp::compute_max_num_cut_points() const
{
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
@@ -208,14 +206,5 @@ namespace mdlp {
}
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
}
// Discretizes `data` using the current cutPoints.
// Any previous contents of the member discretizedData are discarded; it is
// then filled with one label per input sample and returned by reference.
// NOTE(review): std::upper_bound requires cutPoints to be sorted ascending —
// presumably guaranteed by the fitting step; confirm.
labels_t& CPPFImdlp::transform(const samples_t& data)
{
    discretizedData.clear();
    discretizedData.reserve(data.size());
    // The cut points do not change while labelling, so fetch the range once.
    const auto firstCut = cutPoints.begin();
    const auto lastCut = cutPoints.end();
    for (const precision_t& sample : data) {
        // The label is the position of the first cut point strictly greater
        // than the sample, i.e. the count of cut points <= sample.
        const auto bin = std::upper_bound(firstCut, lastCut, sample) - firstCut;
        discretizedData.push_back(bin);
    }
    return discretizedData;
}
}