2 Commits

Author SHA1 Message Date
7b0673fd4b Update README 2024-06-24 11:47:03 +02:00
a1346e1943 Fix Error in percentile method 2024-06-24 10:55:26 +02:00
3 changed files with 12 additions and 4 deletions

View File

@@ -58,7 +58,7 @@ namespace mdlp {
results.reserve(percentiles.size()); results.reserve(percentiles.size());
for (auto percentile : percentiles) { for (auto percentile : percentiles) {
const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.)); const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
const auto indexLower = clip(i, 0, data.size() - 1); const auto indexLower = clip(i, 0, data.size() - 2);
const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1); const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
const double fraction = const double fraction =
(percentile / 100.0 - percentI) / (percentile / 100.0 - percentI) /

View File

@@ -18,7 +18,7 @@ namespace mdlp {
void fit_t(torch::Tensor& X_, torch::Tensor& y_); void fit_t(torch::Tensor& X_, torch::Tensor& y_);
torch::Tensor transform_t(torch::Tensor& X_); torch::Tensor transform_t(torch::Tensor& X_);
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_); torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
static inline std::string version() { return "1.2.1"; }; static inline std::string version() { return "1.2.2"; };
protected: protected:
labels_t discretizedData = labels_t(); labels_t discretizedData = labels_t();
cutPoints_t cutPoints; cutPoints_t cutPoints;

View File

@@ -14,9 +14,17 @@ The implementation tries to mitigate the problem of different label values with
Other features: Other features:
- Intervals with the same value of the variable are not taken into account for cutpoints. - Intervals with the same value of the variable are not taken into account for cutpoints.
- Intervals have to have more than two examples to be evaluated. - Intervals have to have more than two examples to be evaluated (mdlp).
The algorithm returns the cut points for the variable. - The algorithm returns the cut points for the variable.
- The transform method maps each value x to the index i of the interval it falls in, where the cut points satisfy:
cut[i - 1] <= x < cut[i]
The index is found with the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) algorithm.
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
## Sample ## Sample