mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-21 02:15:57 +00:00
Compare commits
2 Commits
b3fc598c29
...
7b0673fd4b
Author | SHA1 | Date | |
---|---|---|---|
7b0673fd4b
|
|||
a1346e1943
|
@@ -58,7 +58,7 @@ namespace mdlp {
|
|||||||
results.reserve(percentiles.size());
|
results.reserve(percentiles.size());
|
||||||
for (auto percentile : percentiles) {
|
for (auto percentile : percentiles) {
|
||||||
const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
|
const size_t i = static_cast<size_t>(std::floor(static_cast<double>(data.size() - 1) * percentile / 100.));
|
||||||
const auto indexLower = clip(i, 0, data.size() - 1);
|
const auto indexLower = clip(i, 0, data.size() - 2);
|
||||||
const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
|
const double percentI = static_cast<double>(indexLower) / static_cast<double>(data.size() - 1);
|
||||||
const double fraction =
|
const double fraction =
|
||||||
(percentile / 100.0 - percentI) /
|
(percentile / 100.0 - percentI) /
|
||||||
|
@@ -18,7 +18,7 @@ namespace mdlp {
|
|||||||
void fit_t(torch::Tensor& X_, torch::Tensor& y_);
|
void fit_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
torch::Tensor transform_t(torch::Tensor& X_);
|
torch::Tensor transform_t(torch::Tensor& X_);
|
||||||
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
|
torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_);
|
||||||
static inline std::string version() { return "1.2.1"; };
|
static inline std::string version() { return "1.2.2"; };
|
||||||
protected:
|
protected:
|
||||||
labels_t discretizedData = labels_t();
|
labels_t discretizedData = labels_t();
|
||||||
cutPoints_t cutPoints;
|
cutPoints_t cutPoints;
|
||||||
|
12
README.md
12
README.md
@@ -14,9 +14,17 @@ The implementation tries to mitigate the problem of different label values with
|
|||||||
Other features:
|
Other features:
|
||||||
|
|
||||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||||
- Intervals have to have more than two examples to be evaluated.
|
- Intervals have to have more than two examples to be evaluated (mdlp).
|
||||||
|
|
||||||
The algorithm returns the cut points for the variable.
|
- The algorithm returns the cut points for the variable.
|
||||||
|
|
||||||
|
- The transform method uses the cut points to return, for each value x, the index i of the interval that contains it, i.e. the index that satisfies:
|
||||||
|
|
||||||
|
cut[i - 1] <= x < cut[i]
|
||||||
|
|
||||||
|
using the [std::upper_bound](https://en.cppreference.com/w/cpp/algorithm/upper_bound) algorithm
|
||||||
|
|
||||||
|
- K-Bins discretization is also implemented, and "quantile" and "uniform" strategies are available.
|
||||||
|
|
||||||
## Sample
|
## Sample
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user