From 8f6e16f04f011ad659f25bf424d15dd9ea8195e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 2 Jul 2024 09:40:06 +0200 Subject: [PATCH 1/2] Fix BinDisc quantile mistakes --- .vscode/launch.json | 2 +- BinDisc.cpp | 28 ++- BinDisc.h | 1 - CPPFImdlp.cpp | 6 +- Discretizer.cpp | 14 +- Discretizer.h | 4 +- tests/BinDisc_unittest.cpp | 316 +++++++++++++++++++-------------- tests/Discretizer_unittest.cpp | 9 + tests/Experiments.hpp | 102 +++++++++++ tests/datasets/tests.txt | 35 ++++ tests/k | Bin 0 -> 34000 bytes tests/k.cpp | 32 ++++ tests/t | Bin 0 -> 74504 bytes tests/t.cpp | 102 +++++++++++ tests/tests_do.py | 39 ++++ tests/tests_generate.ipynb | 85 +++++++++ 16 files changed, 618 insertions(+), 157 deletions(-) create mode 100644 tests/Experiments.hpp create mode 100644 tests/datasets/tests.txt create mode 100755 tests/k create mode 100644 tests/k.cpp create mode 100755 tests/t create mode 100644 tests/t.cpp create mode 100644 tests/tests_do.py create mode 100644 tests/tests_generate.ipynb diff --git a/.vscode/launch.json b/.vscode/launch.json index fa381ef..1342f2d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "name": "C++ Launch config", "type": "cppdbg", "request": "launch", - "program": "${workspaceFolder}/tests/build/Metrics_unittest", + "program": "${workspaceFolder}/tests/build/BinDisc_unittest", "cwd": "${workspaceFolder}/tests/build", "args": [], "launchCompleteCommand": "exec-run", diff --git a/BinDisc.cpp b/BinDisc.cpp index 551192c..afc2e8d 100644 --- a/BinDisc.cpp +++ b/BinDisc.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "BinDisc.h" #include @@ -20,7 +19,8 @@ namespace mdlp { // y is included for compatibility with the Discretizer interface cutPoints.clear(); if (X.empty()) { - cutPoints.push_back(std::numeric_limits::max()); + cutPoints.push_back(0.0); + cutPoints.push_back(0.0); return; } if (strategy == strategy_t::QUANTILE) { @@ -35,13 +35,12 @@ namespace mdlp { } std::vector linspace(precision_t start, precision_t end, int num) { - // Doesn't include end point as it is not needed if (start == end) { - return { 0 }; + return { start, end }; } precision_t delta = (end - start) / static_cast(num - 1); std::vector linspc; - for (size_t i = 0; i < num - 1; ++i) { + for (size_t i = 0; i < num; ++i) { precision_t val = start + delta * static_cast(i); linspc.push_back(val); } @@ -55,6 +54,7 @@ namespace mdlp { { // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html std::vector results; + bool first = true; results.reserve(percentiles.size()); for (auto percentile : percentiles) { const size_t i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); @@ -64,8 +64,9 @@ namespace mdlp { (percentile / 100.0 - percentI) / (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; - if (value != results.back()) + if (value != results.back() || first) // first needed as results.back() return is undefined for empty vectors results.push_back(value); + first = false; } return results; } @@ -75,25 +76,16 @@ namespace mdlp { auto data = X; std::sort(data.begin(), data.end()); if (data.front() == data.back() || data.size() == 1) { - // if X is constant - cutPoints.push_back(std::numeric_limits::max()); + // if X is constant, pass any two given points that shall be ignored in transform + cutPoints.push_back(data.front()); + cutPoints.push_back(data.front()); return; } cutPoints = percentile(data, quantiles); - normalizeCutPoints(); } void BinDisc::fit_uniform(samples_t& X) { - auto minmax = std::minmax_element(X.begin(), X.end()); cutPoints = linspace(*minmax.first, *minmax.second, n_bins + 1); - normalizeCutPoints(); - } - void BinDisc::normalizeCutPoints() - { - // Add max value to the end - cutPoints.push_back(std::numeric_limits::max()); - // Remove first as it is not needed - cutPoints.erase(cutPoints.begin()); } } \ No newline at end of file diff --git a/BinDisc.h b/BinDisc.h index d1bb94b..eaa7ddf 100644 --- a/BinDisc.h +++ b/BinDisc.h @@ -20,7 +20,6 @@ namespace mdlp { private: void fit_uniform(samples_t&); void fit_quantile(samples_t&); - void normalizeCutPoints(); int n_bins; strategy_t strategy; }; diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index c2d4733..f9fc660 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -25,7 +25,7 @@ namespace mdlp { } if (proposed_cuts < 1) return static_cast(round(static_cast(X.size()) * proposed_cuts)); - return static_cast(proposed_cuts); + return static_cast(proposed_cuts); // As the first and last cutpoints shall be ignored in transform } void CPPFImdlp::fit(samples_t& X_, labels_t& y_) @@ -58,6 +58,10 @@ namespace mdlp { resizeCutPoints(); } } + // Insert first & last X value to the cutpoints as them shall be ignored in transform + auto minmax = std::minmax_element(X.begin(), X.end()); + cutPoints.push_back(*minmax.second); + cutPoints.insert(cutPoints.begin(), *minmax.first); } pair CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end) diff --git a/Discretizer.cpp b/Discretizer.cpp index 9d637ca..1a30d38 100644 --- a/Discretizer.cpp +++ b/Discretizer.cpp @@ -5,9 +5,19 @@ namespace mdlp { { discretizedData.clear(); discretizedData.reserve(data.size()); + // CutPoints always have more than two items + // Have to ignore first and last cut points provided + auto first = cutPoints.begin() + 1; + auto last = cutPoints.end() - 1; for (const precision_t& item : data) { - auto upper = std::upper_bound(cutPoints.begin(), cutPoints.end(), item); - discretizedData.push_back(upper - cutPoints.begin()); + auto upper = std::lower_bound(first, last, item); + int number = upper - first; + /* + OJO + */ + if (number < 0) + throw std::runtime_error("number is less than 0 in discretizer::transform"); + discretizedData.push_back(number); } return discretizedData; } diff --git a/Discretizer.h b/Discretizer.h index 9749af8..0c7fafe 100644 --- a/Discretizer.h +++ b/Discretizer.h @@ -18,10 +18,10 @@ namespace mdlp { void fit_t(torch::Tensor& X_, torch::Tensor& y_); torch::Tensor transform_t(torch::Tensor& X_); torch::Tensor fit_transform_t(torch::Tensor& X_, torch::Tensor& y_); - static inline std::string version() { return "1.2.2"; }; + static inline std::string version() { return "1.2.3"; }; protected: labels_t discretizedData = labels_t(); - cutPoints_t cutPoints; + cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform }; } #endif diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 2d4437c..cdcc895 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -4,6 +4,7 @@ #include "gtest/gtest.h" #include "ArffFiles.h" #include "../BinDisc.h" +#include "Experiments.hpp" namespace mdlp { const float margin = 1e-4; @@ -40,10 +41,11 @@ namespace mdlp { auto y = labels_t(); fit(X, y); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(3.66667, cuts.at(0), margin); - EXPECT_NEAR(6.33333, cuts.at(1), margin); - EXPECT_EQ(numeric_limits::max(), cuts.at(2)); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(3.66667, cuts.at(1), margin); + EXPECT_NEAR(6.33333, cuts.at(2), margin); + EXPECT_NEAR(9.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -53,10 +55,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(3.666667, cuts[0], margin); - EXPECT_NEAR(6.333333, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts[0], margin); + EXPECT_NEAR(3.666667, cuts[1], margin); + EXPECT_NEAR(6.333333, cuts[2], margin); + EXPECT_NEAR(9, cuts[3], margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -66,12 +69,13 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc3Q, X10BinsQuantile) @@ -79,12 +83,13 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(4, cuts[0]); - EXPECT_EQ(7, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc3U, X11BinsUniform) @@ -92,10 +97,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(4.33333, cuts[0], margin); - EXPECT_NEAR(7.66667, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.33333, cuts.at(1), margin); + EXPECT_NEAR(7.66667, cuts.at(2), margin); + EXPECT_NEAR(11.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -105,10 +111,11 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(4.33333, cuts[0], margin); - EXPECT_NEAR(7.66667, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(4.33333, cuts.at(1), margin); + EXPECT_NEAR(7.66667, cuts.at(2), margin); + EXPECT_NEAR(11.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; EXPECT_EQ(expected, labels); @@ -118,8 +125,9 @@ namespace mdlp { samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1, cuts.at(1), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 0, 0 }; EXPECT_EQ(expected, labels); @@ -129,8 +137,9 @@ namespace mdlp { samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1, cuts.at(1), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 0, 0 }; EXPECT_EQ(expected, labels); @@ -140,16 +149,18 @@ namespace mdlp { samples_t X = {}; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(0, cuts.at(0), margin); + EXPECT_NEAR(0, cuts.at(1), margin); } TEST_F(TestBinDisc3Q, EmptyQuantile) { samples_t X = {}; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(1, cuts.size()); - EXPECT_EQ(numeric_limits::max(), cuts[0]); + ASSERT_EQ(2, cuts.size()); + EXPECT_NEAR(0, cuts.at(0), margin); + EXPECT_NEAR(0, cuts.at(1), margin); } TEST(TestBinDisc3, ExceptionNumberBins) { @@ -160,10 +171,11 @@ namespace mdlp { samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(1.66667, cuts[0], margin); - EXPECT_NEAR(2.33333, cuts[1], margin); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(4, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1.66667, cuts.at(1), margin); + EXPECT_NEAR(2.33333, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); auto labels = transform(X); labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 }; EXPECT_EQ(expected, labels); @@ -174,9 +186,10 @@ namespace mdlp { samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(2, cuts.size()); - EXPECT_NEAR(1.66667, cuts[0], margin); - EXPECT_EQ(numeric_limits::max(), cuts[1]); + ASSERT_EQ(3, cuts.size()); + EXPECT_NEAR(1, cuts.at(0), margin); + EXPECT_NEAR(1.66667, cuts.at(1), margin); + EXPECT_NEAR(3.0, cuts.at(2), margin); auto labels = transform(X); labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 }; EXPECT_EQ(expected, labels); @@ -187,11 +200,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - ASSERT_EQ(3.75, cuts[0]); - EXPECT_EQ(6.5, cuts[1]); - EXPECT_EQ(9.25, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(3.75, cuts.at(1), margin); + EXPECT_NEAR(6.5, cuts.at(2), margin); + EXPECT_NEAR(9.25, cuts.at(3), margin); + EXPECT_NEAR(12.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -201,11 +215,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - ASSERT_EQ(3.75, cuts[0]); - EXPECT_EQ(6.5, cuts[1]); - EXPECT_EQ(9.25, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(3.75, cuts.at(1), margin); + EXPECT_NEAR(6.5, cuts.at(2), margin); + EXPECT_NEAR(9.25, cuts.at(3), margin); + EXPECT_NEAR(12.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -215,13 +230,14 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(10.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); + EXPECT_NEAR(13.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc4Q, X13BinsQuantile) @@ -229,13 +245,14 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.0, cuts[0]); - EXPECT_EQ(7.0, cuts[1]); - EXPECT_EQ(10.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.0, cuts.at(1), margin); + EXPECT_NEAR(7.0, cuts.at(2), margin); + EXPECT_NEAR(10.0, cuts.at(3), margin); + EXPECT_NEAR(13.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc4U, X14BinsUniform) @@ -243,11 +260,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.25, cuts[0]); - EXPECT_EQ(7.5, cuts[1]); - EXPECT_EQ(10.75, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.25, cuts.at(1), margin); + EXPECT_NEAR(7.5, cuts.at(2), margin); + EXPECT_NEAR(10.75, cuts.at(3), margin); + EXPECT_NEAR(14.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -257,11 +275,12 @@ namespace mdlp { samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.25, cuts[0]); - EXPECT_EQ(7.5, cuts[1]); - EXPECT_EQ(10.75, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.25, cuts.at(1), margin); + EXPECT_NEAR(7.5, cuts.at(2), margin); + EXPECT_NEAR(10.75, cuts.at(3), margin); + EXPECT_NEAR(14.0, cuts.at(4), margin); auto labels = transform(X); labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; EXPECT_EQ(expected, labels); @@ -271,13 +290,14 @@ namespace mdlp { samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.5, cuts[0]); - EXPECT_EQ(8, cuts[1]); - EXPECT_EQ(11.5, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.5, cuts.at(1), margin); + EXPECT_NEAR(8, cuts.at(2), margin); + EXPECT_NEAR(11.5, cuts.at(3), margin); + EXPECT_NEAR(15.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 3, 2, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; + labels_t expected = { 3, 1, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc4Q, X15BinsQuantile) @@ -285,13 +305,14 @@ namespace mdlp { samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(4.5, cuts[0]); - EXPECT_EQ(8, cuts[1]); - EXPECT_EQ(11.5, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(1.0, cuts.at(0), margin); + EXPECT_NEAR(4.5, cuts.at(1), margin); + EXPECT_NEAR(8, cuts.at(2), margin); + EXPECT_NEAR(11.5, cuts.at(3), margin); + EXPECT_NEAR(15.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 3, 3, 3, 3, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 0 }; + labels_t expected = { 3, 3, 3, 3, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc4U, RepeatedValuesUniform) @@ -300,13 +321,14 @@ namespace mdlp { // 0 1 2 3 4 5 6 7 8 9 fit(X); auto cuts = getCutPoints(); - EXPECT_EQ(4, cuts.size()); - EXPECT_EQ(1.0, cuts[0]); - EXPECT_EQ(2.0, cuts[1]); - ASSERT_EQ(3.0, cuts[2]); - EXPECT_EQ(numeric_limits::max(), cuts[3]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(0.0, cuts.at(0), margin); + EXPECT_NEAR(1.0, cuts.at(1), margin); + EXPECT_NEAR(2.0, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); + EXPECT_NEAR(4.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; EXPECT_EQ(expected, labels); } TEST_F(TestBinDisc4Q, RepeatedValuesQuantile) @@ -315,50 +337,80 @@ namespace mdlp { // 0 1 2 3 4 5 6 7 8 9 fit(X); auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_EQ(2.0, cuts[0]); - ASSERT_EQ(3.0, cuts[1]); - EXPECT_EQ(numeric_limits::max(), cuts[2]); + ASSERT_EQ(5, cuts.size()); + EXPECT_NEAR(0.0, cuts.at(0), margin); + EXPECT_NEAR(1.0, cuts.at(1), margin); + EXPECT_NEAR(2.0, cuts.at(2), margin); + EXPECT_NEAR(3.0, cuts.at(3), margin); + EXPECT_NEAR(4.0, cuts.at(4), margin); auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 }; + labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; EXPECT_EQ(expected, labels); } - TEST_F(TestBinDisc4U, irisUniform) + // TEST_F(TestBinDisc4U, irisUniform) + // { + // ArffFiles file; + // file.load(data_path + "iris.arff", true); + // vector& X = file.getX(); + // fit(X[0]); + // auto Xt = transform(X[0]); + // labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; + // EXPECT_EQ(expected, Xt); + // auto Xtt = fit_transform(X[0], file.getY()); + // EXPECT_EQ(expected, Xtt); + // auto Xt_t = torch::tensor(X[0], torch::kFloat32); + // auto y_t = torch::tensor(file.getY(), torch::kInt32); + // auto Xtt_t = fit_transform_t(Xt_t, y_t); + // for (int i = 0; i < expected.size(); i++) + // EXPECT_EQ(expected[i], Xtt_t[i].item()); + // } + // TEST_F(TestBinDisc4Q, irisQuantile) + // { + // ArffFiles file; + // file.load(data_path + "iris.arff", true); + // vector& X = file.getX(); + // fit(X[0]); + // auto Xt = transform(X[0]); + // labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; + // EXPECT_EQ(expected, Xt); + // auto Xtt = fit_transform(X[0], file.getY()); + // EXPECT_EQ(expected, Xtt); + // auto Xt_t = torch::tensor(X[0], torch::kFloat32); + // auto y_t = torch::tensor(file.getY(), torch::kInt32); + // auto Xtt_t = fit_transform_t(Xt_t, y_t); + // for (int i = 0; i < expected.size(); i++) + // EXPECT_EQ(expected[i], Xtt_t[i].item()); + // fit_t(Xt_t, y_t); + // auto Xt_t2 = transform_t(Xt_t); + // for (int i = 0; i < expected.size(); i++) + // EXPECT_EQ(expected[i], Xt_t2[i].item()); + // } + TEST(TestBinDiscGeneric, Fileset) { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - } - TEST_F(TestBinDisc4Q, irisQuantile) - { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - fit_t(Xt_t, y_t); - auto Xt_t2 = transform_t(Xt_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xt_t2[i].item()); + Experiments exps(data_path + "tests.txt"); + int num = 0; + while (exps.is_next()) { + Experiment exp = exps.next(); + std::cout << "Exp #: " << ++num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl; + BinDisc disc(exp.n_bins_, exp.strategy_ == "Q" ? strategy_t::QUANTILE : strategy_t::UNIFORM); + std::vector test; + for (float i = exp.from_; i < exp.to_; i += exp.step_) { + test.push_back(i); + } + // show_vector(test, "Test"); + auto empty = std::vector(); + auto Xt = disc.fit_transform(test, empty); + auto cuts = disc.getCutPoints(); + EXPECT_EQ(exp.discretized_data_.size(), Xt.size()); + for (int i = 0; i < exp.discretized_data_.size(); ++i) { + if (exp.discretized_data_.at(i) != Xt.at(i)) { + std::cout << "Error at " << i << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl; + } + } + EXPECT_EQ(exp.cutpoints_.size(), cuts.size()); + for (int i = 0; i < exp.cutpoints_.size(); ++i) { + EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin); + } + } } } diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp index 8c8f201..4fcd856 100644 --- a/tests/Discretizer_unittest.cpp +++ b/tests/Discretizer_unittest.cpp @@ -21,6 +21,15 @@ namespace mdlp { } const std::string data_path = set_data_path(); + TEST(Discretizer, Version) + { + Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM); + auto version = disc->version(); + delete disc; + std::cout << "Version computed: " << version; + EXPECT_EQ("1.2.3", version); + } + TEST(Discretizer, BinIrisUniform) { ArffFiles file; diff --git a/tests/Experiments.hpp b/tests/Experiments.hpp new file mode 100644 index 0000000..166c5fb --- /dev/null +++ b/tests/Experiments.hpp @@ -0,0 +1,102 @@ +#ifndef EXPERIMENTS_HPP +#define EXPERIMENTS_HPP +#include +#include +#include +#include +#include +#include +#include "../typesFImdlp.h" +class Experiment { +public: + Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector data_discretized, std::vector cutpoints) : + from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints } + { + if (strategy != "Q" && strategy != "U") { + throw std::invalid_argument("Invalid strategy " + strategy); + } + } + float from_; + float to_; + float step_; + int n_bins_; + std::string strategy_; + std::vector discretized_data_; + std::vector cutpoints_; +}; +class Experiments { +public: + Experiments(const std::string filename) : filename{ filename } + { + test_file.open(filename); + if (!test_file.is_open()) { + throw std::runtime_error("File " + filename + " not found"); + } + exp_end = false; + } + ~Experiments() + { + test_file.close(); + } + bool end() const + { + return exp_end; + } + bool is_next() + { + while (std::getline(test_file, line) && line[0] == '#'); + if (test_file.eof()) { + exp_end = true; + return false; + } + return true; + } + Experiment next() + { + return parse_experiment(line); + } +private: + std::tuple parse_header(const std::string& line) + { + std::istringstream iss(line); + std::string from_, to_, step_, n_bins, strategy; + iss >> from_ >> to_ >> step_ >> n_bins >> strategy; + return { std::stof(from_), std::stof(to_), std::stof(step_), std::stoi(n_bins), strategy }; + } + template + std::vector parse_vector(const std::string& line) + { + std::istringstream iss(line); + std::vector data; + std::string d; + while (iss >> d) { + data.push_back(std::is_same::value ? std::stof(d) : std::stoi(d)); + } + return data; + } + Experiment parse_experiment(std::string& line) + { + auto [from_, to_, step_, n_bins, strategy] = parse_header(line); + std::getline(test_file, line); + auto data_discretized = parse_vector(line); + std::getline(test_file, line); + auto cutpoints = parse_vector(line); + return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints }; + } + std::ifstream test_file; + std::string filename; + std::string line; + bool exp_end; +}; +template +void show_vector(const std::vector& data, std::string title) +{ + std::cout << title << ": "; + std::string sep = ""; + for (const auto& d : data) { + std::cout << sep << d; + sep = ", "; + } + std::cout << std::endl; +} +#endif \ No newline at end of file diff --git a/tests/datasets/tests.txt b/tests/datasets/tests.txt new file mode 100644 index 0000000..6712244 --- /dev/null +++ b/tests/datasets/tests.txt @@ -0,0 +1,35 @@ +# +# from, to, step, #bins, Q/U +# discretized data +# cut points +# +0, 100, 1, 4, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 24.75, 49.5, 74.25, 99.0 +0, 50, 1, 4, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 12.25, 24.5, 36.75, 49.0 +0, 100, 1, 3, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 33.0, 66.0, 99.0 +0, 50, 1, 3, Q +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 16.33333, 32.66667, 49.0 +0, 10, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 2, 2, 2 +0.0, 3.0, 6.0, 9.0 +0, 100, 1, 4, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 24.75, 49.5, 74.25, 99.0 +0, 50, 1, 4, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.0, 12.25, 24.5, 36.75, 49.0 +0, 100, 1, 3, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 33.0, 66.0, 99.0 +0, 50, 1, 3, U +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.0, 16.33333, 32.66667, 49.0 +0, 10, 1, 3, U +0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +0.0, 3.0, 6.0, 9.0 diff --git a/tests/k b/tests/k new file mode 100755 index 0000000000000000000000000000000000000000..331da27e02f91fa502d6a60c5328cfd8a31744b7 GIT binary patch literal 34000 zcmeHw4|r77weLy7pP*<4R2tFh6wyL~m>I?`}3UlBAc6utsLk}IYIhW-qAF4qi0L#-h}lCz6n z4i(%uMO8I4JgQD|6X{UuhKx_c&^d}vL(OOPSE}ensb3ukJS^>L=#(Sd9I8?5hptz0 z8r~;%8IbX7IYN`%pvVo1oQ8(TX{hZ`xa~dhH%6T zH#fI6Zcz>SVr>btA)JVsaNBq|zRira#*!eKn>stq_GmoO)*5b(CAXPdN|i)qMpN;4 zc$?W8)rzQAn-ih>q|ev5IUF~W@o+4e2sH)*b-*n|SFuL3Es=~z!!4o4b!MP`k*O4K z-AZcLC(9e#I+BLDzIAJ?HDcC96CEv4qb1tX(zYcEy`X2K7^5W|Yb8o!`!)mUL^9Gi zZyv3*3aYkPN1GZUK{X%Y8x?-l^5y2jl2T(;b!gdgv#g}7WRZ33E4L08mK)1AY^Yim zGJPcr6?Vf02$q*DEOSswmSoV%GOC!&eEcgwoSn!=lZ?qmA&ySL>dQCOO2@i4{O|=@ zw_bc|FWPph0gesgA9;{pBXmFL9W!GPEHpL>?!NzaKbG-Cqgm=talOFUYROMDzAQ9| z8!tU^BSO#=<95L<8-4&GVY0DP>f2s?0{u@i9@2HDPcZ&L>dCqHVk^iudQqn`X!;pa zERj&m^&toTG{&eu+W#pI>X^e6Sr?O0k)|14=)gl!t9TrEihH^gJMg@J3&HEatGK5K za~yc;TbEJ?UhjQGt90N`(aeB;2foOG4?6HLtW|0q_^B3l{9xPy;}#gVz_EtRL`e+AJ>0ObxI|UJjwNcq&jW!M|N}l@2O5(`;i{5KS*`j(vNg; z{a&imR(_))n2ZQ(~Ya{YFyQ}T7Bmg_fBowo8Le$;7vzIQ{Z7jFor-Ux1g^LXvL zfUnQ@%V2;1r_(Wc8u<4>1O#z^CdAMqdkHZj&?8aD@1-+UyP>ewAjt=|>w4&b;yy%xsD@h&>{ z!SKP&Oh#{PzcrSk=R zy#A1?D>B;J(N}cGb+FgbnZXok!sCAj&KOJ$l8&$cX=H@a-PaHQ@;S0v)%vP+^}*YY z?ZU|ZP~@4|`$dA&0=>bVHNC;qp`Vdw<9Kf{RdcAPM;#qRw>;(v@KYRv*q`2yw6CXX z&+bycQS}U^h9Eo(EB2Ap`e&Fp#Q+<6B%HTb*G z6T0{nkPa*vAXdAx`jGV-LZiL1l6Tbv=EM70THHw^%(HoFzL(PftOPUpY4M4 zruTn3GP2hTZKo-Qh@BpUZF*!r)=;-hVjJ8_q@x!w#%g*Fnm$^fxVos#^BwJ#yhe#ex{df;b-R4jfmQ9;_moc`O$%sN(d2VQ_%6KTVfWdU}+1fXBlv z2+fPeLDKRWw8Jo#gO;W3z;U>OEU{)N?%=jt>aO*$Cf!DGE|$#zqDQ`3vCt41SYO?B5U&B80$)Ze>yN`bF?>L7h9~$K&yKj0shVYLksXPx zyH+XT)X#0vR0l-`4ka_G_e%mpMfdjY=u4jR>?}B3y8n-uDq%Rs$xyW8Ao-P$W)msJ zGec?)NgVJgG-=IU`~wFOiZtMAC+z^y2#v2lT}|AKS`YMn&uz06V!rC_A~G+P8FSn^!acl&}?tg}2Bq@A@B zzzt=1aE`UYb8HAgEfH{azzplt={>Z@*jP1Weh}CdSD|~gU}zH>rQOU-?M~&Y5lsEs z;o+<#TY{PhLe*aM`XoB(6_*3_dxA^$Cp#6=UkFXW1goYn?C!vmfP;X8WP&S^0e}v3 ztRDdW5b(qYYtZg>@LA|!q+k6{6@I!whs+GfAW9bSDy8l)3v4FzQd@ZR478^dQZx7h z+UT*ZOL|xG!o#*t9DP4ye<7!$oei5+s%C)o>n5Gp3!dRXOXU9aVk9{#%yD%FSIMn0 zpkIrnLqO`yjmP=-)PM?rc^}i{(8i`cs+83ciwshG-6tCdo^-~6%P1sT5z&iQX=OOQ zImL|!9DNHHte)x3rp1}lo3CY-VvKx8hQ9 z4MIme$6n!AQ`>==!%S}f=75nbRda(@*r^Z~*pVpwP>jh~B6q^KtX)O)uojsIZBrw| zG~D0vjP|GR0u-BTYPP*W$6lMKkXt%YeBS=eAtN~tth6ksbJ&_jptKg(<<-5IR6jbA zQ`Zb9fVwI!z_EuQi4Tn%I2pw9Y?CY|~fi75N7n<*ZweJEmjyWAg zJ#eT82uuSnW-Kbo)z3!O_z~0x z(K12?S9p2Oq@e=`6>1&<{#&vPd$FltJG%(-B)1DBwA-J){}bNWhJ*9a{`PbpXv0Sf zrMF>t&9G7Qoo4K+K<(1&l*XQ6NSY3N-yxf-jfT*erCKr@d+7FKrR1iD~boZ1@pc9;s(k`uNYB26l5Y`Li&_x<79BAj43U!ytdjogLJYJ_Z`T@%zJrH=o))skbZRgVv^Vra!Mp*QK0d4H{;nD} z?JcyYT>l8TA0hHbd?JNCAhmusXuE(X9&$(2Wl#CmholuXcOF7^&!+>HEj@57<=i>ok=Dx0ktM%OsiD0mz`?9MdsuldPt*|%-_1qKFi545ogQ zeuKwCewEMsShD2OR@w+cEoM!%G?!bV?H`Q;+aa3ur{A=iSym|1ruQpw58JyJM-_^6 zCycjpmc#KGk%z-KYftwS*z@Oyp$-LmVOJ{WlX<{P))`hZ9AS^MQ7kTw#5lX>GkjQ( z+x;nz!L`3B&FOqxI~}FCDG2Js2^RK1qfqJq`U9yyoo6j*p5*u0+Wnvo1XKH}amgq3 zdip3Kw;%B0v{3JsuEaLDllB9w@DJ#Uk*5&yX+Wds;(bLA^kweCN=rw!CnNNmDo6x2;fo-Vfxebbt=qwsOVn0g2AXk?D9Q3Ud7pLEuGE&@wYsM zha6EQ!+}pe)A}+geiE4$?Q$9WcQur>QZtVoi!;{pbzss~ar-~{guORlcG%V0Ffxbp zBRuHL!;bb=bU`#*vOh5i7Cg>QhCt*z55e-3#mwOuUeT+<2;zvkK2Y>@A5s{cu_CjY zX;H5rYGPvge;gSZN#BA$=m1+F;y5N}FY0R=Q`zI8jbnKQ3 z(cW-yXStl*)A<_quXT(kNTdATpf+gFJ36%M*#)ri3Lk9FQ$J59V{M z-@Kyl^{Kd#{1WJnHCjUsgfX&%KK&MdCx8W?*Fl$YgvR4Pi^iWJ3NdSWI|3ag1jLB1 z2k07s`mP8{7TVvIp{emQZh>(Nj9cLU5ev|ri(?h9u_fA=Y>O{l8jmKTaooG9>t>CK zyFckBR5RS(9&L?qGmYWa*0!X#5u{|)i+eejE>!~F=5TydH117q4!3$+!kuO!c2jiD zTqDo%`!e=)=efSLFV^rZzOMJZelrqdar{EI0cRUPY?Qcd#HsIJZ zG%`{O=m%T@_z|EV=Z*8<7#X2E5vIO5GSUV7d4RhCF9PfXyb^E-unCY(gl`AD5AaFA zrvN_#90Xj4-Q`ojPQY_<&FY@FM@B9Md1l$8iiN|Yz-vIm+ za4+DwxMaE|4gCNoA@QU;A+7_w8}N3(#{eG%JPi0YAbqnt32FLNT)`^_oDaAXuo|!q zuo-YS;N5_a13m^g0C*Vi1HfZ|#YZ8J%iv+a`GEHURs;SRuofi+ z;N|Z^9?<{0krB@nWEp^^fbD>_fcFEo1HK7(AK*uTPXW$9J~Hw$)d7bAC;Wb7q!?FP zX8~3MmI7`7yb7=r@JYa3fO`RZ0cX7ryAd7mQ@{w|x%j?f{vTjBz*fMGfHwnn0qzCd z4LB8N_kDop0S*D)4mbf{yZ;N|Y`{+eR{+k!CAU+&2EZQx-UE05@I$~i0Si8Y zJZ^?<2AmCeFW?Hmy?_zGV}LsVSK_?;2Y}xIJOKD@z&8P(2AqJKX48PP0oUS$e+A&p zfDyocz`Fqd74Q+jEAX}HVZa)|V}M@+oLXcUcLB}^d=Ri2kiK4R23(Ev)Vl$xP*z4I z@1{BhGR|0XSZ? zW9${M&x9_&-_jwly`W%z9(ObcJk|K@`FHpd@qNd^SG}{~?g`r`7QCQ%Y0O>Vc^mO) z0gkQqHY=UIK+$arAAnv1IN=FLTlqm|BAz|?oQH8=MLa)o@_dzfsGl_YsRhpkIJWw^ zLG?rQ!hFOl(9Z&&MKAEMqC}qq`j!6+w{_vzqHnb2gP_-fKG#W)*mSz{>l)B6anfP8 zzXA)je;4RU(7jIj*KGMmgg(tlueRxjK#xMc%t>#t>1oing1*d2NB^B*@d8(fc>W;h z{B4%@9nkY1S8ee8qm2^m4c`I(grEBTmK})i4#h`p(N&lG!N>D)z`>Vq&jJWHx z*>e}@zXCm%J$pg_H_&t0a}f0RK_`7IJstKO7J4pw7EeHYhJ1-rK4#mq5_D=G;nHq@ zrA^-e`s<*tankcc%s}(q3HsFk7#TshvgH+-W5F)aKY{!#C;fW6{a(;N1^ogi-QmxJ zpc`mE7kwD?3806Z^7$(){fZIb7J#11pDIE3gO1z6Z2dRb`fULHa?o>)uM_n7pug*s z&re(Z?*jc3(3d*t9d`S@pr466gM7@IU-2(5P=laTo{|S&wCF*^sSoVfL%GEq7vX#YIWId&a#t^*aj$Pc>EMi4x?{BSqu zOF>@*UP}k8w;R!o7pOkaYe9eBMbGcEZS^L2J_1j!xtM?r=S|Ra&E;&+-xx!F1?U5y zU*yz5>_B6Tfc^^T*E;F>>sdjf?*QF{950)GUeJF4`e%@Dbjq)~yI}i-{Hv)EYU2=i zE=L>XPM$W~2J}sC-57loQoaWIA*VdWwY_#5^TG2bc$T?%D29>_)u2y8j+$*8L~jQD z1ISl7)!kYD4ZJ7UITpsxTuTmImM%wfJUZ99r+oE$ zp!=LUG~4o&$36*quDPcC_USR?DevtEozpM9X0Ei`?-BWV&ik& z$i@SpPeJ~lYYvWs{xRfZ8S-kQCV3C?W%HAEe$bx)J(v9kK>s_?bLn>+^nV0B*Zg`A4EKPZ%f6+c7vTITSH4yY zx(D>t&he8ER@q}nf@d*!vdxhfa`%CLA?ROswxQNL?{G9vz2LbqM;k;R1pS&Z=)<6| z0zKO}NWOR~;y38o)*8_(L8tS{T=E-0uLpexc&)gdf2-xEouK~|^z)tcYn6=%g`Mlv zrBCGDn{Pavk8K3ibE_sE%}2gG@y-I{;k=3Xy7NzY(~&0KR`6A(6Ayi+Y@XO=?4aAs z1wqY?mvIY>TVUJ*;}#gVz_$>nxDtmBakldQ=w?(LEwf$KObqlN9tPdpzz`H3K!i!?wsTC)K2y@9``nKUF(6bI=Mby=t}zk++XYY zajq^aWPVo)yh>n`z%2sr6nMYD9|?R~;6Z_}2&8WCY^J5&Ie2o}T(7UZ#8=|;mX(&3 zmo6$R^UkS@M!dmrQV0vm=jP(BC@USsT`||Fx@u{8yRU4KdG^_>UGLj`^&dHJt;hFms&`tBkhPukM95O#yEuMxghY@_jIg4g?k z#s_hbhv(Pzg#CwQM!zlDQ5++Cpe0ge9=4zlV081s7! zdQ1Y9`TW-~egc>W@48L!TA#NCf4d8RHW-M1y9-|pe71hqGyinsW_&&6F4_1SS%GPW zelDe!*~0D2Fx>Il__dcV>9-vxda?Do-_en#67WnTA{+?om=kL3LKOp$tFDkrlpS~BT_Aj_d z;fugaWijv`<80%)jef4{KbH&tucTky&eeiHf3D)!`uvUH|K>u4=j-xl>?Ofh&sO*< z;HW%|K;uQf8(jTPoS^jl`d9qSr`x9on@pqVB>sRZ7e5ZB%drzgrJYYN@H2Xu5cdoI z7O~G#!B0hkLiU*{il4t%fy@!X zpMIXg=VAV+oIgqFbFSCV_1XLv_XYt!9eUEkZKw>vpQzLc|9fKRMuj$RV}7YPz@x(7 zCF89T+P?_i?LS3Gn8-eEKfDb1Y55+bQ07HLY7zf{@;?_TeJEBIdVlTC`<_=VtC%6k8f;Qt8x^!y@2?~8&qMufjk^qeaM`lWI< zKc^pWlO4(~@-wIQ&l=*-Ki%-iegQY*5*7Yg5|`Ep{yxD!E&X~0|0}_}4}8VXoI8a7A;I4{N8xqcI4F3Z_>cCJ4|B9X87DN<{(BOK;Wk`K z1pk!y!!*Iy3Eu5*?Sg+`uAe!zU+on946z&3;PR;8-G22bLKUskA&;N=E>i5q>)_93 zpIN76$Cm?7YR!cjzIryW(?~WV)o%r+7x#%O!7=I1?wEVLTdY+ZD{7~9)$A@1C z{xw;5dMs(dACz^Z?RE|hNdJ3;U;EpYg72E8`qlim2!6NN`2tbud%$OV9`8R3|1H8l zOZZqeLW-iJz}40rJc6~?~b<@ zz>%om3fKO7so>rF@^!#_kl(uVmd(O{_`=L_bqT&l;>o+B&&PtFA@OIC;Afqo)~`Mn z(brW%j29*c=m4JdKPYiVxATDDn*?7YGCvjkYZ9-Z2A5|A{|oU$?GLXCezoWcH{)^& zE_7&FK`}T_@b2eSUk*I!>CQi|7JRwnONXSfF2NVM*2~`t{sz(0+V2Ga4e^Idq@AL( zRKM<-Tn|4{6Brc&n@frpAr1~GVea=_Z{G~wf~v$-!n_K(+{{_|lJhA5j>9+)UuN*tUWy0?l|IzC-EclCL zzOBm#&kBBn^s8+$B=|b9TSz)TwOHxt_S<=ack8oO@VCqQ()!#Z_yewZ zZZyV|zLGXPDI8Z}%w%{IAsV8aVy!?%qH!E0l2JUEyv1y6ZflJuz!_;XH#N64gqzI> zezB4;!yTPQV_Qpmb2J%^;2GgrIm{+pS~0`?1oUWYGQQ1diicaGW~8H~WgA2sHGZgi z2J4L_jqUBm@)BPeHIIk2C-GeMNHiIaH7~@&-D9oED1P%|CfiJUih9C~CCrZ2L~K)Q zG!puHAn@mkwEX!Z+aLnV;paJQ%$0RjH32iQW(7Th-o&LjGY~WtGPt77Fjri&rm7~i z+%Q*FuU%GEZLVFpvOciRTvxTMIzSJt#|V98=2iTYAb#L{C{`a1#R35v8-u}Dlm+M+ z^0~Qv`1KI~b_l-`3aIDYCo9wsg+fheu_+LcHtLg0-~*8@;nv1zXbp}xwRV_|ot^YB z{MNR33uZe8M+jraL$#}$>fOx*0?mOfbr?dai6`z?#KWzTwiYwo*ceSQ>V;`^f?>9_a1<;Z*2Ka0dc`Ei2mB;!*l*D$G?K z;dms&h%2Ia*nZnK^65}@gD(((B_awJs)g{Db!PoS%uqBEPKK4~T^+CR={ceI4s_sx zX)GDTO95_*M$E>x_HC+J-$G4ONSL%1lL70Yg9;Om`A6e0WSM4jHerTo3Tm60guk40 zs>5`y^O@r8wx7`J3iMptShlYZ=BqDNJ^9K_*2JV22bke9s973NTA@{%Rqn+wLu>Iq zfk>>?>_|i-tYo(LCHVP80qWWRWG368T4FHdsObs>*3_HXe2qOS;9C@lH8mj|!8^_5 zw)QA#UTM}ZBjx|1*@u(W##-u>8FDV9o1|duqm* zurh1HMr$g|8G1?BpeZ)VO||l)Gyw0|YeeaNGODrH^BX2IhH5KSkI)=#yivt=yoV#u zV*6gs7j(G2C|6XJHJ8Q6Cmf<32CQ}K8gJIft88jU5^W}aXc=k7yEdFoe$w87c3_7x zS%bdjwyoI58rnKqBcCq_BP5u#Fe=Tu)mXGLfr}8s@zxR92@t`Xbl7(+%#+ud-B9a{ z2!(w&9EsqEs|jpTwNSt(9qZ7j-Czm%Tex*ol-)puGzw)Zt}ddTqqM0lz7N$>Co!?e2$pN0g-~@*xMWJ&}KM6$%l!2Db&QHgHdYj>9BS=Ug}x> z%icwNSthU2-qUMWO9tSwHAaHfQep2tal8vAk;Kf)x}$t7Yg}50B!;4r``s~WG2-1X zc*zUxO$%FMt)Uh)hY2sE|ArZ9#&4DJEg7;6dg%{}YD0)!t^9_zT-tzThs(La(Y8DGYkQnwsRS>t;gFZjS$rpQ7WAW33a!R& zj>&EO5s&YE)2hR38|dqN1U{ktHe;fQOa+>1qtsuQvQ*->UBu8uy#IAYZl9I zUCnZR208%EoYexDhYWVjbiSSFQ;lH*|~XhaHHY+QDD^x!epn-MO(jlq2QLn#Ki?{{{jL z>&!Y8$CVpnrf9#W6dUO~yqLER(2z}AkuYrfLa`VU+~ksIYq+5qyI)giLn4ki12&wi zHpG%!V|b~UNDxbCow+5j2&=9m8mPt64U}`%3nkQl&h|@nw(l~(>0S=wjghY}G3%G>jO5RA6Ha5*Ys@4D=@E+$JEM=oHH4~nGdlTL;7uCw|CvRO8)a`Ku{zM8e?EYcBcQM)XGZK&R~mV(Z) zPn70m=|N?tIu+8r$mW9VV$2Hxwgdy{Q;EEa6ce|h4&2B&b@S2|9s z&H5_(&p9zpXIULzWtNlkY!Tg8mWYPqjhho~xWE&ENv+T${>4`WC~e5`en4mNq8H(f zv5jh1%@XmYGucM5Db>;xjE+MwD-Nk$W6Vn$l6vEkhP*RS=_+sd%l2MF$JRyz8D$Ke zxjM|GGmkL$=QL-wq*u+T=t|0FjjkuwmM?wK3}*@BnF%7L)EV!G^y{}dYWK0XDzY7}_3*Vu zY;;&S3n{g)Q;TNJiQ-(gvFK3ca0qc_J2`Rsk@l7oU7WDa+%Z98UBZ#h5LriE7*SCS zHq5;E<;!_);C{KNT4B!Xw9*>_ppL!u$;EO`8=~uvxb#>@VioZyUuug-<+@hL-sZD< zPxi=;mTbjANqf8vuVYPabD$bJVtCDPEFvhpq;vs& z-%&#Bo5P9CMoDB_E4olH8CR4oc&Tfwtu>=!f`%m%CILBWZ%!H|oVb@Hqn$YB)9aFW z8($|biEfrJI5tNht7~W!?^=y^Hb&c%xHOKmPqm@G`%rD@V{{d6i8Vr`4Xxu(33sWQ z$3AP6;47FGe6wPd$bGnM4B%;eJob`sG8vCGbR_XD&R<&SzN|ca@};5T0zV%W7bt|j z4_(O2tB(oN_jZ%<(ejldUn%l_(43{vS;27yK2G^gk?$1w*)IMGt~!0c=9Jg>g+3(m zUKhVx{%X+i*r_(Hf1mVE&oiSkM*(A!K)3#3;OX8KEwAstPm6q4o)RP-=}^n#YdkJ= zUj(sgd3`@Wzu^sdeg8f)cJ^7{UNk1z&>QTMOq_4pqU`DMbVe^0PM81?TDNS`3Vk;&%gws4 zTED;E-Jh;&_r10;&(Q#HG;@qYq=d{4{k``HlhvN~1A6Z| zjmeTX{ixwN6>Aw-ex_=p`#;qsukWW$i+sBl6sYyq@JyGyzCUV1trBq0H|gpuGl8JC zwEpy}JSx|Te6dSFQJIDRoG<;C+9*EK^4GcWx;YK!;DGve%lAtEYc->^qpKP&#(|z2 zV%c?@pE(Qb7#J%wqpEVOlS2kX)@#R!udP?LAUPKb-S3K9SgiTi@ox%n+IBd!%B}`= a_^4(Sic1~b! +#include +#include // For std::lower_bound + +std::vector searchsorted(const std::vector& cuts, const std::vector& data) { + std::vector indices; + indices.reserve(data.size()); + + for (const float& value : data) { + // Find the first position in 'a' where 'value' could be inserted to maintain order + auto it = std::lower_bound(cuts.begin(), cuts.end(), value); + // Calculate the index + int index = it - cuts.begin(); + indices.push_back(index); + } + + return indices; +} + +int main() { + std::vector cuts = { 10.0 }; + std::vector data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; + + std::vector result = searchsorted(cuts, data); + + for (int idx : result) { + std::cout << idx << " "; + } + + return 0; +} + diff --git a/tests/t b/tests/t new file mode 100755 index 0000000000000000000000000000000000000000..4242a1fc108441fd802d07013b44b7988f80a6be GIT binary patch literal 74504 zcmeHw3wV^((f^xpi;5;#YpK_jq6IH82>}9XSuP73xfl~bMC`I88?u_4$p(X0EKrFd z8m%Z*YpI`AYHg*~&r(GUNJ;cl8!ewkYc*QbjlniWYSUKT|8M5Zd2e|)Tx|P2&;Q|l z2=ktqIdjgLGiT16dEec=F)(X-YD$WsuT0-i9BufG(+r<6 z7+eGKr}_MHr(s7bh`2ONQ$=F{Ak`aAHw^s_coLTkLqpvxxuSLwh}LZ-t^8EBxw4;M?C+MGfusNH=XiQ}niH{CuO~NEvs5 z9*p#p)J63=q+W;A)6gIWx&&%{k{J5S6S;W<{3uabLG{03Xy~q2FZJq$U&CpTL6?Tq z23FC7Xvl(W8bb(!jap z(Iw+@&n+)Gr#xC&SAR}@-netd??!05x+Iy!D`twz{-T&&i^#AD~9vTPLAs)JjUz>m~pNbpe zVg00JU|nW$Glciy@3hbN9g%y-1*?u*KXmHy{41XXD(N*2!jJW!PQgIE@ZScn7ydzX z)C>Q168uvbtQUVJM&gCfPtu=flJvheiTulw$hj`bxKfk!XGD^5Elh$hO(N&?B>jIq zN&nwX!cTVNrMJ_Q$hj$roQsm+CnnL`=}GYQNyd9t68^Az5DX+>3Ks5Ta^ ziG@N&XyKfq*!WPWxV}C+J3D7dxHei0VokJiX?}50EIYfnEL;QH&4a%rqA6sf7HssY2HwKYC8 zJ5&;mh0(~N3CPZdgyL{@xHuYH9;o0xmSZf*23H8e7^R0QC?XQB$e$aUn-vORfQ2)Y zYpW3TFv-D z4{rj^5G*Yg2^3ZwqKs+T0UrFpj~%OdcBnQIll96=B)ORJL~5pH!^ITNEE*Fs zO3SONYK+jl%B!Q5C85GdZCypgsEAZlR4t1bSVwSdFm4z-xk~b*72#+lF%(xXH-N5< zl@y(d$l#BGf#} zxYwi)FdBsJ2mhV7--YmSi1D!Cp5XEz<8djg`U7nB2OCR-M(zFXu`5wJ(s)L2Q+n^l zj(CL8CgtU?Jcj(V8ee^BLO$s@Wu zHvDEAeuNF*V#9ykhHtas5gnRWyA6M$iN*QdhSxa^B6r&Gr`Y%p-yYV$VGSJCz+nv> z*1%y69M-^L4II|MVGSJCz+nv>*1*401D~Xy@{i!kJ!!$lL9hG}X@=D;vD8F+aOIA) z=eXI#*cX6Koc<#IPe0X-6nZrc_H;JC~oLaw?TuxcoSkDTUItiOY{r znNlZR8@T)cl__P?)y(C4s7$Gnt`%IqgUXa5>8j@PEmWoyQP*NFe}~GHD(NcV@-DFxDHaJi7ml=|rE`2vX3FQhW1 zJi0o$JcY`X>ga0c@C zE4X|tmA^*iYAzp1WlCjqE#`76l_^!xRlwzc+>bJ)D7ySy-b-alO>|{(`6DV*N}|h$ zGR=SX$N3Gfd^_0iUU22^o`Si7?3V23gRQNLM`OCb`pX{}#=g?b^ix-%ieV7{b5^l= za}N?4!3*gB*jF|a2bs_j&k`chS=#(usl`n8vz7U8`V)hOp5V4WO$u)79S}^}8GPl7 z*f3MV5Ru@nPwlU`U>?S7)O}@Mu;KmqHJFs0gQfzLa`|(8+9Uq%TZpj;W?mL-xPD`> zp>9L4Vcz;+V*vN9@&AHx;C?;b-wi_uHf&d<*9h|y!Z0-u2aFT$r!bFTL!eEUx2bYV zR|Ac|2?Q`@!DhhJ2_`hx8o!U3p3}vpT-=6YCpFmccCg{M@uO9p&6;V765J7Nnuou@ z=3oQ&X*k##0=t0S1x^ftu>qd0E{4JjZ)6Z<_{yM4k zm5)|-rUZ}JuF=~+Nnghk_gKzk^)p{onN9bI74xeG|l82)Hx8UtvvHC{yWF?HsA z?Dd`xLT}%T`A~DbaQ5!K>6dQXo1Ss>Z3_)!FDAS+n0`YG{;O<>5Bz5$QJQ}4pzA=Y zJqlINZb9!ly1zmEOPPPsHWM9WKLTB>2xbR5(6J6;)*XDDl+jf11>v`n{ZOy3gEItE zJCN`-k7M^d)IKhnjh@C5@ zd89T>K3miGuz<)Y(7BI4NnaQTPX*}*UZEhx#&@OS%YgB$QzLVTnI&Q%s)@*O3R*pA z_R>HLOeX-%4TD=0k21lmvJgg<8Q7eDPYe4XRS0Zf)e<|6RJ<1|-b_aO1eVwnsyg?5 zJS1r2{yzJsc^g!j$uof>ANm=VuJ)l=YpwA{UQ@8L*7!BO?qxlB-sHJO!JGE1hdT^5 zt=Gx4ga93b!^ze-Woq2Q30Ip>S|_cjUSIF%dFEHpT~P0oR&ywji1aI z>_n~c5D$KJJ5_C3M_mp!HL2EKCVC^*BM6EvEwG9%wp9dQ4Z0NHi}0@$OSxh79wko! zd$p}-8u}RY=zN01R`)1I1*C&4e;pXrDjBwVpJGzL&QwhUu?GHEXK@^Fc&jPA!w?i$ z-Al5R0S~1iG_GTe7Q$_xtj1Q&tZ*49QlCG=t9jJtp-P|v_QAbN4K}UgdQGb-%#@ZE zD}e=S9L)@x6rHs^?QhgYsnHrgofdA>>JSSEw9}5djtfnzvw+*`Tj6J4_I0-6b%Zl# zmj`e!6PTGMv1%VPum}{-MAQnAf!p8*+erAl+faQmNzN3>&vJ*?5e_8ox6;`f+CmgF zN&YHJCP}UF-(iy=vCl$m3yXagrRJ@ZMCO$&a~@diI_qJq+EH3Rq2@?HYkW0Hdz!tr z?m%n&78saWy*2(hjt?lZK2haQs<=s}?Hv>wI=Z^q@ss(h<<%M=im1D}>(?e;O+;(_ z=cv#{;V~LLm^p9Xg|$jR2tdFT&Jn9fcMPz_nNZQFz+W8 z^i8R_HNFwMu5PSa26DV}m_wYy?4{MNB8?A8e>d$Q|8v>oMUyYToI^V^(r66y2xjG; z48rp6314|ku#tj~*7!8k>^@qT($Lvv6>~IOqU6>IXn8zSXc&mhDVmf3M*?Zc7Wq?X zQbiKAd723sHSG;1M_!z_>_}6mX>Dl@Q*cad zjRmR`jkf-|ZLFcL6TBR}UP!H<7wAmCqXlMtu)lORMLYhmY z2DVk|W~a=1SfQ!c+{7E?uNvq)2d--B9U#pp#)qwfxEVz=d?A!e@Yvw!SAyIA4$Zzm zQWv?emh6>_7bYz$}sPdOcw4(g;D;~#<4fhW+d@w>r)1DPf5wG@ezoIwKURj5aKptQZ}Ijo5c+P`ZIzPw#TkhNPil}2_{&&%D-#^gw~ZO zDnP0M^iI$*3p$BIrx=`28g@QQk$L~>gpM*Nt$|%Ykz}}OZ#Oh%4MN0)~ z-Cu!B)hRWzC-Vkw9__VtKwKR)#Z&wRwmnn>&g?36nYxPZae&u&d7I{LCvMsj-ihy` z{nE9h)TYxDY|KD~UefY}?X`n(-_y;D%(lAB{fSsnqc5wrARp9?E?J&*J9WbCNx$}y z8jGdMlzM9d|Ig}#w*G(G8m^SG!VWKym1=P6oG>PcA(N&ti$peKEtM5;nUNz9sKUrCR;5u`tvK6vd-puL>#H9%xg{T$#8R~V&p4q{x zn0#Wxyj~s>dwQ}?@;-si=4OPmdM%h81(gSW7kOw|ZVl#6OA|T@N`TP1cGVbsJ9iQt zZi3l2k)z?BUi5d;FJ)rE-5UP^rC%G!U`YLZ4&Q2y4jf!(506ggXVIAI7q59*&*(Vw z-9o21nHQCTSh;5b`n4YYYKHGv58h^K^><)ItCV1V4C+Tarej1u*)g^OZU%n~;3g;q z^;O-apHh#vaaGI#ZBp;Vn@J!EgYeBbvJy91ZH@mK$$Xb`G%L$q#In`qH}FHPNp9&zRxF0n8vp16P*(0(ASWN5 zP3?`rfH|VAx$$xsgPMh(ysyU8NO^ocrsG)3_3cL8kr?qBF?|z}m8@h{7Z6k41^VE{bQfgfK z@XT(iMvJYpkup{=h}Yu_2uZEqC9Qu`b&kaxL1LO5S+u9o7EUvFZKV%osj>Z7L{P(T z{1Ip2vFdQl4+rj`6QC&TBob|<(|-nDp`;S!0Mz&oR58`~In__D#&%keqQ;@z3$|oS zjboHA+z5rRTJu$TL-)m~#^ZllVVr7Q#@!Y*s#D$=w-Vn|U1K5hNr+R4X+S}V>upM0 ztZHDkTH|-V=ThQqis3|6uXFPhX)g z7A!5=v*4+L#45($%HH&4lkljCN*g5v=Qa2j9wsRr;@L&M^Dd2+R=umsGl$BeApXl2-yQ;{PK%OmPv_2bX z9)04VD0JoZokr{|^gJ#j=u{(6y|&MGHg&yw5T~-tSi{Gil5-@EV=yE7qjBSGDMO(r zzB-<I2+9V;MyJ#Vk(qc^CSw@M^baYOoU0VI8I#)58vJ!}@+n)(+k z12W@Y^%$9Y^GdH89&Mj6Q7by3;gd_*2-pG1995v8tdxch+!~CItoLR)Jmz9xNPnCG zIaF^wNRW%U>tU9>_D(_e1}Rb)TCq6u2JK@1*a@jRKO zuUY{UMTJe&UMHo)>4er;h*oB!((E`kVVcSg@N^K*7R3Xc8H|93l?NrtDQ#RgI&PiP z)}T(K(U9zfc8YF)G0AQqM}VeTV|%zYDz?Vo**`PQ+Pd43`<#{wuj;#w#0 zZZ_=fWCQR^X0>DFftZxbm8q1&2?)Z@ASTdMdb) z)%nZ6# z%|k#-AM!Gz!%ch8N?W#ZklY%7m&Vh$P7Rw(hJ3^2WJ()pi9Et95*d{SgpTB^)V>d^ z3ZSglzcc7xhC22vjj4vK`CLU?HAYc~Nuu?FM@fk2ctk4$EtDbwlAm^&I`C&uXrXf7 ztIKF+tigWBugYj;K{Hqhy3_*0NrS++bKLMa(j}Oc*YdD-6@7j-_Z&#;F?UHNoyuqw zW~&xw55fWt1%#JqJ<=0h7iF78XTYL+m4w##i{t~KmHE)3)5)9d!0t!?Yj)@7@xt4o z?asrvd;Mt2^wu&qy*u-(H85&K5i~Ox3$1W+aesSL9o%}cCh$i}V+XmO*J7Z@sSPD> z|2rN(Fq9qdS$@T={~hmM#RDVkXUChy=Pz1Qu(;OvQq-|qJb7d|0efE3-1W{Q`3?46 zuZ>$&+rt+h)7mvXw*5tr%ev_}DA(ciZsOGrL2Rz9 z-2_l}x6B&^&#OzfP+L>r9hsB)pjI=AIPg|jFM=pE0zHuL;D#F2uC&Qk zrJhkL8d+%OV0d?%$;HcIDCAgSD3JODJs4KiTjLwz9-4wZ!C_ijdX|pU7?F-6?J=y& z`Z#CcHocSCyO8O5X+%xgI*xvz20D7{}ju&ONHLv><|Z1pVc4*YNt zevBf`*fccJL0aF2L=e>4Az@-W_SGIN4w)H1c5ghz^XVZh*W*Vk1!;W@OQ%{#p#zWr zC(uBW$q@i5Lm5R+%*iS6w6ow(;B9o?2Gv^QchDhD@v>cI^+2gf65jlaz-!@-5|w#I+U#V?mf<`B*i2s>8w zPtk-{z6#yif}3Gjbe$?|stvH0dJae)0kew1nOuAb>VN^`kNV9M@m1RD=}kw=^RvUC z?$Fy+TA6xg^`kb7`c`yF^{}Y}+in1l{ma(8R zBhvIr^7}$0OyGgjP(MzdIQ`sh>36qy?HIh*{vXGlZ{amH9Sa{s?AgUz2j*zyLByU9 zv0@#uC#$sqF{d;3{E^~p51IiKmKx9cXY`2vP-4&PIh~R$_N<`>lgFO)-aP!krha43 zr{1EjQ48y7X$?gkefBit&fm}zS$dM?j618;s9Nye$HB*)-T#ufQ>`MsXvAB*z%Mak zS=yiW%u{nuApq8CYy4)E^(tJsC(Gf_ok=BH-r}U`NeVR8K>~Uy&!Hvg_!B#U6<6g! zD13x|PD+lC-mBL8u}XNWH?4m{A^drPw)8tFo5|VY>Bwd@ulhaS^4N^?Q~cVOXfo;e za*&?-F=#{9adLO}EG;On)lMFX#(^R0Ck$%%H*w;h?1r^*#vjnV(a+Hne)2#QwyRfE zhH{sotu1)PN%Q{0G&!!?%|&RX2?3+!DUav$p+U(6rGIALO3d^kJC3O`5A;&ld4YH> zz2(I`AJBVhbP&R5aXj=U$gF_lZ>)gR#ck#w20d$tRIk19|73Y^|96@h_hD0YCcF-(j z508@1DCixd-cSP|x#X=!nciBRaG?FxIKG(+YBSy4Uq}kSV$)$A^*-X($nopwAj6y( z=UW*A_)RlX2c?TK#tfxTJ{F=#ot76%WQLINGN+qHZrUHc$+0u=1sRyOByD~>&>!6w zZ6KTllh%?xhjD#~iVWJyKlX^$zOj?uS~0ykrb|CTbFB~z4L!mrtsP6f|CIh@3qB}> zcN=lAKEb3;qPL@J2gQqilSss0-oc9jX(nX%F%9H=NF@ZpxL{KuK%VKYRoP?-}wV$iJ&b5yqMCigqBuW$HiKtlnh3$(%HLNip+J zFr3zS9u@nEUcPva$IrY&omKhCyYdnC=w-nhRKp#;w9{j0NAyxp7)SI{3XD5?F^`38 z129iphK3LOeKR!9)A;vIF1@^lo7T&E$CGwu8P2l>m8|9>R@Z)lA-%?Y4tczE`p-%Z zR$c0`885Fv>up(wCH1qw;>h^gq^Koe?-v$0oHCtuS%0QE0mq)_o$dBFzZ}gft32pc zb222x*KcF1F;`6)kpK2o^PFll>8jy*Y7KnmrwvG^eiz>Ad5RFDw>Mbpi#TnXBkeg- zKz~Fn>~tTx(3fF0WR%Y3rxGFUqVuJEe%Kz|_R%EDTGJC^`mjKUkZ)Bl7QVMcrymuDB%-=GD}4+8CM-(og8!jhvwC)mZD%QmRb2v}v-+^ZMd;E~uu zwt|s9F!v5=4R5uX&3{0<9h_PhKmOuQ%%+3qI}%2;aK%_PP#9rr{Os3>AD<_~P$Vh~ z>?5Kn@lKYA{RVP)emI7Cih9}vR@5D*>Zai>UOcJeckJ-?8=^i4oZSIzTKBN{Bz0co zM(N8fbq5+LI#h!LjR$uDP&Bv;5b|`VXbZI~gZqJp=_kkI>Kw;n>3ci1*xbHiKfvQ* z#78euSnoV%ICeh_vZuc%LV7!M?lX>(*O5uig7=`0wxn4jb=is^u0v6!%1Ez@NIQLp zXyzeecbuHu=`%Is7ywN#&(w3MGv5B8q{8`&)TG~Of@^v zp_e;H$U`>MO7A`cAKRgu*1$dmdVE~kga3_S`ZLhM%AQGD`F3NVaI6XM>8W^GM>+H* zs~Bh}Er#w%YB};sKr(~%@+6+WBElHXpPp|-(A8)i#K&XH)|T|q@7O@eurWe5&_6&n zEa?$_K$Pv!R6zGgF!#_D-c5oU@frpPdQcC~OKbc!>YXzx|A4YrD|=H^6u;Fbo1B0t z8xc&Wj0SPNxvt|#*kCl*f&?-ev`8;05_VE{NqP_`p=(|Afj_BYmv)a;1=7rMccIr^ zYg82(3hUz);Ja%*L^95~~y8nxSw0whObLbwb zCJS$PzY3*VUhqSi`=>AlOyC=OntHHDa3!9!_fS$jW7Ce(c>p9Pa*&^F*jW=+SpDoB+&!A%`AG-GtJp_JhXh$$y~%vMP6vTJia=aRb<wTax#XkhI@z;sxe z*p?VK`;~{#u=n)xeBS!0L%X-ov0g>`)~5lvcf7K`M&Y<4X*h}%tslpTk%UMG)|3@+ z(KTiHgxtTVwN6pjdx=`+5Cv28qf@XB$;dhbt79nAA-{Hi3vKH6lKXIS>`^fbDUsfj z!=cO(hGE5%d$wY+R9X_s0@&QjUZyfWPyCK$Ze;tr3Hf~>8?)4!qoyJ6ayX-~m z?UEhWWBNB@X>Y)JRef0zG=KtjCk5kO+c_eDN2ae&NonDwNzT5h0FgbtcE+x37OfTX zQHVk;>^R2K2VK5LM|-s_PsF<-2xjmB?h0gBofQUg1%5jNEw#q4e1XKa#((j9YsIWZ zoej1|Uiky@(90WY364h{T1fc5CEicsAa1~L)~mO4?cN8;k@U5vCQu(_`pzR{rdHph z22AWOX9f1U8^O9CUHDap+R>qQ>ik2`t3m8}lRL(nlzR|ERUL{$;c9VWLmdoa9u0z) zESiL}XdtZ1*7zm_Q@DSYm>+u<4KxMrr2CuENMjzh%V(h`%p#_?F|ZMSqLrv?P|LQ4 zWqm+Hbu{6Ct?}uqmv}>J81`1#g0clkOdwFpyzhS9!qYCBEM((ZcAn?+;jCocxx;;G z^DK16mcgUAS!E7mr=btnFLYXkqHj_ed;ERB;13AE=cm-sM@xmq7CHzGI;)~up9G(T z1x1Ld4kj%49rSARB6L*3b|){OA_`V)~;0aZ+I__01dq2Xqkm-o zQuj6BJ0ucN^w;Bm{sNjMd0K|riEZm53rYny9E5%{a%gLF0rlhr)f4yHq%U%Gjmow* z_4ybp3M#bb=kn+1dkV3H(y*V(H*Z4F;p?yl4r}1B1`ccBum%om;IIY`Yv8a34r}1B z2L6Q_pkJKq@4Bcax-^PkoAuFe1{>3&wfJ4tSoE4mi9$@Ri&a-eD`U0BvPdz0?snqD znn-P=20!a5m;DEjC;H5v&5c%7`bul6Dtwi7<>kI;t*^2w=39mzwJk9w`V8Z2pD7PN zf{I^q<)0M|S6AbAewE)c9g$;YwdcxIR>i-i{h= zq@>V^-Q!v@2}nQw;co^0HsG%Xf1UUFX@$Nh=Kdfac`kVsSm#sOCW-v6US!iRT?cRhfYTmUCOm`G&c1@apKvjB&0 zOe7WnKK5uL(E|Q$fSrJ!0MZM5{{Zv>4*FRl;RhTFxESy&fGYq;0B!)B4%h;?1h5nE z2EZWNdjPNua4X;%z&8Om0vb5awgH|2xC-r;13n1Y47dYuE8uRxp^qgJX?XpA3E|9v%mX|I$4?dUh)aKN_!=Ky|2<>#Pp{Bp$^fPTQafQtbi09*n1 z7T^ZJoE@+?!12gs?FO66C6AK-4l zw*dztk+KJHB;X5e&^O=&WRmHZGJ=3>0QUht3U~t2k$3`dJK!$Bfk>|I13VM(xDyTIJAk=>F9R+B9EqG|J>WvX^?>z&n*qNI z*a7$}z+S+efWz_2BIh7;ng@6u-~zx00qX%@09+4v2OfcJ0o(%k7T~*pp8j3E^SN8$F0JsJ4Gr+e1i}A$aGr+3=>6;qg1Uw&b;M=GVm=Aaz zU@zbXz(JjuH^2peoq+2A4Se6_e*yXce*x$RJn>!38(IfK^A6Yu z7zEr5SO$2+hnRQ3xqzDhe+ak>a2eib+Xwgyz~e?>e!8G{z-s^(0B#1X2ORMc^ai*H za0_4q;9G!q1AYdW(v5L{19lGR2V4$V0(jlWSZ9E{05<~C7mT(6t_F+)J^+|D(lCAt zI12Dxz#!nC0m}eC2V4Uf>_L6NI>0u-4S;dLmjTmGhyVQx>I41&FbH_sUepJq3+{lg zlxqr&l=_U66Ne5-Yfe$Wiw%=8{NGO`&LECyLo(npjV$~v0&WoDaG@Dv8vgR`#$2PG zaZJYaV}@Ute$>@zD~w4eoOkA!keQnL&qjkN5`8WH z-T*xZ(4<#b^hbg21|3_rMW15PUj+J9(6JR-^ywCT575u_Q=h(p?7@EMXM^4dI<_oJ ze!eAt7Upx*#Gma;{kZ_(FE{bY2imU2zOES{+$12&xIHwNhWqBiMo z+4@9c9OMk#^^o7}Lty2A0cWRhL!{?@pl9~e&*M_zBl_uQF6dN0SwH83ekSN_H+r7K z);MFJXC0n9iGe2c)h`3H%_-2=fl_+d+Lvzi919k_DPfbR}8b(aNM_xx8PG2Px~>SU8n zuiUhNPVJk0ve`g2=sQ3^$4)O(ef9xI@9(_|`fS_}%(wL=HONfFGZf?g3_SCRXSSW^ zVzVC|Xou$L8{jef@om)(*+4JoG{;{>dnSEA1}m57!=Z~&KgQ2-;ohV#w(94Bo(cMB zJH5oBF93Z!=;zt#u*0jsLh|cDF9h9Zr(b5(UoUioSyubAEc#~9r=or`U(o^jJkY1u z>!ba8X}=frdeF1&^l~MCBycoOw}5^d?gw6M(=lYEu2y_L(AI-*FZg&azHQ@6wdSH^ z5Z2~H$>;nnnhVf*&X?F_SaZG+G8Tbn3)(d2gZlEg)t48+vkyF%yLpNf59z%JJm)-| zNStNoDYfVscpEgYA9@z((?CyVHwBidAA zd3q4^GjTugSGImctTEjQ9+L4D>g&@s9*lj!5msMIaQ46JXNg26WSM=9T76vw`h%b+ z>+6G{H-kRMUO!a@9;AyMpi|BvSzNLk^hZG-VXuF=)&4-_9)1q`H|%uVUNI8%$NQmA z1N}+R^X>Ihr*Q++zY@^j0X>c}Q8Vb9KzGC~*IPQ?1p2E<>icN?ptpejHttRRkS*`BWb6aaHNQ+Gs_ZUy4>_~ZzO(=`rh!f|)4*44GN>(E z+%yXL#^vBkw(nEE@)ppOjhFJ6&7hCA_rta)uL1r0peKuaHiG^H=stUWTinwo^kn`e z4*HK#Kbe1`ue<&!=nL)gQ`Lq@dK?8h<$t~ONPP-|z6p^$uiEQmsOU9kx`4xDw?K0$SLVb7w z^fu6w%_aSM$;+U7>5=-d5A-ddx7p=WAGTV3puG0tU%UJec=Y*+^4;G7-K!1iBjv%X zLHDvJ>O(!~l-GCY!DTP^fv2mVHnxD?0eUifcnkEspeM73&p;pbRR8;t5Bc^Zw&1(f z?7CWL>B$dz8tAv%>8Wd(f%LE#^yQ$hb&ey_1|kVq2_7#!(HKU8{vha%*reL3KMnM!Ku@;km4Ln(^klZO z3iO9S&$jo`X73Mz{wvVo!Ym(tq1FBlslU`tPhDc_csJ-D_tQQ-3;8|hQAd5{oO%49 zkHj+~ul43booS$-33`9)iJq&x44$O^AM_5;lkFcHLH`5jj<~?K9@{`qfSydpanS$d zu5XJg(vC;Y7tgcc8Y~^pvUEHO^c2v?+UYhM2!eh*=*i}YesO&;=*i}I4d}x`Pd0}e zK@WkRYE^V|F^NqaRy3 zngyPMmPF!coCnR=Yd{)rF4RUj=of5DBns^^Qq|@_^tGT@gMNXXP7W7kqCX0H6X?lw z@FM8fCZYRKe-G$U(67L~*+1oT`LsuEWZ*g3W8gW*&LjCrqGy4=1@yD*bX)$Y0Q5G{ zllg*b(078KYz)nyKhaP7n?Tg7l3}wj^uG%HRwUm9qU-up${^fLB9g@{`~b5 z;JF<KH@;TTC`gx#x%`x@ixD(Jv&@Z;jCtIJPY>{~Kz*7gFWc?(b zMc}!wA0C>o72tUeJg?*4w8_+0&9Q6%{hFQL`t+en3+U@W-)OH-eZ61xmD<=1o)cP= z$Ib&!RP=ATWW*rC2i!=|^H9IPbrS^74d9t*mti{_mw~<>bT6G#AJ>5XW6-_U1@&Pg z=-=BpkzK%f0$a{V873UVBvd&RsY5+|smCO(#ON2>8~>L4y=e0$K(Q;ol-40=A* z*pZ5Q#B#xHga10%xMhf9nUoD!KIr}-#;;PkQ(sOqo*6jk$sxwRfrH*1Vl)jJ^wJRH z*1%y6{Qs_j&Jlhd;wFK#_0zRPb^%?cuPCBR zm#=k|8>EcK5bE;z86GXgHMOdY5S_2KXH*%X8((x5qzfS}U+qGtxP>lF|LYejz27px z&v)++AKFnx$uQYqiUf3&Kfkn%v`!>evpbUR1rg8q+z4w(mcgX3g= zrwg1fuvFkOf!`H)m%xVvJ}Gdgz}EzRAn;RxgHQ8w)5i-uL*O`p(*@2KSSoOt!0!sY zOW;ESpA@)L;A;Xu5csJ;YJA4jsS|ypX3Uv4+LxW1nVp&K%gM^g%^I7N;~Q0oPc#I> zF(I6jJ31NnxSXs$+~YRVb|>~6`JGJhg{bYV%f z(vv*nl8IUM<4eY6jUAWc%gj8tc6n_{q`LOp`n+-HhL=Ro!G|6qnZ-s?!Nf5}(bS3A z#?OQ&7wE;K{amy7J1-vp2tTAFpEJTwz_UMEj)q=xG)+S<9^Kay`{}dp>pJN( zcPhz$DZmdj{*rK$sqGX{?i0W369zh~dhz3^W~3U&7+M#?POnolzIudTqZsDkf8yUX z!p~*>Ul2Db*flb?`x!<3qiZhyQjD(}dLLmV{<{``B&TzPA9vPuGj3Dx*0A1}H02@O zc=i8L=7)vc005ZBJS1o0pDjBAGz>f2LthUyYTeRtyjDAnEzO# z{UN_dwtyFZF?Rr8S0nw@+{;+b@rK*K-^lpkhTi`)|2p7DKySxh=w}pfO1S$>@Ezk7 zei&2nj%2FhUsS5_Qv`pv;IGJ3c&)eJ2>yP*!siQrkKiA;Lg9UaKXZVR|I#H2Pj*As z`GS9KjGxQ%`G4d_1^tK*J3!GBn$_&HyPI`n0`G~WBA|C|p5{=9(-|8}0@4**9O zeG4z~@43v+;3UP}x%qxB^ZOLIJ94mU*AP8i%v5})3HS`->&BiI#V=?hBK&i+ z6khl92EmV+qVSxHLM3{?k;WBYqVRgW`vkw?B8As-#tl*Yx6V-bGDBVEf)Bd%utxC5 z$-HL^|09C`bD@%>@$U&fPwa&-Uwzz?IHVuiwETKT$2 z_`8Y~ev#mx0iJxK`+86Kw_o9B66b7jmxX|l^yXiz@cax0_y+`^H&5X?KMVXvf}bnv zaWrsr4MHGHa&8ejq<8!1su28DA?M_;b+Q<#dlTJWcpD*SnZp8~@s`M0{( z*Al@uFH!tw3I7iSKU(^)?c^1~PZ2pN@^#EmC4Zpw=XAlJC3yEdUJksMo~wnwa+;qd zV7mGGFTrn(`njyf^|9cMISTJ%DxRAit@`tu*$S`uYX!eEQ{hJn|Brw_7W!E{TH)!P ze7fj&@~EHmllJ!OW07NAsqj|_u?UJHJ-gS-I>GO#RQwBsKN}05_}7Y^7Ylx|;HMWU zey#tz1b;@p6Ho8qcFJIcC}IXy9IyJR3+yq!5@M58c5D!S*K?T{zk#q z$a>L!a+~1YKawvw|{Itl^7BT>`mW%Q75m8+`I`j)p?>Paz!Qi#`uaa!!(UbdKP&@$8K7BPaM7qwVkp!Os!DF--U$ z5c~$w{{@17U-0fYHVq4t+jaTFGl2K%|M!L8-JcHxpD+5?^D@AvRhb080(jETePw=@FjM5*C-~W-=R&~`hvW5%Tcgl{PM*8~)v--!pU3V5&ntQG#%LB*dVa-L4YKO7TB{ddP# z(*^II-&+MAs8;g%**4l3fB=%@Y?gLm_Iynie5H(cy5O%6`~wU9jM4l*5&YS?3a{78 z>w@2PzQWVze&`wt1Mt$rYT&6q4@$hJ<@`eMTV&lqEMG4tk#p8JAwP8}4l#b=R3IC8 zlK-B_q0d0kHAnDnJ**Y{AHsg-)9YwE@WWESZuGP$JQMNX%fj!D1OFuWYYG*=9{aFS zsy~aTDZG|HP4M>#evULyEBMFfIQbtI{CvN{!)*9!6a2zrh1dJk_=%ZbzN1`1ej!{P&8SO2Kaw zd)^}W+Xeq}mg3iO@+QIWam52W1@HEgyMg!8=ed{&>c2bQzFP3FNW48o`t!enKST7X z^^=yVW-`_u;`6rMf{PuI1=Un%YS z7>m!X3I5F`e#RUl_XED3e|t#bA5&tC!EiXF|LKzz{$s&k zFZknY6khwK-wWQIfBT!@Uy^woD{@BQhUB}?7n214z)U57r|_=={#d!UgH6J}c&g$@ zk*`+;zfk-;6t_F-({Ye!+pDz44f}b))@xLkfC4zUyW7i7);xfgrktl6!~fqe0NykwSJB{U&()Yp~7o_l_&Ub&vo+81>URwR|m^6O*l!9Oba^F+S(1G@x&z4Tw(|0z?I{I6B3cD0R768tk)DZK9gI>DbW{xe_N zZ4tbC-=S}Kr2f0}xAUhd{DO;=e65FD1n<_v&wW$*6it6%6EK-6%(UT)oidQ~D;hLK8@=&BQR*|UjV`T>l;mF#2pnvcii5AR995t>e7`6wb9&+A*?~}C&a_YnW5X*$p+GRC zkilt%Mrhilb0*KupK63=%(`gG!Q3+Ag}~VP+cpE zW#@$E^S8o=@GY>B{3wPO4FoJ~6ec$slB3cp#bDz?0KQ0@!rGFtFEf%RzzXYb+}|%xUx91-zH~zS#o(`Ss`0QQ7mU% zO}Mh8sv;CFF4opeOMsmgPX&xUR2jKiPd8RaabQ~Ze(b}vF|sC^9ZaibWo&ygd$8r% zA@!ZXp<1*ZUMe<~tO5jHRFqA+@~{<_=KONv{0j?b(h^=at@@(5@E_wsGwLv34y&3L zsg2cCEho2|KWj;LAOIzoC|rI476>iyG2YXYofE+*!^O%H@-L*O3QM^ybdd)Ug71aH zxVq*Lt+FTwRzmi{rWfI_eReDqS;jIkSW{$~`GwKIMWps|H4(Pdnn)=&AF>;$Avd&<~j53$zzYyMXZgxoaz9i=CH2&;B;Y{V%FcENdq|^}4MmPt`%?c=8LMpUb#H|>i z{EM&>N}`pay4pyIG9q|3+PeNjo(-E~tgNQ$YAC8iE&XNw8ME@IOkJ=bG$u2byEK7q zy7uZrwP|=htSYu>S|!+ z56-~;d!bA(=^}9Gk+;Z-aI}&QAs1`4+-o@o3L$QcTC`PAc_dtcO%u*4fQYS+E2J&N za_l-F#1=xKgw1q3E!9i&92TlALC1w2R^%D$cx|t0$sjCXZ(RyYjZKJDhL@B_LebLv z1+_IWPz05gC87Ka`4*pO2<1Zquyq{!Vs$u%hRUN?a^MhvE6N6zH!;&!MIkI7`xmI$ zd72uorSl@S!=yl9ELKWgB(O{tRv~S8d^WQ!5pDkCLq#}wMvAXgM^_kqpu*Zq%_U5+ zT|bK$k&}va)O@SB2sO~)0mMb*b@f(=4lIq8g(5XIRW)$TE+Zg$rlGM97Jp?0%dRLT zm+XXBfsWS>*;3pG2=AU)Sav9E6rqyZst|saqoh0n1=(%Pnufk1oOe7ej`@RZBUza7hV_xfaQq0$7gwaH9=o z0-d+Ql}q8*&4V1BV^kD5w%;f+nFGTj>2vr}377xyUsKSh1_w4 z=bq~kXQT>aJ~{W@J7hjF zV%wHSmY@;|yuV;t`?feYv>zvmvpPjF`Us0-r5U*Jag>gv*!B9!3sJH#q;~cYlGpjA zY+kU+0_Dj1jz)W1bAP1$$J!SUcBLR2F*{U?gN@pe0~LLjO%kV+XC*BQW>OH0nZ`m_ zN8R$Or8uD_-`-sY2YW&7an?FRLKta%94RD7q1-&#s?C$Q&U`tMtH4~`C$S{@vJR5_ z+fu#aW`q~Dh_ujy1NDZUyE=wxKkQbO*9J=M`lb=~F+;VMD4PqNETLm>7^w@2x>Z%2 zTgIp)!kCI^Wqt*;fJK!<|BO&cxwZ7vCs0GxQ5~Hew5{Y&a`9je)aG77BNunW4#A$6 z(iTmYSfYLjjZ)zV5}f)FKG{?t28lFtz47_LI{^g4%)!$bTMmYw@lYaG8NF+(n1E)z zr2orKADH8VRy`?)T&POedF^hrOg%hM_WnE&RHL+*KeH5j8#x}@6>(xv{?uA>l;ZGk zlV2vqkv!W%yDueN=2p9ME^2DRC0Eqdeu)fTdFlS1K|v45o=`4P2gF{XS$3A)WJ|qG z7V}bMu0t5MTAiHqs)$q+S1&hLa<*E@G?7d2i1F%ZNvsS@IyzTvjyb^BRW7BBJ95g^ za0JLOS5@x!(S`S{_45o41~5VXnH$l;+AQU(DZ3mfaGdW{EN1iMk^`dSvH2trM_W0~ z?0C7vI5JAEZs@AJr_2;w=GDQg&8#q^1~8 z_oL+zsKgb8sJ5_Mpd)>s-e8BZ8SMcN7(2dx>@k^El{ zA5T#xPstBuXO79Fjr&ku7^p7ijEP6=g<0;Ku<=M5Sn;>Zq!9N=Xv~jf@K}@|Je5~f z9oi%H^8M{v>dgstn1Thly5l(>d#FkpETXuyV0vLuRZ+;X>Y3O_q;qd8we4*j&3?Ic zOUKk=_{cGiJh8Z+J_|&j+|6afXCpNQ$$eZ>r|EJb=2T)6<_DjLoD*^cAFgK~oLw+u zk3tDP_OqjT?C&}Tn%?GEGQ*atsAv%8;WypBey9qpxE9?Nq=#93u$*OuX-=rVqR z2C0~3m}b+s;P}J&*v;wr*+B=(d>)6egEc)fU~wG?BzM&Jfipe3q$gYM_`@29C}3=H zc_duJuHX>HgM$vT_Y^Yl%VSs(m@Wk9eRgCzKYK1xs2tkuV+>HxfxNC|tF5!^w2Q$H z?mzQS?|Fpg&Yr4YH6thMG7{&xj?z({@m4PV&b3ydON6$wq=$ySt?DZ7Q>O~ysS~Cb zu|I)8iX+Ln#GXf!A>|c^tHZ_7*mAuIAeJSkfrLvr%znS|gvYYx2cu@hoNorq6gR2F zB**P4XvZL&M*Ag#W{4fnXSJlPr&CzPHxg9-k&itn>VVOV-DExs1T>dtE06MU4tJN&J8Wo4+{&hJA~ z5GpxY#9$oH#)@KNU_Y)z9GkRPurCk#4t@=Vi>TXnV^(1iwWPy7>7HlU=kaVCd%5*! z&Nktc`0+I4eqTQEe7ObQX)5J1`Z{FY*Q5>>#+f7+KrerJ@5Ow`Rw+~G#OaOMdUwR! z+H3<)>S*`B@u}pQ&A0Sz(e8PKs?*eXl>+^xx5(vXN_ugUUS3jrxHZFi8_R`5Rd2%a zYXbaIC*1#de)GHrX+zH2hAU(FOZb&cNS}cBr7COj_D)3~aR(YKco~J>6>MJ9^RVpk z*vLw=+f^fmp;hC(lJFv}#QC;%O>aFP3NYU=hqp%Z&-1M;&-5l-b)kN@&ZA!klm{|n z34U8Z_0+yl^)z~&H|^&$E1^#IuAids_t5L1YyFmdpFl@<#ocA=?N;qwG$)+^%^ee_ zqu)GtMNq;n;eT#%d9=Es!u7(tIb&92L+^tj^$w}t?}P2<%;GV0M?9;ZBGgsJ%0t&g zYO2g+tMhnL`ma8o^jUJYUFIV1O$OeXatTR2r$yHjq+P0)Rax#I0s4~KTGL57oDRmV zoetbDwpT0FV`iW6L}^*ozac7&ftAa4jE{&6P&|?o9=~ASJG`OkLK+=&`FQi zCz5-nv3jJTa10;nDuK^L2a)0|kgS|5?v9cPs4ced&h0$S_0>57GRf4lPj$Rr;9&{- zf4d;p@peIu4h6hFR{^`CrRx4%g?0h1&sEUsFO2HXRrDQ;IB7mHA@#dIJMoGGpleAM zbKaW`V?VyyLrXd&z;iMRZ&bLeU)2pQt*;LuqhDK9iH}*tmWP&Q+48|`wtY@dI2dBM z?l(67C8wvpYP9UxwFEsQax1ZXAj8cNAZGoVWhUM0zPHxRD2LeKDT=?AKlvDK$5vd@rzUS2R&@dRR)ah z&wIHh*%!^-RNh9mw#0*7IduG1A0P}ARyZC7s`LW1icAv+fA9tW&QAcFc_B5CRt}6l zXv41%Vi!ZAlXpG1mcqUfo2Pks&5Afq)zU$js+TOb)jUda_7R47Vag9!S)U0*yQOm74l*PZ5I3@mRA&f$Vy{# zF?Dtj-Rpb0(ypD~RF-6I4z^0`h=l3KV;|A@euF447wb5!II*SH42vLJIYDdR2!N6Q$P6Xkj(0uDxBhC z%WbG7b*ZSzd`I6pYm<$}*e<@jj%Z5CRRcLzpVa zU3GcP$mECRnXyPc?)mld%$h3xGJwoTnS9wxSqbXu5`-4xmG=7LNOcS!7RKlBlnnJf zFiM8LM^oX7Xfdi(K|21+$w0G`IjW6uo7@cHH?buoOS z&A;M05kgY%mny=B5AgHd@Bs>OlHfHUUi(Gg$3x#)r0eHN{XD7fw=?5D%~i(N$~o%S zOZ|GOKhniN&{d}IGqSg@f4}EGsqb^~yX#*9`ltAfGTr|cX}?A4>+cO5EOOla4+DP` z{&ao)yKTKvKPZs;n1VlDe>m>xisFx0bbb9haDMqBDE+%{#7h^+uwPf9K)<=B>+9cH zn{%|{nl8+`|C&$38$hSuaMNY|J8tWxevj0r?vU +#include +#include +#include +#include + +typedef float precision_t; + +std::vector transform(const std::vector cutPoints, const std::vector& data) +{ + std::vector discretizedData; + discretizedData.reserve(data.size()); + for (const float& item : data) { + auto upper = std::lower_bound(cutPoints.begin(), cutPoints.end(), item); + discretizedData.push_back(upper - cutPoints.begin()); + } + return discretizedData; +} +template +void show_vector(const std::vector& data, std::string title) +{ + std::cout << title << ": "; + std::string sep = ""; + for (const auto& d : data) { + std::cout << sep << d; + sep = ", "; + } + std::cout << std::endl; +} +std::vector linspace(precision_t start, precision_t end, int num) +{ + if (start == end) { + return { start, end }; + } + precision_t delta = (end - start) / static_cast(num - 1); + std::vector linspc; + for (size_t i = 0; i < num - 1; ++i) { + precision_t val = start + delta * static_cast(i); + linspc.push_back(val); + } + return linspc; +} +size_t clip(const size_t n, size_t lower, size_t upper) +{ + return std::max(lower, std::min(n, upper)); +} +std::vector percentile(std::vector& data, std::vector& percentiles) +{ + // Implementation taken from https://dpilger26.github.io/NumCpp/doxygen/html/percentile_8hpp_source.html + std::vector results; + results.reserve(percentiles.size()); + for (auto percentile : percentiles) { + const size_t i = static_cast(std::floor(static_cast(data.size() - 1) * percentile / 100.)); + const auto indexLower = clip(i, 0, data.size() - 2); + const double percentI = static_cast(indexLower) / static_cast(data.size() - 1); + const double fraction = + (percentile / 100.0 - percentI) / + (static_cast(indexLower + 1) / static_cast(data.size() - 1) - percentI); + const auto value = data[indexLower] + (data[indexLower + 1] - data[indexLower]) * fraction; + if (value != results.back()) + results.push_back(value); + } + return results; +} +int main() +{ + // std::vector test; + // std::vector cuts = { 0, 24.75, 49.5, 74.25, 10000 }; + // for (int i = 0; i < 100; ++i) { + // test.push_back(i); + // } + // auto Xt = transform(cuts, test); + // show_vector(Xt, "Discretized data:"); + // std::vector test2 = { 0,1,2,3,4,5,6,7,8,9,10,11 }; + // std::vector cuts2 = { 0,1,2,3,4,5,6,7,8,9 }; + // auto Xt2 = transform(cuts2, test2); + // show_vector(Xt2, "discretized data2: "); + auto quantiles = linspace(0.0, 100.0, 3 + 1); + std::vector data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; + std::vector cutPoints; + std::sort(data.begin(), data.end()); + cutPoints = percentile(data, quantiles); + cutPoints.push_back(std::numeric_limits::max()); + data.push_back(15); + data.push_back(0); + cutPoints.pop_back(); + cutPoints.erase(cutPoints.begin()); + cutPoints.clear(); + cutPoints.push_back(9.0); + auto Xt = transform(cutPoints, data); + show_vector(data, "Original data"); + show_vector(Xt, "Discretized data"); + show_vector(cutPoints, "Cutpoints"); + return 0; +} +/* +n_bins = 3 +data = [1,2,3,4,5,6,7,8,9,10] +quantiles = np.linspace(0, 100, n_bins + 1) +bin_edges = np.percentile(data, quantiles) + +*/ \ No newline at end of file diff --git a/tests/tests_do.py b/tests/tests_do.py new file mode 100644 index 0000000..3cfb500 --- /dev/null +++ b/tests/tests_do.py @@ -0,0 +1,39 @@ +from sklearn.preprocessing import KBinsDiscretizer + +with open("datasets/tests.txt") as f: + data = f.readlines() + +data = [x.strip() for x in data if x[0] != "#"] + +for i in range(0, len(data), 3): + print("Experiment:", data[i]) + from_, to_, step_, n_bins_, strategy_ = data[i].split(",") + strategy = "quantile" if strategy_.strip() == "Q" else "uniform" + disc = KBinsDiscretizer( + n_bins=int(n_bins_), + encode="ordinal", + strategy=strategy, + ) + X = [[float(x)] for x in range(int(from_), int(to_), int(step_))] + # result = disc.fit_transform(X) + disc.fit(X) + result = disc.transform(X) + result = [int(x) for x in result.flatten()] + expected = [int(x) for x in data[i + 1].split(",")] + assert len(result) == len(expected) + for j in range(len(result)): + if result[j] != expected[j]: + print("Error at", j, "Expected=", expected[j], "Result=", result[j]) + expected_cuts = disc.bin_edges_[0] + computed_cuts = [float(x) for x in data[i + 2].split(",")] + assert len(expected_cuts) == len(computed_cuts) + for j in range(len(expected_cuts)): + if round(expected_cuts[j], 5) != computed_cuts[j]: + print( + "Error at", + j, + "Expected=", + expected_cuts[j], + "Result=", + computed_cuts[j], + ) diff --git a/tests/tests_generate.ipynb b/tests/tests_generate.ipynb new file mode 100644 index 0000000..376c76d --- /dev/null +++ b/tests/tests_generate.ipynb @@ -0,0 +1,85 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import KBinsDiscretizer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "experiments = [\n", + " [0, 100, 1, 4, \"Q\"],\n", + " [0, 50, 1, 4, \"Q\"],\n", + " [0, 100, 1, 3, \"Q\"],\n", + " [0, 50, 1, 3, \"Q\"],\n", + " [0, 10, 1, 3, \"Q\"],\n", + " [0, 100, 1, 4, \"U\"],\n", + " [0, 50, 1, 4, \"U\"],\n", + " [0, 100, 1, 3, \"U\"],\n", + " [0, 50, 1, 3, \"U\"],\n", + " [0, 10, 1, 3, \"U\"],\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"datasets/tests.txt\", \"w\") as file:\n", + " file.write(\"#\\n\")\n", + " file.write(\"# from, to, step, #bins, Q/U\\n\")\n", + " file.write(\"# discretized data\\n\")\n", + " file.write(\"# cut points\\n\")\n", + " file.write(\"#\\n\")\n", + " for experiment in experiments:\n", + " (from_, to_, step_, bins_, strategy) = experiment\n", + " disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n", + " data = [[x] for x in range(from_, to_, step_)]\n", + " disc.fit(data)\n", + " result = disc.transform(data)\n", + " file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n", + " sep = \"\"\n", + " for res in result:\n", + " file.write(f\"{sep}{int(res):d}\")\n", + " sep= \", \"\n", + " file.write(\"\\n\")\n", + " sep = \"\"\n", + " for res in disc.bin_edges_[0]:\n", + " file.write(sep + str(round(res,5)))\n", + " sep = \", \"\n", + " file.write(\"\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c488ace719267e731a9c53ab1afa2da5cd76f311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 2 Jul 2024 11:50:42 +0200 Subject: [PATCH 2/2] Fix FImdlp tests --- CPPFImdlp.cpp | 2 +- tests/BinDisc_unittest.cpp | 76 ++++++++++++------------- tests/Experiments.hpp | 8 ++- tests/FImdlp_unittest.cpp | 87 ++++++++++++++-------------- tests/datasets/tests.txt | 114 +++++++++++++++++++++++++++++++++++++ tests/tests_do.py | 25 +++++--- tests/tests_generate.ipynb | 74 +++++++++++++++++++----- 7 files changed, 283 insertions(+), 103 deletions(-) diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index f9fc660..7e38497 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -25,7 +25,7 @@ namespace mdlp { } if (proposed_cuts < 1) return static_cast(round(static_cast(X.size()) * proposed_cuts)); - return static_cast(proposed_cuts); // As the first and last cutpoints shall be ignored in transform + return static_cast(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added } void CPPFImdlp::fit(samples_t& X_, labels_t& y_) diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index cdcc895..8827922 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -347,44 +347,44 @@ namespace mdlp { labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; EXPECT_EQ(expected, labels); } - // TEST_F(TestBinDisc4U, irisUniform) - // { - // ArffFiles file; - // file.load(data_path + "iris.arff", true); - // vector& X = file.getX(); - // fit(X[0]); - // auto Xt = transform(X[0]); - // labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - // EXPECT_EQ(expected, Xt); - // auto Xtt = fit_transform(X[0], file.getY()); - // EXPECT_EQ(expected, Xtt); - // auto Xt_t = torch::tensor(X[0], torch::kFloat32); - // auto y_t = torch::tensor(file.getY(), torch::kInt32); - // auto Xtt_t = fit_transform_t(Xt_t, y_t); - // for (int i = 0; i < expected.size(); i++) - // EXPECT_EQ(expected[i], Xtt_t[i].item()); - // } - // TEST_F(TestBinDisc4Q, irisQuantile) - // { - // ArffFiles file; - // file.load(data_path + "iris.arff", true); - // vector& X = file.getX(); - // fit(X[0]); - // auto Xt = transform(X[0]); - // labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; - // EXPECT_EQ(expected, Xt); - // auto Xtt = fit_transform(X[0], file.getY()); - // EXPECT_EQ(expected, Xtt); - // auto Xt_t = torch::tensor(X[0], torch::kFloat32); - // auto y_t = torch::tensor(file.getY(), torch::kInt32); - // auto Xtt_t = fit_transform_t(Xt_t, y_t); - // for (int i = 0; i < expected.size(); i++) - // EXPECT_EQ(expected[i], Xtt_t[i].item()); - // fit_t(Xt_t, y_t); - // auto Xt_t2 = transform_t(Xt_t); - // for (int i = 0; i < expected.size(); i++) - // EXPECT_EQ(expected[i], Xt_t2[i].item()); - // } + TEST_F(TestBinDisc4U, irisUniform) + { + ArffFiles file; + file.load(data_path + "iris.arff", true); + vector& X = file.getX(); + fit(X[0]); + auto Xt = transform(X[0]); + labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; + EXPECT_EQ(expected, Xt); + auto Xtt = fit_transform(X[0], file.getY()); + EXPECT_EQ(expected, Xtt); + auto Xt_t = torch::tensor(X[0], torch::kFloat32); + auto y_t = torch::tensor(file.getY(), torch::kInt32); + auto Xtt_t = fit_transform_t(Xt_t, y_t); + for (int i = 0; i < expected.size(); i++) + EXPECT_EQ(expected[i], Xtt_t[i].item()); + } + TEST_F(TestBinDisc4Q, irisQuantile) + { + ArffFiles file; + file.load(data_path + "iris.arff", true); + vector& X = file.getX(); + fit(X[0]); + auto Xt = transform(X[0]); + labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; + EXPECT_EQ(expected, Xt); + auto Xtt = fit_transform(X[0], file.getY()); + EXPECT_EQ(expected, Xtt); + auto Xt_t = torch::tensor(X[0], torch::kFloat32); + auto y_t = torch::tensor(file.getY(), torch::kInt32); + auto Xtt_t = fit_transform_t(Xt_t, y_t); + for (int i = 0; i < expected.size(); i++) + EXPECT_EQ(expected[i], Xtt_t[i].item()); + fit_t(Xt_t, y_t); + auto Xt_t2 = transform_t(Xt_t); + for (int i = 0; i < expected.size(); i++) + EXPECT_EQ(expected[i], Xt_t2[i].item()); + } TEST(TestBinDiscGeneric, Fileset) { Experiments exps(data_path + "tests.txt"); diff --git a/tests/Experiments.hpp b/tests/Experiments.hpp index 166c5fb..b41e84a 100644 --- a/tests/Experiments.hpp +++ b/tests/Experiments.hpp @@ -76,7 +76,13 @@ private: } Experiment parse_experiment(std::string& line) { - auto [from_, to_, step_, n_bins, strategy] = parse_header(line); + if (line == "RANGE") { + std::getline(test_file, line); + auto [from_, to_, step_, n_bins, strategy] = parse_header(line); + } else { + std::getline(test_file, line); + + } std::getline(test_file, line); auto data_discretized = parse_vector(line); std::getline(test_file, line); diff --git a/tests/FImdlp_unittest.cpp b/tests/FImdlp_unittest.cpp index b439631..d68b983 100644 --- a/tests/FImdlp_unittest.cpp +++ b/tests/FImdlp_unittest.cpp @@ -124,7 +124,7 @@ namespace mdlp { { samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 }; labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 }; - cutPoints_t expected = { 1.5f, 2.5f }; + cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 }; fit(X_, y_); auto computed = getCutPoints(); EXPECT_EQ(computed.size(), expected.size()); @@ -167,29 +167,31 @@ namespace mdlp { y = { 1 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 1, 3 }; y = { 1, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 2, 4 }; y = { 1, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 0); + EXPECT_EQ(computed.size(), 2); X = { 1, 2, 3 }; y = { 1, 2, 2 }; fit(X, y); computed = getCutPoints(); - EXPECT_EQ(computed.size(), 1); - EXPECT_NEAR(computed[0], 1.5, precision); + EXPECT_EQ(computed.size(), 3); + EXPECT_NEAR(computed[0], 1, precision); + EXPECT_NEAR(computed[1], 1.5, precision); + EXPECT_NEAR(computed[2], 3, precision); } TEST_F(TestFImdlp, TestArtificialDataset) { fit(X, y); - cutPoints_t expected = { 5.05f }; + cutPoints_t expected = { 4.7, 5.05, 6.0 }; vector computed = getCutPoints(); EXPECT_EQ(computed.size(), expected.size()); for (unsigned long i = 0; i < computed.size(); i++) { @@ -200,10 +202,10 @@ namespace mdlp { TEST_F(TestFImdlp, TestIris) { vector expected = { - {5.45f, 5.75f}, - {2.75f, 2.85f, 2.95f, 3.05f, 3.35f}, - {2.45f, 4.75f, 5.05f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 5.05f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 3, 5, 4, 3 }; auto test = CPPFImdlp(); @@ -213,7 +215,7 @@ namespace mdlp { TEST_F(TestFImdlp, ComputeCutPointsGCase) { cutPoints_t expected; - expected = { 1.5 }; + expected = { 0, 1.5, 2 }; samples_t X_ = { 0, 1, 2, 2, 2 }; labels_t y_ = { 1, 1, 1, 2, 2 }; fit(X_, y_); @@ -247,10 +249,10 @@ namespace mdlp { // Set max_depth to 1 auto test = CPPFImdlp(3, 1, 0); vector expected = { - {5.45f}, - {3.35f}, - {2.45f}, - {0.8f} + {4.3, 5.45f, 7.9}, + {2, 3.35f, 4.4}, + {1, 2.45f, 6.9}, + {0.1, 0.8f, 2.5} }; vector depths = { 1, 1, 1, 1 }; test_dataset(test, "iris", expected, depths); @@ -261,10 +263,10 @@ namespace mdlp { auto test = CPPFImdlp(75, 100, 0); // Set min_length to 75 vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 3, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -275,10 +277,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 0); vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -289,10 +291,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 1); vector expected = { - {5.45f}, - {2.85f}, - {2.45f}, - {0.8f} + {4.3, 5.45f, 7.9}, + {2, 2.85f, 4.4}, + {1, 2.45f, 6.9}, + {0.1, 0.8f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -304,10 +306,10 @@ namespace mdlp { // Set min_length to 75 auto test = CPPFImdlp(75, 2, 0.2f); vector expected = { - {5.45f, 5.75f}, - {2.85f, 3.35f}, - {2.45f, 4.75f}, - {0.8f, 1.75f} + {4.3, 5.45f, 5.75f, 7.9}, + {2, 2.85f, 3.35f, 4.4}, + {1, 2.45f, 4.75f, 6.9}, + {0.1, 0.8f, 1.75f, 2.5} }; vector depths = { 2, 2, 2, 2 }; test_dataset(test, "iris", expected, depths); @@ -327,7 +329,6 @@ namespace mdlp { computed = compute_max_num_cut_points(); ASSERT_EQ(expected, computed); } - } TEST_F(TestFImdlp, TransformTest) { @@ -345,15 +346,15 @@ namespace mdlp { vector& X = file.getX(); labels_t& y = file.getY(); fit(X[1], y); - // auto computed = transform(X[1]); - // EXPECT_EQ(computed.size(), expected.size()); - // for (unsigned long i = 0; i < computed.size(); i++) { - // EXPECT_EQ(computed[i], expected[i]); - // } - // auto computed_ft = fit_transform(X[1], y); - // EXPECT_EQ(computed_ft.size(), expected.size()); - // for (unsigned long i = 0; i < computed_ft.size(); i++) { - // EXPECT_EQ(computed_ft[i], expected[i]); - // } + auto computed = transform(X[1]); + EXPECT_EQ(computed.size(), expected.size()); + for (unsigned long i = 0; i < computed.size(); i++) { + EXPECT_EQ(computed[i], expected[i]); + } + auto computed_ft = fit_transform(X[1], y); + EXPECT_EQ(computed_ft.size(), expected.size()); + for (unsigned long i = 0; i < computed_ft.size(); i++) { + EXPECT_EQ(computed_ft[i], expected[i]); + } } } diff --git a/tests/datasets/tests.txt b/tests/datasets/tests.txt index 6712244..91e772e 100644 --- a/tests/datasets/tests.txt +++ b/tests/datasets/tests.txt @@ -3,33 +3,147 @@ # discretized data # cut points # +RANGE 0, 100, 1, 4, Q 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 0.0, 24.75, 49.5, 74.25, 99.0 +RANGE 0, 50, 1, 4, Q 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 0.0, 12.25, 24.5, 36.75, 49.0 +RANGE 0, 100, 1, 3, Q 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 0.0, 33.0, 66.0, 99.0 +RANGE 0, 50, 1, 3, Q 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 0.0, 16.33333, 32.66667, 49.0 +RANGE 0, 10, 1, 3, Q 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 0.0, 3.0, 6.0, 9.0 +RANGE 0, 100, 1, 4, U 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 0.0, 24.75, 49.5, 74.25, 99.0 +RANGE 0, 50, 1, 4, U 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 0.0, 12.25, 24.5, 36.75, 49.0 +RANGE 0, 100, 1, 3, U 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 0.0, 33.0, 66.0, 99.0 +RANGE 0, 50, 1, 3, U 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 0.0, 16.33333, 32.66667, 49.0 +RANGE 0, 10, 1, 3, U 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 0.0, 3.0, 6.0, 9.0 +RANGE +1, 10, 1, 3, Q +0, 0, 0, 1, 1, 1, 2, 2, 2 +1.0, 3.66667, 6.33333, 9.0 +RANGE +1, 10, 1, 3, U +0, 0, 0, 1, 1, 1, 2, 2, 2 +1.0, 3.66667, 6.33333, 9.0 +RANGE +1, 11, 1, 3, Q +0, 0, 0, 1, 1, 1, 1, 2, 2, 2 +1.0, 4.0, 7.0, 10.0 +RANGE +1, 11, 1, 3, U +0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.0, 7.0, 10.0 +RANGE +1, 12, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.33333, 7.66667, 11.0 +RANGE +1, 12, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.33333, 7.66667, 11.0 +RANGE +1, 13, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +RANGE +1, 13, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +RANGE +1, 14, 1, 3, Q +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +RANGE +1, 14, 1, 3, U +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +RANGE +1, 15, 1, 3, Q +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +RANGE +1, 15, 1, 3, U +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +VECTOR +Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] +1, 0, 0, 1, 0, 0, 1, 0, 0 +1.0, 1.66667, 3.0 +VECTOR +U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] +2, 0, 0, 2, 0, 0, 2, 0, 0 +1.0, 1.66667, 2.33333, 3.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 +1.0, 4.66667, 8.33333, 12.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0] +0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.0, 9.0, 13.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.33333, 9.66667, 14.0 +VECTOR +Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] +0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0] +2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0 +1.0, 5.66667, 10.33333, 15.0 +VECTOR +Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] +0, 0, 0, 0, 1, 1, 2, 2, 2, 2 +0.0, 1.0, 3.0, 4.0 +VECTOR +U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] +0, 0, 0, 0, 1, 1, 2, 2, 2, 2 +0.0, 1.33333, 2.66667, 4.0 diff --git a/tests/tests_do.py b/tests/tests_do.py index 3cfb500..95a2c26 100644 --- a/tests/tests_do.py +++ b/tests/tests_do.py @@ -1,3 +1,4 @@ +import json from sklearn.preprocessing import KBinsDiscretizer with open("datasets/tests.txt") as f: @@ -5,27 +6,37 @@ with open("datasets/tests.txt") as f: data = [x.strip() for x in data if x[0] != "#"] -for i in range(0, len(data), 3): - print("Experiment:", data[i]) - from_, to_, step_, n_bins_, strategy_ = data[i].split(",") +for i in range(0, len(data), 4): + experiment_type = data[i] + print("Experiment:", data[i + 1]) + if experiment_type == "RANGE": + range_data = data[i + 1] + from_, to_, step_, n_bins_, strategy_ = range_data.split(",") + X = [[float(x)] for x in range(int(from_), int(to_), int(step_))] + else: + strategy_ = data[i + 1][0] + n_bins_ = data[i + 1][1] + vector = data[i + 1][2:] + X = [[float(x)] for x in json.loads(vector)] + strategy = "quantile" if strategy_.strip() == "Q" else "uniform" disc = KBinsDiscretizer( n_bins=int(n_bins_), encode="ordinal", strategy=strategy, ) - X = [[float(x)] for x in range(int(from_), int(to_), int(step_))] - # result = disc.fit_transform(X) + expected_data = data[i + 2] + cuts_data = data[i + 3] disc.fit(X) result = disc.transform(X) result = [int(x) for x in result.flatten()] - expected = [int(x) for x in data[i + 1].split(",")] + expected = [int(x) for x in expected_data.split(",")] assert len(result) == len(expected) for j in range(len(result)): if result[j] != expected[j]: print("Error at", j, "Expected=", expected[j], "Result=", result[j]) expected_cuts = disc.bin_edges_[0] - computed_cuts = [float(x) for x in data[i + 2].split(",")] + computed_cuts = [float(x) for x in cuts_data.split(",")] assert len(expected_cuts) == len(computed_cuts) for j in range(len(expected_cuts)): if round(expected_cuts[j], 5) != computed_cuts[j]: diff --git a/tests/tests_generate.ipynb b/tests/tests_generate.ipynb index 376c76d..467ce2f 100644 --- a/tests/tests_generate.ipynb +++ b/tests/tests_generate.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "experiments = [\n", + "experiments_range = [\n", " [0, 100, 1, 4, \"Q\"],\n", " [0, 50, 1, 4, \"Q\"],\n", " [0, 100, 1, 3, \"Q\"],\n", @@ -25,7 +25,29 @@ " [0, 50, 1, 4, \"U\"],\n", " [0, 100, 1, 3, \"U\"],\n", " [0, 50, 1, 3, \"U\"],\n", + "# \n", " [0, 10, 1, 3, \"U\"],\n", + " [1, 10, 1, 3, \"Q\"],\n", + " [1, 10, 1, 3, \"U\"],\n", + " [1, 11, 1, 3, \"Q\"],\n", + " [1, 11, 1, 3, \"U\"],\n", + " [1, 12, 1, 3, \"Q\"],\n", + " [1, 12, 1, 3, \"U\"],\n", + " [1, 13, 1, 3, \"Q\"],\n", + " [1, 13, 1, 3, \"U\"],\n", + " [1, 14, 1, 3, \"Q\"],\n", + " [1, 14, 1, 3, \"U\"],\n", + " [1, 15, 1, 3, \"Q\"],\n", + " [1, 15, 1, 3, \"U\"]\n", + "]\n", + "experiments_vectors = [\n", + " (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n", + " (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n", + " (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n", + " (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n", + " (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n", + " (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n", + " (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n", "]" ] }, @@ -33,31 +55,57 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n", + " warnings.warn(\n" + ] + } + ], "source": [ + "def write_lists(file, data, cuts):\n", + " sep = \"\"\n", + " for res in data:\n", + " file.write(f\"{sep}{int(res):d}\")\n", + " sep= \", \"\n", + " file.write(\"\\n\")\n", + " sep = \"\"\n", + " for res in cuts:\n", + " file.write(sep + str(round(res,5)))\n", + " sep = \", \"\n", + " file.write(\"\\n\")\n", + "\n", "with open(\"datasets/tests.txt\", \"w\") as file:\n", " file.write(\"#\\n\")\n", " file.write(\"# from, to, step, #bins, Q/U\\n\")\n", " file.write(\"# discretized data\\n\")\n", " file.write(\"# cut points\\n\")\n", " file.write(\"#\\n\")\n", - " for experiment in experiments:\n", + " for experiment in experiments_range:\n", + " file.write(\"RANGE\\n\")\n", " (from_, to_, step_, bins_, strategy) = experiment\n", " disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n", " data = [[x] for x in range(from_, to_, step_)]\n", " disc.fit(data)\n", " result = disc.transform(data)\n", " file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n", - " sep = \"\"\n", - " for res in result:\n", - " file.write(f\"{sep}{int(res):d}\")\n", - " sep= \", \"\n", - " file.write(\"\\n\")\n", - " sep = \"\"\n", - " for res in disc.bin_edges_[0]:\n", - " file.write(sep + str(round(res,5)))\n", - " sep = \", \"\n", - " file.write(\"\\n\")" + " write_lists(file, result, disc.bin_edges_[0])\n", + " for n_bins, experiment in experiments_vectors:\n", + " for strategy in [\"Q\", \"U\"]:\n", + " file.write(\"VECTOR\\n\")\n", + " file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n", + " disc = KBinsDiscretizer(\n", + " n_bins=n_bins,\n", + " encode=\"ordinal\",\n", + " \n", + " strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n", + " )\n", + " data = [[x] for x in experiment]\n", + " result = disc.fit_transform(data)\n", + " write_lists(file, result, disc.bin_edges_[0])" ] } ],