From d90e21ec4c50da01c2f1c668feb538f46e5a996f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Tue, 2 Jul 2024 18:54:07 +0200 Subject: [PATCH] Include upper_bound and lower_bound implementation --- BinDisc.cpp | 2 + CPPFImdlp.cpp | 1 + Discretizer.cpp | 41 +- Discretizer.h | 5 + tests/BinDisc_unittest.cpp | 688 ++++++++++++++++----------------- tests/Discretizer_unittest.cpp | 205 ++++++++-- tests/Experiments.hpp | 73 ++-- tests/datasets/tests.txt | 73 ++++ tests/tests_do.py | 13 +- tests/tests_generate.ipynb | 93 ++++- 10 files changed, 782 insertions(+), 412 deletions(-) diff --git a/BinDisc.cpp b/BinDisc.cpp index afc2e8d..8b3028c 100644 --- a/BinDisc.cpp +++ b/BinDisc.cpp @@ -24,8 +24,10 @@ namespace mdlp { return; } if (strategy == strategy_t::QUANTILE) { + direction = bound_dir_t::RIGHT; fit_quantile(X); } else if (strategy == strategy_t::UNIFORM) { + direction = bound_dir_t::RIGHT; fit_uniform(X); } } diff --git a/CPPFImdlp.cpp b/CPPFImdlp.cpp index 7e38497..0c04a63 100644 --- a/CPPFImdlp.cpp +++ b/CPPFImdlp.cpp @@ -12,6 +12,7 @@ namespace mdlp { max_depth(max_depth_), proposed_cuts(proposed) { + direction = bound_dir_t::LEFT; } size_t CPPFImdlp::compute_max_num_cut_points() const diff --git a/Discretizer.cpp b/Discretizer.cpp index 1a30d38..f0d63e7 100644 --- a/Discretizer.cpp +++ b/Discretizer.cpp @@ -1,17 +1,52 @@ #include "Discretizer.h" namespace mdlp { + // The next to templates have been taken to have the chance to customize them to match + // np.searchsorted that is used in scikit-learn KBinsDiscretizer + // Code Taken from https://cplusplus.com/reference/algorithm/upper_bound/?kw=upper_bound + template + ForwardIterator upper_bound(ForwardIterator first, ForwardIterator last, const T& val) + { + ForwardIterator it; + typename iterator_traits::difference_type count, step; + count = std::distance(first, last); + while (count > 0) { + it = first; step = count / 2; std::advance(it, step); + if (!(val < *it)) // or: if (!comp(val,*it)), for version (2) + { + first = ++it; count -= step + 1; + } else count = step; + } + return first; + } + // Code Taken from https://cplusplus.com/reference/algorithm/lower_bound/?kw=lower_bound + template + ForwardIterator lower_bound(ForwardIterator first, ForwardIterator last, const T& val) + { + ForwardIterator it; + typename iterator_traits::difference_type count, step; + count = distance(first, last); + while (count > 0) { + it = first; step = count / 2; advance(it, step); + if (*it < val) { // or: if (comp(*it,val)), for version (2) + first = ++it; + count -= step + 1; + } else count = step; + } + return first; + } labels_t& Discretizer::transform(const samples_t& data) { discretizedData.clear(); discretizedData.reserve(data.size()); - // CutPoints always have more than two items + // CutPoints always have at least two items // Have to ignore first and last cut points provided auto first = cutPoints.begin() + 1; auto last = cutPoints.end() - 1; + auto bound = direction == bound_dir_t::LEFT ? my_lower_bound::iterator, float> : my_upper_bound::iterator, float>; for (const precision_t& item : data) { - auto upper = std::lower_bound(first, last, item); - int number = upper - first; + auto pos = bound(first, last, item); + int number = pos - first; /* OJO */ diff --git a/Discretizer.h b/Discretizer.h index 0c7fafe..de700b2 100644 --- a/Discretizer.h +++ b/Discretizer.h @@ -7,6 +7,10 @@ #include "typesFImdlp.h" namespace mdlp { + enum class bound_dir_t { + LEFT, + RIGHT + }; class Discretizer { public: Discretizer() = default; @@ -22,6 +26,7 @@ namespace mdlp { protected: labels_t discretizedData = labels_t(); cutPoints_t cutPoints; // At least two cutpoints must be provided, the first and the last will be ignored in transform + bound_dir_t direction; // used in transform }; } #endif diff --git a/tests/BinDisc_unittest.cpp b/tests/BinDisc_unittest.cpp index 8827922..51085dc 100644 --- a/tests/BinDisc_unittest.cpp +++ b/tests/BinDisc_unittest.cpp @@ -35,382 +35,360 @@ namespace mdlp { public: TestBinDisc4Q(int n_bins = 4) : BinDisc(n_bins, strategy_t::QUANTILE) {}; }; - TEST_F(TestBinDisc3U, Easy3BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; - auto y = labels_t(); - fit(X, y); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(3.66667, cuts.at(1), margin); - EXPECT_NEAR(6.33333, cuts.at(2), margin); - EXPECT_NEAR(9.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3Q, Easy3BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts[0], margin); - EXPECT_NEAR(3.666667, cuts[1], margin); - EXPECT_NEAR(6.333333, cuts[2], margin); - EXPECT_NEAR(9, cuts[3], margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3U, X10BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(4.0, cuts.at(1), margin); - EXPECT_NEAR(7.0, cuts.at(2), margin); - EXPECT_NEAR(10.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3Q, X10BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(4.0, cuts.at(1), margin); - EXPECT_NEAR(7.0, cuts.at(2), margin); - EXPECT_NEAR(10.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3U, X11BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(4.33333, cuts.at(1), margin); - EXPECT_NEAR(7.66667, cuts.at(2), margin); - EXPECT_NEAR(11.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3U, X11BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(4.33333, cuts.at(1), margin); - EXPECT_NEAR(7.66667, cuts.at(2), margin); - EXPECT_NEAR(11.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3U, ConstantUniform) - { - samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(1, cuts.at(1), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 0, 0 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3Q, ConstantQuantile) - { - samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(1, cuts.at(1), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 0, 0 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc3U, EmptyUniform) - { - samples_t X = {}; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(0, cuts.at(0), margin); - EXPECT_NEAR(0, cuts.at(1), margin); - } - TEST_F(TestBinDisc3Q, EmptyQuantile) - { - samples_t X = {}; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(2, cuts.size()); - EXPECT_NEAR(0, cuts.at(0), margin); - EXPECT_NEAR(0, cuts.at(1), margin); - } - TEST(TestBinDisc3, ExceptionNumberBins) - { - EXPECT_THROW(BinDisc(2), std::invalid_argument); - } - TEST_F(TestBinDisc3U, EasyRepeated) - { - samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(4, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(1.66667, cuts.at(1), margin); - EXPECT_NEAR(2.33333, cuts.at(2), margin); - EXPECT_NEAR(3.0, cuts.at(3), margin); - auto labels = transform(X); - labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 }; - EXPECT_EQ(expected, labels); - ASSERT_EQ(3.0, X[0]); // X is not modified - } - TEST_F(TestBinDisc3Q, EasyRepeated) - { - samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(3, cuts.size()); - EXPECT_NEAR(1, cuts.at(0), margin); - EXPECT_NEAR(1.66667, cuts.at(1), margin); - EXPECT_NEAR(3.0, cuts.at(2), margin); - auto labels = transform(X); - labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 }; - EXPECT_EQ(expected, labels); - ASSERT_EQ(3.0, X[0]); // X is not modified - } - TEST_F(TestBinDisc4U, Easy4BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(3.75, cuts.at(1), margin); - EXPECT_NEAR(6.5, cuts.at(2), margin); - EXPECT_NEAR(9.25, cuts.at(3), margin); - EXPECT_NEAR(12.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4Q, Easy4BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(3.75, cuts.at(1), margin); - EXPECT_NEAR(6.5, cuts.at(2), margin); - EXPECT_NEAR(9.25, cuts.at(3), margin); - EXPECT_NEAR(12.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4U, X13BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.0, cuts.at(1), margin); - EXPECT_NEAR(7.0, cuts.at(2), margin); - EXPECT_NEAR(10.0, cuts.at(3), margin); - EXPECT_NEAR(13.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4Q, X13BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.0, cuts.at(1), margin); - EXPECT_NEAR(7.0, cuts.at(2), margin); - EXPECT_NEAR(10.0, cuts.at(3), margin); - EXPECT_NEAR(13.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4U, X14BinsUniform) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.25, cuts.at(1), margin); - EXPECT_NEAR(7.5, cuts.at(2), margin); - EXPECT_NEAR(10.75, cuts.at(3), margin); - EXPECT_NEAR(14.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4Q, X14BinsQuantile) - { - samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.25, cuts.at(1), margin); - EXPECT_NEAR(7.5, cuts.at(2), margin); - EXPECT_NEAR(10.75, cuts.at(3), margin); - EXPECT_NEAR(14.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4U, X15BinsUniform) - { - samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.5, cuts.at(1), margin); - EXPECT_NEAR(8, cuts.at(2), margin); - EXPECT_NEAR(11.5, cuts.at(3), margin); - EXPECT_NEAR(15.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 3, 1, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4Q, X15BinsQuantile) - { - samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(1.0, cuts.at(0), margin); - EXPECT_NEAR(4.5, cuts.at(1), margin); - EXPECT_NEAR(8, cuts.at(2), margin); - EXPECT_NEAR(11.5, cuts.at(3), margin); - EXPECT_NEAR(15.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 3, 3, 3, 3, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4U, RepeatedValuesUniform) - { - samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; - // 0 1 2 3 4 5 6 7 8 9 - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(0.0, cuts.at(0), margin); - EXPECT_NEAR(1.0, cuts.at(1), margin); - EXPECT_NEAR(2.0, cuts.at(2), margin); - EXPECT_NEAR(3.0, cuts.at(3), margin); - EXPECT_NEAR(4.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4Q, RepeatedValuesQuantile) - { - samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; - // 0 1 2 3 4 5 6 7 8 9 - fit(X); - auto cuts = getCutPoints(); - ASSERT_EQ(5, cuts.size()); - EXPECT_NEAR(0.0, cuts.at(0), margin); - EXPECT_NEAR(1.0, cuts.at(1), margin); - EXPECT_NEAR(2.0, cuts.at(2), margin); - EXPECT_NEAR(3.0, cuts.at(3), margin); - EXPECT_NEAR(4.0, cuts.at(4), margin); - auto labels = transform(X); - labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; - EXPECT_EQ(expected, labels); - } - TEST_F(TestBinDisc4U, irisUniform) - { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - } - TEST_F(TestBinDisc4Q, irisQuantile) - { - ArffFiles file; - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - fit(X[0]); - auto Xt = transform(X[0]); - labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; - EXPECT_EQ(expected, Xt); - auto Xtt = fit_transform(X[0], file.getY()); - EXPECT_EQ(expected, Xtt); - auto Xt_t = torch::tensor(X[0], torch::kFloat32); - auto y_t = torch::tensor(file.getY(), torch::kInt32); - auto Xtt_t = fit_transform_t(Xt_t, y_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xtt_t[i].item()); - fit_t(Xt_t, y_t); - auto Xt_t2 = transform_t(Xt_t); - for (int i = 0; i < expected.size(); i++) - EXPECT_EQ(expected[i], Xt_t2[i].item()); - } + // TEST_F(TestBinDisc3U, Easy3BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; + // auto y = labels_t(); + // fit(X, y); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(3.66667, cuts.at(1), margin); + // EXPECT_NEAR(6.33333, cuts.at(2), margin); + // EXPECT_NEAR(9.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3Q, Easy3BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts[0], margin); + // EXPECT_NEAR(3.666667, cuts[1], margin); + // EXPECT_NEAR(6.333333, cuts[2], margin); + // EXPECT_NEAR(9, cuts[3], margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3U, X10BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(4.0, cuts.at(1), margin); + // EXPECT_NEAR(7.0, cuts.at(2), margin); + // EXPECT_NEAR(10.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3Q, X10BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(4.0, cuts.at(1), margin); + // EXPECT_NEAR(7.0, cuts.at(2), margin); + // EXPECT_NEAR(10.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3U, X11BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(4.33333, cuts.at(1), margin); + // EXPECT_NEAR(7.66667, cuts.at(2), margin); + // EXPECT_NEAR(11.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3U, X11BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(4.33333, cuts.at(1), margin); + // EXPECT_NEAR(7.66667, cuts.at(2), margin); + // EXPECT_NEAR(11.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3U, ConstantUniform) + // { + // samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(2, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(1, cuts.at(1), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 0, 0 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3Q, ConstantQuantile) + // { + // samples_t X = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(2, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(1, cuts.at(1), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 0, 0 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc3U, EmptyUniform) + // { + // samples_t X = {}; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(2, cuts.size()); + // EXPECT_NEAR(0, cuts.at(0), margin); + // EXPECT_NEAR(0, cuts.at(1), margin); + // } + // TEST_F(TestBinDisc3Q, EmptyQuantile) + // { + // samples_t X = {}; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(2, cuts.size()); + // EXPECT_NEAR(0, cuts.at(0), margin); + // EXPECT_NEAR(0, cuts.at(1), margin); + // } + // TEST(TestBinDisc3, ExceptionNumberBins) + // { + // EXPECT_THROW(BinDisc(2), std::invalid_argument); + // } + // TEST_F(TestBinDisc3U, EasyRepeated) + // { + // samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(4, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(1.66667, cuts.at(1), margin); + // EXPECT_NEAR(2.33333, cuts.at(2), margin); + // EXPECT_NEAR(3.0, cuts.at(3), margin); + // auto labels = transform(X); + // labels_t expected = { 2, 0, 0, 2, 0, 0, 2, 0, 0 }; + // EXPECT_EQ(expected, labels); + // ASSERT_EQ(3.0, X[0]); // X is not modified + // } + // TEST_F(TestBinDisc3Q, EasyRepeated) + // { + // samples_t X = { 3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(3, cuts.size()); + // EXPECT_NEAR(1, cuts.at(0), margin); + // EXPECT_NEAR(1.66667, cuts.at(1), margin); + // EXPECT_NEAR(3.0, cuts.at(2), margin); + // auto labels = transform(X); + // labels_t expected = { 1, 0, 0, 1, 0, 0, 1, 0, 0 }; + // EXPECT_EQ(expected, labels); + // ASSERT_EQ(3.0, X[0]); // X is not modified + // } + // TEST_F(TestBinDisc4U, Easy4BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(3.75, cuts.at(1), margin); + // EXPECT_NEAR(6.5, cuts.at(2), margin); + // EXPECT_NEAR(9.25, cuts.at(3), margin); + // EXPECT_NEAR(12.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4Q, Easy4BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(3.75, cuts.at(1), margin); + // EXPECT_NEAR(6.5, cuts.at(2), margin); + // EXPECT_NEAR(9.25, cuts.at(3), margin); + // EXPECT_NEAR(12.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4U, X13BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.0, cuts.at(1), margin); + // EXPECT_NEAR(7.0, cuts.at(2), margin); + // EXPECT_NEAR(10.0, cuts.at(3), margin); + // EXPECT_NEAR(13.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4Q, X13BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.0, cuts.at(1), margin); + // EXPECT_NEAR(7.0, cuts.at(2), margin); + // EXPECT_NEAR(10.0, cuts.at(3), margin); + // EXPECT_NEAR(13.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4U, X14BinsUniform) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.25, cuts.at(1), margin); + // EXPECT_NEAR(7.5, cuts.at(2), margin); + // EXPECT_NEAR(10.75, cuts.at(3), margin); + // EXPECT_NEAR(14.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4Q, X14BinsQuantile) + // { + // samples_t X = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.25, cuts.at(1), margin); + // EXPECT_NEAR(7.5, cuts.at(2), margin); + // EXPECT_NEAR(10.75, cuts.at(3), margin); + // EXPECT_NEAR(14.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4U, X15BinsUniform) + // { + // samples_t X = { 15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.5, cuts.at(1), margin); + // EXPECT_NEAR(8, cuts.at(2), margin); + // EXPECT_NEAR(11.5, cuts.at(3), margin); + // EXPECT_NEAR(15.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 3, 1, 3, 3, 1, 0, 3, 2, 2, 2, 1, 0, 0, 1, 0 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4Q, X15BinsQuantile) + // { + // samples_t X = { 15.0, 13.0, 12.0, 14.0, 6.0, 1.0, 8.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0 }; + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(1.0, cuts.at(0), margin); + // EXPECT_NEAR(4.5, cuts.at(1), margin); + // EXPECT_NEAR(8, cuts.at(2), margin); + // EXPECT_NEAR(11.5, cuts.at(3), margin); + // EXPECT_NEAR(15.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 3, 3, 3, 3, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4U, RepeatedValuesUniform) + // { + // samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; + // // 0 1 2 3 4 5 6 7 8 9 + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(0.0, cuts.at(0), margin); + // EXPECT_NEAR(1.0, cuts.at(1), margin); + // EXPECT_NEAR(2.0, cuts.at(2), margin); + // EXPECT_NEAR(3.0, cuts.at(3), margin); + // EXPECT_NEAR(4.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; + // EXPECT_EQ(expected, labels); + // } + // TEST_F(TestBinDisc4Q, RepeatedValuesQuantile) + // { + // samples_t X = { 0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0 }; + // // 0 1 2 3 4 5 6 7 8 9 + // fit(X); + // auto cuts = getCutPoints(); + // ASSERT_EQ(5, cuts.size()); + // EXPECT_NEAR(0.0, cuts.at(0), margin); + // EXPECT_NEAR(1.0, cuts.at(1), margin); + // EXPECT_NEAR(2.0, cuts.at(2), margin); + // EXPECT_NEAR(3.0, cuts.at(3), margin); + // EXPECT_NEAR(4.0, cuts.at(4), margin); + // auto labels = transform(X); + // labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 }; + // EXPECT_EQ(expected, labels); + // } TEST(TestBinDiscGeneric, Fileset) { Experiments exps(data_path + "tests.txt"); int num = 0; while (exps.is_next()) { + ++num; Experiment exp = exps.next(); - std::cout << "Exp #: " << ++num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl; - BinDisc disc(exp.n_bins_, exp.strategy_ == "Q" ? strategy_t::QUANTILE : strategy_t::UNIFORM); + BinDisc disc(exp.n_bins_, exp.strategy_[0] == 'Q' ? strategy_t::QUANTILE : strategy_t::UNIFORM); std::vector test; - for (float i = exp.from_; i < exp.to_; i += exp.step_) { - test.push_back(i); + if (exp.type_ == experiment_t::RANGE) { + for (float i = exp.from_; i < exp.to_; i += exp.step_) { + test.push_back(i); + } + } else { + test = exp.dataset_; } // show_vector(test, "Test"); auto empty = std::vector(); auto Xt = disc.fit_transform(test, empty); auto cuts = disc.getCutPoints(); EXPECT_EQ(exp.discretized_data_.size(), Xt.size()); + auto flag = false; + size_t n_errors = 0; for (int i = 0; i < exp.discretized_data_.size(); ++i) { if (exp.discretized_data_.at(i) != Xt.at(i)) { - std::cout << "Error at " << i << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl; + if (!flag) { + std::cout << "Exp #: " << num << " From: " << exp.from_ << " To: " << exp.to_ << " Step: " << exp.step_ << " Bins: " << exp.n_bins_ << " Strategy: " << exp.strategy_ << std::endl; + std::cout << "Error at " << i << " Expected: " << exp.discretized_data_.at(i) << " Got: " << Xt.at(i) << std::endl; + flag = true; + EXPECT_EQ(exp.discretized_data_.at(i), Xt.at(i)); + } + n_errors++; } } + if (flag) { + std::cout << "*** Found " << n_errors << " mistakes in this experiment dataset" << std::endl; + } EXPECT_EQ(exp.cutpoints_.size(), cuts.size()); for (int i = 0; i < exp.cutpoints_.size(); ++i) { EXPECT_NEAR(exp.cutpoints_.at(i), cuts.at(i), margin); } } + std::cout << "* Number of experiments tested: " << num << std::endl; } } diff --git a/tests/Discretizer_unittest.cpp b/tests/Discretizer_unittest.cpp index 4fcd856..b6c1819 100644 --- a/tests/Discretizer_unittest.cpp +++ b/tests/Discretizer_unittest.cpp @@ -30,34 +30,187 @@ namespace mdlp { EXPECT_EQ("1.2.3", version); } - TEST(Discretizer, BinIrisUniform) - { - ArffFiles file; - Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM); - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - auto y = labels_t(); - disc->fit(X[0], y); - auto Xt = disc->transform(X[0]); - labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - delete disc; - EXPECT_EQ(expected, Xt); - } - TEST(Discretizer, BinIrisQuantile) - { - ArffFiles file; - Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE); - file.load(data_path + "iris.arff", true); - vector& X = file.getX(); - auto y = labels_t(); - disc->fit(X[0], y); - auto Xt = disc->transform(X[0]); - labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; - delete disc; - EXPECT_EQ(expected, Xt); - } + // TEST(Discretizer, BinIrisUniform) + // { + // ArffFiles file; + // Discretizer* disc = new BinDisc(4, strategy_t::UNIFORM); + // file.load(data_path + "iris.arff", true); + // vector& X = file.getX(); + // auto y = labels_t(); + // disc->fit(X[0], y); + // auto Xt = disc->transform(X[0]); + // labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; + // delete disc; + // EXPECT_EQ(expected, Xt); + // } + // TEST(Discretizer, BinIrisQuantile) + // { + // ArffFiles file; + // Discretizer* disc = new BinDisc(4, strategy_t::QUANTILE); + // file.load(data_path + "iris.arff", true); + // vector& X = file.getX(); + // auto y = labels_t(); + // disc->fit(X[0], y); + // auto Xt = disc->transform(X[0]); + // labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 }; + // delete disc; + // EXPECT_EQ(expected, Xt); + // } + TEST(Discretizer, FImdlpIris) { + auto labelsq = { + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 3, + 3, + 3, + 1, + 3, + 1, + 2, + 0, + 3, + 1, + 0, + 2, + 2, + 2, + 1, + 3, + 1, + 2, + 2, + 1, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 1, + 2, + 3, + 2, + 1, + 1, + 1, + 2, + 2, + 0, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 2, + 3, + 2, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 1, + 2, + 3, + 3, + 3, + 3, + 2, + 3, + 1, + 3, + 2, + 3, + 3, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 2, + 2, + 3, + 2, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 3, + 3, + 2, + 3, + 2, + 2, + }; labels_t expected = { 5, 3, 4, 4, 5, 5, 5, 5, 2, 4, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 3, 5, 5, 5, 4, 4, 5, 5, 5, 4, 4, 5, 4, 3, 5, 5, 0, 4, 5, diff --git a/tests/Experiments.hpp b/tests/Experiments.hpp index b41e84a..dbdad02 100644 --- a/tests/Experiments.hpp +++ b/tests/Experiments.hpp @@ -7,13 +7,39 @@ #include #include #include "../typesFImdlp.h" + +template +void show_vector(const std::vector& data, std::string title) +{ + std::cout << title << ": "; + std::string sep = ""; + for (const auto& d : data) { + std::cout << sep << d; + sep = ", "; + } + std::cout << std::endl; +} +enum class experiment_t { + RANGE, + VECTOR +}; class Experiment { public: Experiment(float from_, float to_, float step_, int n_bins, std::string strategy, std::vector data_discretized, std::vector cutpoints) : - from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints } + from_{ from_ }, to_{ to_ }, step_{ step_ }, n_bins_{ n_bins }, strategy_{ strategy }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::RANGE } { - if (strategy != "Q" && strategy != "U") { - throw std::invalid_argument("Invalid strategy " + strategy); + validate_strategy(); + + } + Experiment(std::vector dataset, int n_bins, std::string strategy, std::vector data_discretized, std::vector cutpoints) : + n_bins_{ n_bins }, strategy_{ strategy }, dataset_{ dataset }, discretized_data_{ data_discretized }, cutpoints_{ cutpoints }, type_{ experiment_t::VECTOR } + { + validate_strategy(); + } + void validate_strategy() + { + if (strategy_ != "Q" && strategy_ != "U") { + throw std::invalid_argument("Invalid strategy " + strategy_); } } float from_; @@ -21,8 +47,10 @@ public: float step_; int n_bins_; std::string strategy_; + std::vector dataset_; std::vector discretized_data_; std::vector cutpoints_; + experiment_t type_; }; class Experiments { public: @@ -76,33 +104,30 @@ private: } Experiment parse_experiment(std::string& line) { + // Read experiment lines + std::string experiment, data, cuts, strategy; + std::getline(test_file, experiment); + std::getline(test_file, data); + std::getline(test_file, cuts); + // split data into variables + float from_, to_, step_; + int n_bins; + std::vector dataset; + auto data_discretized = parse_vector(data); + auto cutpoints = parse_vector(cuts); if (line == "RANGE") { - std::getline(test_file, line); - auto [from_, to_, step_, n_bins, strategy] = parse_header(line); - } else { - std::getline(test_file, line); - + tie(from_, to_, step_, n_bins, strategy) = parse_header(experiment); + return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints }; } - std::getline(test_file, line); - auto data_discretized = parse_vector(line); - std::getline(test_file, line); - auto cutpoints = parse_vector(line); - return Experiment{ from_, to_, step_, n_bins, strategy, data_discretized, cutpoints }; + strategy = experiment.substr(0, 1); + n_bins = std::stoi(experiment.substr(1, 1)); + data = experiment.substr(3, experiment.size() - 4); + dataset = parse_vector(data); + return Experiment(dataset, n_bins, strategy, data_discretized, cutpoints); } std::ifstream test_file; std::string filename; std::string line; bool exp_end; }; -template -void show_vector(const std::vector& data, std::string title) -{ - std::cout << title << ": "; - std::string sep = ""; - for (const auto& d : data) { - std::cout << sep << d; - sep = ", "; - } - std::cout << std::endl; -} #endif \ No newline at end of file diff --git a/tests/datasets/tests.txt b/tests/datasets/tests.txt index 91e772e..5046f08 100644 --- a/tests/datasets/tests.txt +++ b/tests/datasets/tests.txt @@ -3,6 +3,9 @@ # discretized data # cut points # +# +# Range experiments +# RANGE 0, 100, 1, 4, Q 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 @@ -91,6 +94,9 @@ RANGE 1, 15, 1, 3, U 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2 1.0, 5.33333, 9.66667, 14.0 +# +# Vector experiments +# VECTOR Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0] 1, 0, 0, 1, 0, 0, 1, 0, 0 @@ -147,3 +153,70 @@ VECTOR U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0] 0, 0, 0, 0, 1, 1, 2, 2, 2, 2 0.0, 1.33333, 2.66667, 4.0 +# +# Vector experiments with iris +# +VECTOR +Q3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1 +4.3, 5.4, 6.3, 7.9 +VECTOR +U3[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 2, 0, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1 +4.3, 5.5, 6.7, 7.9 +VECTOR +Q4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 +4.3, 5.1, 5.8, 6.4, 7.9 +VECTOR +U4[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0, 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 +4.3, 5.2, 6.1, 7.0, 7.9 +VECTOR +Q3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 2, 0, 1, 0, 0, 2, 1, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1 +2.0, 2.9, 3.2, 4.4 +VECTOR +U3[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 +2.0, 2.8, 3.6, 4.4 +VECTOR +Q4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +3, 2, 2, 2, 3, 3, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 0, 2, 3, 3, 2, 3, 2, 3, 3, 2, 2, 2, 0, 1, 1, 3, 0, 1, 0, 0, 2, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 0, 0, 2, 3, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 0, 1, 3, 0, 2, 1, 2, 2, 0, 1, 0, 3, 2, 0, 2, 0, 1, 2, 2, 3, 0, 0, 2, 1, 1, 0, 3, 2, 1, 2, 1, 2, 1, 3, 1, 1, 0, 2, 3, 2, 2, 2, 2, 2, 0, 2, 3, 2, 0, 2, 3, 2 +2.0, 2.8, 3.0, 3.3, 4.4 +VECTOR +U4[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0] +2, 1, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 3, 1, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 0, 1, 1, 2, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 2, 2, 1, 1, 0, 1, 2, 1, 2, 1, 0, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1 +2.0, 2.6, 3.2, 3.8, 4.4 +VECTOR +Q3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +1.0, 2.63333, 4.9, 6.9 +VECTOR +U3[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +1.0, 2.96667, 4.93333, 6.9 +VECTOR +Q4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 3, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3 +1.0, 1.6, 4.35, 5.1, 6.9 +VECTOR +U4[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1.0, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4.0, 4.9, 4.7, 4.3, 4.4, 4.8, 5.0, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4.0, 4.4, 4.6, 4.0, 3.3, 4.2, 4.2, 4.2, 4.3, 3.0, 4.1, 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5.0, 5.1, 5.3, 5.5, 6.7, 6.9, 5.0, 5.7, 4.9, 6.7, 4.9, 5.7, 6.0, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2 +1.0, 2.475, 3.95, 5.425, 6.9 +VECTOR +Q3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.1, 0.86667, 1.6, 2.5 +VECTOR +U3[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +0.1, 0.9, 1.7, 2.5 +VECTOR +Q4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 3, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 +0.1, 0.3, 1.3, 1.8, 2.5 +VECTOR +U4[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1.0, 1.1, 1.0, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1.0, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.9, 2.1, 2.0, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2.0, 2.0, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2.0, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8] +0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2 +0.1, 0.7, 1.3, 1.9, 2.5 diff --git a/tests/tests_do.py b/tests/tests_do.py index 95a2c26..3cd8199 100644 --- a/tests/tests_do.py +++ b/tests/tests_do.py @@ -6,6 +6,7 @@ with open("datasets/tests.txt") as f: data = [x.strip() for x in data if x[0] != "#"] +errors = False for i in range(0, len(data), 4): experiment_type = data[i] print("Experiment:", data[i + 1]) @@ -31,20 +32,28 @@ for i in range(0, len(data), 4): result = disc.transform(X) result = [int(x) for x in result.flatten()] expected = [int(x) for x in expected_data.split(",")] + # + # Check the Results + # assert len(result) == len(expected) for j in range(len(result)): if result[j] != expected[j]: - print("Error at", j, "Expected=", expected[j], "Result=", result[j]) + print("* Error at", j, "Expected=", expected[j], "Result=", result[j]) + errors = True expected_cuts = disc.bin_edges_[0] computed_cuts = [float(x) for x in cuts_data.split(",")] assert len(expected_cuts) == len(computed_cuts) for j in range(len(expected_cuts)): if round(expected_cuts[j], 5) != computed_cuts[j]: print( - "Error at", + "* Error at", j, "Expected=", expected_cuts[j], "Result=", computed_cuts[j], ) + errors = True +if errors: + raise Exception("There were errors!") +print("*** All tests run succesfully! ***") diff --git a/tests/tests_generate.ipynb b/tests/tests_generate.ipynb index 467ce2f..b2936a7 100644 --- a/tests/tests_generate.ipynb +++ b/tests/tests_generate.ipynb @@ -6,7 +6,8 @@ "metadata": {}, "outputs": [], "source": [ - "from sklearn.preprocessing import KBinsDiscretizer" + "from sklearn.preprocessing import KBinsDiscretizer\n", + "from sklearn.datasets import load_iris" ] }, { @@ -84,6 +85,12 @@ " file.write(\"# discretized data\\n\")\n", " file.write(\"# cut points\\n\")\n", " file.write(\"#\\n\")\n", + " #\n", + " # Range experiments\n", + " #\n", + " file.write(\"#\\n\")\n", + " file.write(\"# Range experiments\\n\")\n", + " file.write(\"#\\n\")\n", " for experiment in experiments_range:\n", " file.write(\"RANGE\\n\")\n", " (from_, to_, step_, bins_, strategy) = experiment\n", @@ -93,6 +100,12 @@ " result = disc.transform(data)\n", " file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n", " write_lists(file, result, disc.bin_edges_[0])\n", + " #\n", + " # Vector experiments\n", + " #\n", + " file.write(\"#\\n\")\n", + " file.write(\"# Vector experiments\\n\")\n", + " file.write(\"#\\n\")\n", " for n_bins, experiment in experiments_vectors:\n", " for strategy in [\"Q\", \"U\"]:\n", " file.write(\"VECTOR\\n\")\n", @@ -105,8 +118,84 @@ " )\n", " data = [[x] for x in experiment]\n", " result = disc.fit_transform(data)\n", - " write_lists(file, result, disc.bin_edges_[0])" + " write_lists(file, result, disc.bin_edges_[0])\n", + " #\n", + " # Vector experiments iris\n", + " #\n", + " file.write(\"#\\n\");\n", + " file.write(\"# Vector experiments with iris\\n\");\n", + " file.write(\"#\\n\");\n", + " X, y = load_iris(return_X_y=True)\n", + " for i in range(X.shape[1]):\n", + " for n_bins in [3, 4]:\n", + " for strategy in [\"Q\", \"U\"]:\n", + " file.write(\"VECTOR\\n\")\n", + " experiment = X[:, i]\n", + " file.write(f\"{strategy}{n_bins}{experiment.tolist()}\\n\")\n", + " disc = KBinsDiscretizer(\n", + " n_bins=n_bins,\n", + " encode=\"ordinal\",\n", + " strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\")\n", + " data = [[x] for x in experiment]\n", + " result = disc.fit_transform(data)\n", + " write_lists(file, result, disc.bin_edges_[0])" ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cut points [array([ 0., 33., 66., 99.])]\n", + "i=32 X[32]=[32] result[32]=[0.]\n", + "i=33 X[33]=[33] result[33]=[1.]\n", + "i=34 X[34]=[34] result[34]=[1.]\n", + "i=65 X[65]=[65] result[65]=[1.]\n", + "i=66 X[66]=[66] result[66]=[2.]\n", + "i=67 X[67]=[67] result[67]=[2.]\n" + ] + } + ], + "source": [ + "X = [[x] for x in range(100)]\n", + "disc = KBinsDiscretizer(n_bins=3, encode=\"ordinal\", strategy=\"uniform\")\n", + "result = disc.fit_transform(X)\n", + "print(\"Cut points\", disc.bin_edges_)\n", + "test = [32, 33, 34, 65, 66, 67]\n", + "for i in test:\n", + " print(f\"{i=} X[{i}]={X[i]} result[{i}]={result[i]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "right [0 1 1 1 2 2]\n", + "left [0 0 1 1 1 2]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "print(\"right\", np.searchsorted(disc.bin_edges_[0][1:-1],test, side=\"right\"))\n", + "print(\"left \", np.searchsorted(disc.bin_edges_[0][1:-1],test))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {