2 Commits

Author: Ricardo Montañana Gómez
cb9babace1  Merge c488ace719 into 7b0673fd4b  2024-07-02 11:50:55 +02:00
c488ace719  Fix FImdlp tests  2024-07-02 11:50:42 +02:00
7 changed files with 283 additions and 103 deletions

View File

@@ -25,7 +25,7 @@ namespace mdlp {
     }
     if (proposed_cuts < 1)
         return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
-    return static_cast<size_t>(proposed_cuts); // As the first and last cutpoints shall be ignored in transform
+    return static_cast<size_t>(proposed_cuts); // The 2 extra cutpoints should not be considered here as this parameter is considered before they are added
 }
 void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
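For orientation, a minimal standalone sketch of the rule the hunk above implements (the free function and its name are assumptions, not the library source; any handling of proposed_cuts == 0 or negative values sits outside this hunk and is omitted): a value below 1 is read as a fraction of the number of samples, a value of 1 or more as an absolute count, and the two boundary cut points appended later are not counted against this limit.

#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical restatement of the branch shown above; illustration only.
size_t max_num_cut_points(const std::vector<float>& X, float proposed_cuts)
{
    if (proposed_cuts < 1)   // below 1: a fraction of the number of samples
        return static_cast<size_t>(std::round(static_cast<float>(X.size()) * proposed_cuts));
    // 1 or more: an absolute count; the min/max cut points added afterwards are not counted here
    return static_cast<size_t>(proposed_cuts);
}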

View File

@@ -347,44 +347,44 @@ namespace mdlp {
     labels_t expected = { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3 };
     EXPECT_EQ(expected, labels);
 }
-// TEST_F(TestBinDisc4U, irisUniform)
-// {
-//     ArffFiles file;
-//     file.load(data_path + "iris.arff", true);
-//     vector<samples_t>& X = file.getX();
-//     fit(X[0]);
-//     auto Xt = transform(X[0]);
-//     labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
-//     EXPECT_EQ(expected, Xt);
-//     auto Xtt = fit_transform(X[0], file.getY());
-//     EXPECT_EQ(expected, Xtt);
-//     auto Xt_t = torch::tensor(X[0], torch::kFloat32);
-//     auto y_t = torch::tensor(file.getY(), torch::kInt32);
-//     auto Xtt_t = fit_transform_t(Xt_t, y_t);
-//     for (int i = 0; i < expected.size(); i++)
-//         EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
-// }
-// TEST_F(TestBinDisc4Q, irisQuantile)
-// {
-//     ArffFiles file;
-//     file.load(data_path + "iris.arff", true);
-//     vector<samples_t>& X = file.getX();
-//     fit(X[0]);
-//     auto Xt = transform(X[0]);
-//     labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
-//     EXPECT_EQ(expected, Xt);
-//     auto Xtt = fit_transform(X[0], file.getY());
-//     EXPECT_EQ(expected, Xtt);
-//     auto Xt_t = torch::tensor(X[0], torch::kFloat32);
-//     auto y_t = torch::tensor(file.getY(), torch::kInt32);
-//     auto Xtt_t = fit_transform_t(Xt_t, y_t);
-//     for (int i = 0; i < expected.size(); i++)
-//         EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
-//     fit_t(Xt_t, y_t);
-//     auto Xt_t2 = transform_t(Xt_t);
-//     for (int i = 0; i < expected.size(); i++)
-//         EXPECT_EQ(expected[i], Xt_t2[i].item<int>());
-// }
+TEST_F(TestBinDisc4U, irisUniform)
+{
+    ArffFiles file;
+    file.load(data_path + "iris.arff", true);
+    vector<samples_t>& X = file.getX();
+    fit(X[0]);
+    auto Xt = transform(X[0]);
+    labels_t expected = { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 2, 2, 3, 0, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 3, 3, 1, 2, 1, 3, 2, 2, 3, 2, 1, 2, 3, 3, 3, 2, 2, 1, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
+    EXPECT_EQ(expected, Xt);
+    auto Xtt = fit_transform(X[0], file.getY());
+    EXPECT_EQ(expected, Xtt);
+    auto Xt_t = torch::tensor(X[0], torch::kFloat32);
+    auto y_t = torch::tensor(file.getY(), torch::kInt32);
+    auto Xtt_t = fit_transform_t(Xt_t, y_t);
+    for (int i = 0; i < expected.size(); i++)
+        EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
+}
+TEST_F(TestBinDisc4Q, irisQuantile)
+{
+    ArffFiles file;
+    file.load(data_path + "iris.arff", true);
+    vector<samples_t>& X = file.getX();
+    fit(X[0]);
+    auto Xt = transform(X[0]);
+    labels_t expected = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 3, 3, 1, 3, 1, 2, 0, 3, 1, 0, 2, 2, 2, 1, 3, 1, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 2, 1, 2, 3, 2, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 3, 2, 3, 1, 3, 2, 3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 2, 2 };
+    EXPECT_EQ(expected, Xt);
+    auto Xtt = fit_transform(X[0], file.getY());
+    EXPECT_EQ(expected, Xtt);
+    auto Xt_t = torch::tensor(X[0], torch::kFloat32);
+    auto y_t = torch::tensor(file.getY(), torch::kInt32);
+    auto Xtt_t = fit_transform_t(Xt_t, y_t);
+    for (int i = 0; i < expected.size(); i++)
+        EXPECT_EQ(expected[i], Xtt_t[i].item<int>());
+    fit_t(Xt_t, y_t);
+    auto Xt_t2 = transform_t(Xt_t);
+    for (int i = 0; i < expected.size(); i++)
+        EXPECT_EQ(expected[i], Xt_t2[i].item<int>());
+}
 TEST(TestBinDiscGeneric, Fileset)
 {
     Experiments exps(data_path + "tests.txt");

View File

@@ -76,7 +76,13 @@ private:
 }
 Experiment parse_experiment(std::string& line)
 {
+    if (line == "RANGE") {
+        std::getline(test_file, line);
     auto [from_, to_, step_, n_bins, strategy] = parse_header(line);
+    } else {
+        std::getline(test_file, line);
+    }
     std::getline(test_file, line);
     auto data_discretized = parse_vector<int>(line);
     std::getline(test_file, line);
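For orientation, an illustrative walker over the record layout this parser now has to handle (not the project's Experiments class; the layout is taken from the datasets/tests.txt diff further down): each record starts with a RANGE or VECTOR marker line, followed by a header line, a line of expected discretized labels, and a line of expected cut points.

#include <fstream>
#include <iostream>
#include <string>

// Hypothetical standalone walk over datasets/tests.txt; not the library parser.
int main()
{
    std::ifstream test_file("datasets/tests.txt");
    std::string line;
    while (std::getline(test_file, line)) {
        if (line.empty() || line[0] == '#')
            continue;                      // skip the comment header
        if (line != "RANGE" && line != "VECTOR")
            continue;                      // expect a record marker here
        std::string header, labels, cuts;
        std::getline(test_file, header);   // RANGE: "from, to, step, #bins, Q/U"; VECTOR: e.g. "Q3[3.0, 1.0, ...]"
        std::getline(test_file, labels);   // expected discretized data
        std::getline(test_file, cuts);     // expected cut points
        std::cout << line << ": " << header << " -> " << cuts << "\n";
    }
}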

View File

@@ -124,7 +124,7 @@ namespace mdlp {
 {
     samples_t X_ = { 1, 2, 2, 3, 4, 2, 3 };
     labels_t y_ = { 0, 0, 1, 2, 3, 4, 5 };
-    cutPoints_t expected = { 1.5f, 2.5f };
+    cutPoints_t expected = { 1.0, 1.5f, 2.5f, 4.0 };
     fit(X_, y_);
     auto computed = getCutPoints();
     EXPECT_EQ(computed.size(), expected.size());
@@ -167,29 +167,31 @@ namespace mdlp {
     y = { 1 };
     fit(X, y);
     computed = getCutPoints();
-    EXPECT_EQ(computed.size(), 0);
+    EXPECT_EQ(computed.size(), 2);
     X = { 1, 3 };
     y = { 1, 2 };
     fit(X, y);
     computed = getCutPoints();
-    EXPECT_EQ(computed.size(), 0);
+    EXPECT_EQ(computed.size(), 2);
     X = { 2, 4 };
     y = { 1, 2 };
     fit(X, y);
     computed = getCutPoints();
-    EXPECT_EQ(computed.size(), 0);
+    EXPECT_EQ(computed.size(), 2);
     X = { 1, 2, 3 };
     y = { 1, 2, 2 };
     fit(X, y);
     computed = getCutPoints();
-    EXPECT_EQ(computed.size(), 1);
-    EXPECT_NEAR(computed[0], 1.5, precision);
+    EXPECT_EQ(computed.size(), 3);
+    EXPECT_NEAR(computed[0], 1, precision);
+    EXPECT_NEAR(computed[1], 1.5, precision);
+    EXPECT_NEAR(computed[2], 3, precision);
 }
 TEST_F(TestFImdlp, TestArtificialDataset)
 {
     fit(X, y);
-    cutPoints_t expected = { 5.05f };
+    cutPoints_t expected = { 4.7, 5.05, 6.0 };
     vector<precision_t> computed = getCutPoints();
     EXPECT_EQ(computed.size(), expected.size());
     for (unsigned long i = 0; i < computed.size(); i++) {
@@ -200,10 +202,10 @@ namespace mdlp {
 TEST_F(TestFImdlp, TestIris)
 {
     vector<cutPoints_t> expected = {
-        {5.45f, 5.75f},
-        {2.75f, 2.85f, 2.95f, 3.05f, 3.35f},
-        {2.45f, 4.75f, 5.05f},
-        {0.8f, 1.75f}
+        {4.3, 5.45f, 5.75f, 7.9},
+        {2, 2.75f, 2.85f, 2.95f, 3.05f, 3.35f, 4.4},
+        {1, 2.45f, 4.75f, 5.05f, 6.9},
+        {0.1, 0.8f, 1.75f, 2.5}
     };
     vector<int> depths = { 3, 5, 4, 3 };
     auto test = CPPFImdlp();
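The pattern in the updated expectations above, and in the remaining hunks of this file, is that every expected cut point vector is now bracketed by the feature's minimum and maximum (for example 4.3 and 7.9 for iris sepal length), matching the "2 extra cutpoints" comment change in CPPFImdlp.cpp. A small illustration of that convention follows; the binning helper is an assumption for the sketch, not the library's transform:

#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    // { min(X), interior MDLP cut points..., max(X) }, as in the sepal length expectation above
    std::vector<float> cuts = { 4.3f, 5.45f, 5.75f, 7.9f };
    auto bin = [&](float v) {
        // only the interior cut points decide the bin; the two boundary values are ignored
        return std::upper_bound(cuts.begin() + 1, cuts.end() - 1, v) - (cuts.begin() + 1);
    };
    std::cout << bin(5.0f) << " " << bin(5.6f) << " " << bin(6.1f) << "\n"; // prints: 0 1 2
}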
@@ -213,7 +215,7 @@ namespace mdlp {
 TEST_F(TestFImdlp, ComputeCutPointsGCase)
 {
     cutPoints_t expected;
-    expected = { 1.5 };
+    expected = { 0, 1.5, 2 };
     samples_t X_ = { 0, 1, 2, 2, 2 };
     labels_t y_ = { 1, 1, 1, 2, 2 };
     fit(X_, y_);
@@ -247,10 +249,10 @@ namespace mdlp {
     // Set max_depth to 1
     auto test = CPPFImdlp(3, 1, 0);
     vector<cutPoints_t> expected = {
-        {5.45f},
-        {3.35f},
-        {2.45f},
-        {0.8f}
+        {4.3, 5.45f, 7.9},
+        {2, 3.35f, 4.4},
+        {1, 2.45f, 6.9},
+        {0.1, 0.8f, 2.5}
     };
     vector<int> depths = { 1, 1, 1, 1 };
     test_dataset(test, "iris", expected, depths);
@@ -261,10 +263,10 @@ namespace mdlp {
     auto test = CPPFImdlp(75, 100, 0);
     // Set min_length to 75
     vector<cutPoints_t> expected = {
-        {5.45f, 5.75f},
-        {2.85f, 3.35f},
-        {2.45f, 4.75f},
-        {0.8f, 1.75f}
+        {4.3, 5.45f, 5.75f, 7.9},
+        {2, 2.85f, 3.35f, 4.4},
+        {1, 2.45f, 4.75f, 6.9},
+        {0.1, 0.8f, 1.75f, 2.5}
     };
     vector<int> depths = { 3, 2, 2, 2 };
     test_dataset(test, "iris", expected, depths);
@@ -275,10 +277,10 @@ namespace mdlp {
     // Set min_length to 75
     auto test = CPPFImdlp(75, 2, 0);
     vector<cutPoints_t> expected = {
-        {5.45f, 5.75f},
-        {2.85f, 3.35f},
-        {2.45f, 4.75f},
-        {0.8f, 1.75f}
+        {4.3, 5.45f, 5.75f, 7.9},
+        {2, 2.85f, 3.35f, 4.4},
+        {1, 2.45f, 4.75f, 6.9},
+        {0.1, 0.8f, 1.75f, 2.5}
     };
     vector<int> depths = { 2, 2, 2, 2 };
     test_dataset(test, "iris", expected, depths);
@@ -289,10 +291,10 @@ namespace mdlp {
     // Set min_length to 75
     auto test = CPPFImdlp(75, 2, 1);
     vector<cutPoints_t> expected = {
-        {5.45f},
-        {2.85f},
-        {2.45f},
-        {0.8f}
+        {4.3, 5.45f, 7.9},
+        {2, 2.85f, 4.4},
+        {1, 2.45f, 6.9},
+        {0.1, 0.8f, 2.5}
     };
     vector<int> depths = { 2, 2, 2, 2 };
     test_dataset(test, "iris", expected, depths);
@@ -304,10 +306,10 @@ namespace mdlp {
     // Set min_length to 75
     auto test = CPPFImdlp(75, 2, 0.2f);
     vector<cutPoints_t> expected = {
-        {5.45f, 5.75f},
-        {2.85f, 3.35f},
-        {2.45f, 4.75f},
-        {0.8f, 1.75f}
+        {4.3, 5.45f, 5.75f, 7.9},
+        {2, 2.85f, 3.35f, 4.4},
+        {1, 2.45f, 4.75f, 6.9},
+        {0.1, 0.8f, 1.75f, 2.5}
     };
     vector<int> depths = { 2, 2, 2, 2 };
     test_dataset(test, "iris", expected, depths);
@@ -327,7 +329,6 @@ namespace mdlp {
         computed = compute_max_num_cut_points();
         ASSERT_EQ(expected, computed);
     }
 }
 TEST_F(TestFImdlp, TransformTest)
 {
@@ -345,15 +346,15 @@ namespace mdlp {
     vector<samples_t>& X = file.getX();
     labels_t& y = file.getY();
     fit(X[1], y);
-    // auto computed = transform(X[1]);
-    // EXPECT_EQ(computed.size(), expected.size());
-    // for (unsigned long i = 0; i < computed.size(); i++) {
-    //     EXPECT_EQ(computed[i], expected[i]);
-    // }
-    // auto computed_ft = fit_transform(X[1], y);
-    // EXPECT_EQ(computed_ft.size(), expected.size());
-    // for (unsigned long i = 0; i < computed_ft.size(); i++) {
-    //     EXPECT_EQ(computed_ft[i], expected[i]);
-    // }
+    auto computed = transform(X[1]);
+    EXPECT_EQ(computed.size(), expected.size());
+    for (unsigned long i = 0; i < computed.size(); i++) {
+        EXPECT_EQ(computed[i], expected[i]);
+    }
+    auto computed_ft = fit_transform(X[1], y);
+    EXPECT_EQ(computed_ft.size(), expected.size());
+    for (unsigned long i = 0; i < computed_ft.size(); i++) {
+        EXPECT_EQ(computed_ft[i], expected[i]);
+    }
 }
 }

View File

@@ -3,33 +3,147 @@
 # discretized data
 # cut points
 #
+RANGE
 0, 100, 1, 4, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 24.75, 49.5, 74.25, 99.0
+RANGE
 0, 50, 1, 4, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 12.25, 24.5, 36.75, 49.0
+RANGE
 0, 100, 1, 3, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 33.0, 66.0, 99.0
+RANGE
 0, 50, 1, 3, Q
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 16.33333, 32.66667, 49.0
+RANGE
 0, 10, 1, 3, Q
 0, 0, 0, 0, 1, 1, 1, 2, 2, 2
 0.0, 3.0, 6.0, 9.0
+RANGE
 0, 100, 1, 4, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 24.75, 49.5, 74.25, 99.0
+RANGE
 0, 50, 1, 4, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
 0.0, 12.25, 24.5, 36.75, 49.0
+RANGE
 0, 100, 1, 3, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 33.0, 66.0, 99.0
+RANGE
 0, 50, 1, 3, U
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 0.0, 16.33333, 32.66667, 49.0
+RANGE
 0, 10, 1, 3, U
 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
 0.0, 3.0, 6.0, 9.0
+RANGE
+1, 10, 1, 3, Q
+0, 0, 0, 1, 1, 1, 2, 2, 2
+1.0, 3.66667, 6.33333, 9.0
+RANGE
+1, 10, 1, 3, U
+0, 0, 0, 1, 1, 1, 2, 2, 2
+1.0, 3.66667, 6.33333, 9.0
+RANGE
+1, 11, 1, 3, Q
+0, 0, 0, 1, 1, 1, 1, 2, 2, 2
+1.0, 4.0, 7.0, 10.0
+RANGE
+1, 11, 1, 3, U
+0, 0, 0, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.0, 7.0, 10.0
+RANGE
+1, 12, 1, 3, Q
+0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.33333, 7.66667, 11.0
+RANGE
+1, 12, 1, 3, U
+0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.33333, 7.66667, 11.0
+RANGE
+1, 13, 1, 3, Q
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.66667, 8.33333, 12.0
+RANGE
+1, 13, 1, 3, U
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.66667, 8.33333, 12.0
+RANGE
+1, 14, 1, 3, Q
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.0, 9.0, 13.0
+RANGE
+1, 14, 1, 3, U
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.0, 9.0, 13.0
+RANGE
+1, 15, 1, 3, Q
+0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.33333, 9.66667, 14.0
+RANGE
+1, 15, 1, 3, U
+0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.33333, 9.66667, 14.0
+VECTOR
+Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
+1, 0, 0, 1, 0, 0, 1, 0, 0
+1.0, 1.66667, 3.0
+VECTOR
+U3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]
+2, 0, 0, 2, 0, 0, 2, 0, 0
+1.0, 1.66667, 2.33333, 3.0
+VECTOR
+Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.66667, 8.33333, 12.0
+VECTOR
+U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2
+1.0, 4.66667, 8.33333, 12.0
+VECTOR
+Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.0, 9.0, 13.0
+VECTOR
+U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]
+0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.0, 9.0, 13.0
+VECTOR
+Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
+0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.33333, 9.66667, 14.0
+VECTOR
+U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]
+0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.33333, 9.66667, 14.0
+VECTOR
+Q3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
+0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.66667, 10.33333, 15.0
+VECTOR
+U3[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]
+0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2
+1.0, 5.66667, 10.33333, 15.0
+VECTOR
+Q3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
+2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
+1.0, 5.66667, 10.33333, 15.0
+VECTOR
+U3[15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]
+2, 1, 2, 2, 1, 0, 2, 2, 1, 1, 1, 0, 0, 0, 0
+1.0, 5.66667, 10.33333, 15.0
+VECTOR
+Q3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
+0, 0, 0, 0, 1, 1, 2, 2, 2, 2
+0.0, 1.0, 3.0, 4.0
+VECTOR
+U3[0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]
+0, 0, 0, 0, 1, 1, 2, 2, 2, 2
+0.0, 1.33333, 2.66667, 4.0
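A note on the VECTOR headers above (e.g. Q3[3.0, 1.0, ...]): the first character is the strategy (Q = quantile, U = uniform), the second the number of bins, and the bracketed list holds the raw samples, which is exactly how the Python checker below slices the line. A hypothetical C++ decomposition of one header, for illustration only:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    std::string header = "Q3[3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]";
    char strategy = header[0];      // 'Q' or 'U'
    int n_bins = header[1] - '0';   // single-digit bin count
    std::vector<double> values;
    std::stringstream inner(header.substr(3, header.size() - 4)); // text between the brackets
    for (std::string token; std::getline(inner, token, ',');)
        values.push_back(std::stod(token));
    std::cout << strategy << " " << n_bins << " " << values.size() << "\n"; // prints: Q 3 9
}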

View File

@@ -1,3 +1,4 @@
+import json
 from sklearn.preprocessing import KBinsDiscretizer
 with open("datasets/tests.txt") as f:
@@ -5,27 +6,37 @@ with open("datasets/tests.txt") as f:
data = [x.strip() for x in data if x[0] != "#"] data = [x.strip() for x in data if x[0] != "#"]
for i in range(0, len(data), 3): for i in range(0, len(data), 4):
print("Experiment:", data[i]) experiment_type = data[i]
from_, to_, step_, n_bins_, strategy_ = data[i].split(",") print("Experiment:", data[i + 1])
if experiment_type == "RANGE":
range_data = data[i + 1]
from_, to_, step_, n_bins_, strategy_ = range_data.split(",")
X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
else:
strategy_ = data[i + 1][0]
n_bins_ = data[i + 1][1]
vector = data[i + 1][2:]
X = [[float(x)] for x in json.loads(vector)]
strategy = "quantile" if strategy_.strip() == "Q" else "uniform" strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
disc = KBinsDiscretizer( disc = KBinsDiscretizer(
n_bins=int(n_bins_), n_bins=int(n_bins_),
encode="ordinal", encode="ordinal",
strategy=strategy, strategy=strategy,
) )
X = [[float(x)] for x in range(int(from_), int(to_), int(step_))] expected_data = data[i + 2]
# result = disc.fit_transform(X) cuts_data = data[i + 3]
disc.fit(X) disc.fit(X)
result = disc.transform(X) result = disc.transform(X)
result = [int(x) for x in result.flatten()] result = [int(x) for x in result.flatten()]
expected = [int(x) for x in data[i + 1].split(",")] expected = [int(x) for x in expected_data.split(",")]
assert len(result) == len(expected) assert len(result) == len(expected)
for j in range(len(result)): for j in range(len(result)):
if result[j] != expected[j]: if result[j] != expected[j]:
print("Error at", j, "Expected=", expected[j], "Result=", result[j]) print("Error at", j, "Expected=", expected[j], "Result=", result[j])
expected_cuts = disc.bin_edges_[0] expected_cuts = disc.bin_edges_[0]
computed_cuts = [float(x) for x in data[i + 2].split(",")] computed_cuts = [float(x) for x in cuts_data.split(",")]
assert len(expected_cuts) == len(computed_cuts) assert len(expected_cuts) == len(computed_cuts)
for j in range(len(expected_cuts)): for j in range(len(expected_cuts)):
if round(expected_cuts[j], 5) != computed_cuts[j]: if round(expected_cuts[j], 5) != computed_cuts[j]:

View File

@@ -15,7 +15,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"experiments = [\n", "experiments_range = [\n",
" [0, 100, 1, 4, \"Q\"],\n", " [0, 100, 1, 4, \"Q\"],\n",
" [0, 50, 1, 4, \"Q\"],\n", " [0, 50, 1, 4, \"Q\"],\n",
" [0, 100, 1, 3, \"Q\"],\n", " [0, 100, 1, 3, \"Q\"],\n",
@@ -25,7 +25,29 @@
" [0, 50, 1, 4, \"U\"],\n", " [0, 50, 1, 4, \"U\"],\n",
" [0, 100, 1, 3, \"U\"],\n", " [0, 100, 1, 3, \"U\"],\n",
" [0, 50, 1, 3, \"U\"],\n", " [0, 50, 1, 3, \"U\"],\n",
"# \n",
" [0, 10, 1, 3, \"U\"],\n", " [0, 10, 1, 3, \"U\"],\n",
" [1, 10, 1, 3, \"Q\"],\n",
" [1, 10, 1, 3, \"U\"],\n",
" [1, 11, 1, 3, \"Q\"],\n",
" [1, 11, 1, 3, \"U\"],\n",
" [1, 12, 1, 3, \"Q\"],\n",
" [1, 12, 1, 3, \"U\"],\n",
" [1, 13, 1, 3, \"Q\"],\n",
" [1, 13, 1, 3, \"U\"],\n",
" [1, 14, 1, 3, \"Q\"],\n",
" [1, 14, 1, 3, \"U\"],\n",
" [1, 15, 1, 3, \"Q\"],\n",
" [1, 15, 1, 3, \"U\"]\n",
"]\n",
"experiments_vectors = [\n",
" (3, [3.0, 1.0, 1.0, 3.0, 1.0, 1.0, 3.0, 1.0, 1.0]),\n",
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]),\n",
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0]),\n",
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0]),\n",
" (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]),\n",
" (3, [15.0, 8.0, 12.0, 14.0, 6.0, 1.0, 13.0, 11.0, 10.0, 9.0, 7.0, 4.0, 3.0, 5.0, 2.0]),\n",
" (3, [0.0, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0])\n",
"]" "]"
] ]
}, },
@@ -33,31 +55,57 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/rmontanana/miniconda3/lib/python3.11/site-packages/sklearn/preprocessing/_discretization.py:307: UserWarning: Bins whose width are too small (i.e., <= 1e-8) in feature 0 are removed. Consider decreasing the number of bins.\n",
" warnings.warn(\n"
]
}
],
"source": [ "source": [
"def write_lists(file, data, cuts):\n",
" sep = \"\"\n",
" for res in data:\n",
" file.write(f\"{sep}{int(res):d}\")\n",
" sep= \", \"\n",
" file.write(\"\\n\")\n",
" sep = \"\"\n",
" for res in cuts:\n",
" file.write(sep + str(round(res,5)))\n",
" sep = \", \"\n",
" file.write(\"\\n\")\n",
"\n",
"with open(\"datasets/tests.txt\", \"w\") as file:\n", "with open(\"datasets/tests.txt\", \"w\") as file:\n",
" file.write(\"#\\n\")\n", " file.write(\"#\\n\")\n",
" file.write(\"# from, to, step, #bins, Q/U\\n\")\n", " file.write(\"# from, to, step, #bins, Q/U\\n\")\n",
" file.write(\"# discretized data\\n\")\n", " file.write(\"# discretized data\\n\")\n",
" file.write(\"# cut points\\n\")\n", " file.write(\"# cut points\\n\")\n",
" file.write(\"#\\n\")\n", " file.write(\"#\\n\")\n",
" for experiment in experiments:\n", " for experiment in experiments_range:\n",
" file.write(\"RANGE\\n\")\n",
" (from_, to_, step_, bins_, strategy) = experiment\n", " (from_, to_, step_, bins_, strategy) = experiment\n",
" disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n", " disc = KBinsDiscretizer(n_bins=bins_, encode='ordinal', strategy='quantile' if strategy.strip() == \"Q\" else 'uniform')\n",
" data = [[x] for x in range(from_, to_, step_)]\n", " data = [[x] for x in range(from_, to_, step_)]\n",
" disc.fit(data)\n", " disc.fit(data)\n",
" result = disc.transform(data)\n", " result = disc.transform(data)\n",
" file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n", " file.write(f\"{from_}, {to_}, {step_}, {bins_}, {strategy}\\n\")\n",
" sep = \"\"\n", " write_lists(file, result, disc.bin_edges_[0])\n",
" for res in result:\n", " for n_bins, experiment in experiments_vectors:\n",
" file.write(f\"{sep}{int(res):d}\")\n", " for strategy in [\"Q\", \"U\"]:\n",
" sep= \", \"\n", " file.write(\"VECTOR\\n\")\n",
" file.write(\"\\n\")\n", " file.write(f\"{strategy}{n_bins}{experiment}\\n\")\n",
" sep = \"\"\n", " disc = KBinsDiscretizer(\n",
" for res in disc.bin_edges_[0]:\n", " n_bins=n_bins,\n",
" file.write(sep + str(round(res,5)))\n", " encode=\"ordinal\",\n",
" sep = \", \"\n", " \n",
" file.write(\"\\n\")" " strategy=\"quantile\" if strategy.strip() == \"Q\" else \"uniform\",\n",
" )\n",
" data = [[x] for x in experiment]\n",
" result = disc.fit_transform(data)\n",
" write_lists(file, result, disc.bin_edges_[0])"
] ]
} }
], ],