Fix BinDisc quantile mistakes (#9)

* Fix BinDisc quantile mistakes * Fix FImdlp tests * Fix tests, samples and remove unneeded support files * Add copyright header to sources Fix coverage report Add coverage badge to README * Update sonar github action * Move sources to a folder and change ArffFiles files to library * Add recursive submodules to github action
2025-08-15 23:45:57 +00:00 · 2024-07-04 17:27:39 +02:00
parent 7b0673fd4b
commit e36d9af8f9
35 changed files with 1383 additions and 923 deletions
--- a/tests/tests_do.py
+++ b/tests/tests_do.py
@@ -0,0 +1,71 @@
+# ***************************************************************
+# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+# SPDX-FileType: SOURCE
+# SPDX-License-Identifier: MIT
+# ***************************************************************
+
+import json
+from sklearn.preprocessing import KBinsDiscretizer
+
+with open("datasets/tests.txt") as f:
+    data = f.readlines()
+
+data = [x.strip() for x in data if x[0] != "#"]
+
+errors = False
+for i in range(0, len(data), 4):
+    experiment_type = data[i]
+    print("Experiment:", data[i + 1])
+    if experiment_type == "RANGE":
+        range_data = data[i + 1]
+        from_, to_, step_, n_bins_, strategy_ = range_data.split(",")
+        X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
+    else:
+        strategy_ = data[i + 1][0]
+        n_bins_ = data[i + 1][1]
+        vector = data[i + 1][2:]
+        X = [[float(x)] for x in json.loads(vector)]
+
+    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
+    disc = KBinsDiscretizer(
+        n_bins=int(n_bins_),
+        encode="ordinal",
+        strategy=strategy,
+    )
+    expected_data = data[i + 2]
+    cuts_data = data[i + 3]
+    disc.fit(X)
+    #
+    # Normalize the cutpoints to remove numerical errors such as 33.0000000001
+    # instead of 33
+    #
+    for j in range(len(disc.bin_edges_[0])):
+        disc.bin_edges_[0][j] = round(disc.bin_edges_[0][j], 5)
+    result = disc.transform(X)
+    result = [int(x) for x in result.flatten()]
+    expected = [int(x) for x in expected_data.split(",")]
+    #
+    # Check the Results
+    #
+    assert len(result) == len(expected)
+    for j in range(len(result)):
+        if result[j] != expected[j]:
+            print("* Error at", j, "Expected=", expected[j], "Result=", result[j])
+            errors = True
+    expected_cuts = disc.bin_edges_[0]
+    computed_cuts = [float(x) for x in cuts_data.split(",")]
+    assert len(expected_cuts) == len(computed_cuts)
+    for j in range(len(expected_cuts)):
+        if round(expected_cuts[j], 5) != computed_cuts[j]:
+            print(
+                "* Error at",
+                j,
+                "Expected=",
+                expected_cuts[j],
+                "Result=",
+                computed_cuts[j],
+            )
+            errors = True
+if errors:
+    raise Exception("There were errors!")
+print("*** All tests run succesfully! ***")