mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-20 18:06:00 +00:00
Fix BinDisc quantile mistakes
This commit is contained in:
39
tests/tests_do.py
Normal file
39
tests/tests_do.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Replay the recorded discretization experiments with scikit-learn.

Reads ``datasets/tests.txt``, which holds one experiment per group of three
records (lines starting with ``#`` are comments):

1. ``from,to,step,n_bins,strategy`` -- the input column is
   ``range(from, to, step)``; ``strategy`` is ``Q`` for quantile binning and
   anything else selects uniform binning.
2. The comma-separated bin label recorded for every input value.
3. The comma-separated cut points recorded for the experiment.

Each experiment is fitted with ``KBinsDiscretizer`` and every label or cut
point that differs from the recorded one is printed.
"""
from sklearn.preprocessing import KBinsDiscretizer

# Header line, labels line, cut-points line.
LINES_PER_EXPERIMENT = 3
# Recorded cut points are compared against sklearn's edges rounded to this
# many decimals.
CUT_DECIMALS = 5

with open("datasets/tests.txt") as f:
    stripped = [line.strip() for line in f]

# Filter after stripping so blank lines and indented comments are dropped as
# well; either one would otherwise become a record and shift the three-line
# grouping of every experiment that follows it.
data = [line for line in stripped if line and not line.startswith("#")]

for i in range(0, len(data), LINES_PER_EXPERIMENT):
    experiment = data[i : i + LINES_PER_EXPERIMENT]
    if len(experiment) != LINES_PER_EXPERIMENT:
        # A truncated file used to surface as a bare IndexError further down.
        raise ValueError(
            f"datasets/tests.txt: experiment starting at record {i} has "
            f"{len(experiment)} line(s), expected {LINES_PER_EXPERIMENT}"
        )
    header, labels_line, cuts_line = experiment

    print("Experiment:", header)
    from_, to_, step_, n_bins_, strategy_ = header.split(",")
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    disc = KBinsDiscretizer(
        n_bins=int(n_bins_),
        encode="ordinal",
        strategy=strategy,
    )
    # One sample per row, single feature column.
    X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
    disc.fit(X)
    result = [int(x) for x in disc.transform(X).flatten()]

    # NOTE(review): the two reports below label their sides differently --
    # the file values are printed as "Expected" for the labels but as
    # "Result" for the cut points. The messages are kept as they were;
    # confirm which wording is intended.
    expected = [int(x) for x in labels_line.split(",")]
    assert len(result) == len(expected)
    for j, (got, wanted) in enumerate(zip(result, expected)):
        if got != wanted:
            print("Error at", j, "Expected=", wanted, "Result=", got)

    # Edges of the single feature that was fitted.
    expected_cuts = disc.bin_edges_[0]
    computed_cuts = [float(x) for x in cuts_line.split(",")]
    assert len(expected_cuts) == len(computed_cuts)
    for j, (edge, recorded) in enumerate(zip(expected_cuts, computed_cuts)):
        if round(edge, CUT_DECIMALS) != recorded:
            print(
                "Error at",
                j,
                "Expected=",
                edge,
                "Result=",
                recorded,
            )
|
Reference in New Issue
Block a user