Files
mdlp/tests/tests_do.py

40 lines
1.4 KiB
Python

from sklearn.preprocessing import KBinsDiscretizer
with open("datasets/tests.txt") as f:
data = f.readlines()
data = [x.strip() for x in data if x[0] != "#"]
for i in range(0, len(data), 3):
print("Experiment:", data[i])
from_, to_, step_, n_bins_, strategy_ = data[i].split(",")
strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
disc = KBinsDiscretizer(
n_bins=int(n_bins_),
encode="ordinal",
strategy=strategy,
)
X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
# result = disc.fit_transform(X)
disc.fit(X)
result = disc.transform(X)
result = [int(x) for x in result.flatten()]
expected = [int(x) for x in data[i + 1].split(",")]
assert len(result) == len(expected)
for j in range(len(result)):
if result[j] != expected[j]:
print("Error at", j, "Expected=", expected[j], "Result=", result[j])
expected_cuts = disc.bin_edges_[0]
computed_cuts = [float(x) for x in data[i + 2].split(",")]
assert len(expected_cuts) == len(computed_cuts)
for j in range(len(expected_cuts)):
if round(expected_cuts[j], 5) != computed_cuts[j]:
print(
"Error at",
j,
"Expected=",
expected_cuts[j],
"Result=",
computed_cuts[j],
)