mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-20 18:06:00 +00:00
40 lines
1.4 KiB
Python
40 lines
1.4 KiB
Python
from sklearn.preprocessing import KBinsDiscretizer
|
|
|
|
with open("datasets/tests.txt") as f:
|
|
data = f.readlines()
|
|
|
|
data = [x.strip() for x in data if x[0] != "#"]
|
|
|
|
for i in range(0, len(data), 3):
|
|
print("Experiment:", data[i])
|
|
from_, to_, step_, n_bins_, strategy_ = data[i].split(",")
|
|
strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
|
|
disc = KBinsDiscretizer(
|
|
n_bins=int(n_bins_),
|
|
encode="ordinal",
|
|
strategy=strategy,
|
|
)
|
|
X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
|
|
# result = disc.fit_transform(X)
|
|
disc.fit(X)
|
|
result = disc.transform(X)
|
|
result = [int(x) for x in result.flatten()]
|
|
expected = [int(x) for x in data[i + 1].split(",")]
|
|
assert len(result) == len(expected)
|
|
for j in range(len(result)):
|
|
if result[j] != expected[j]:
|
|
print("Error at", j, "Expected=", expected[j], "Result=", result[j])
|
|
expected_cuts = disc.bin_edges_[0]
|
|
computed_cuts = [float(x) for x in data[i + 2].split(",")]
|
|
assert len(expected_cuts) == len(computed_cuts)
|
|
for j in range(len(expected_cuts)):
|
|
if round(expected_cuts[j], 5) != computed_cuts[j]:
|
|
print(
|
|
"Error at",
|
|
j,
|
|
"Expected=",
|
|
expected_cuts[j],
|
|
"Result=",
|
|
computed_cuts[j],
|
|
)
|