mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 07:25:56 +00:00
* Fix BinDisc quantile mistakes * Fix FImdlp tests * Fix tests, samples and remove unneeded support files * Add copyright header to sources * Fix coverage report * Add coverage badge to README * Update sonar github action * Move sources to a folder and change ArffFiles files to library * Add recursive submodules to github action
72 lines
2.3 KiB
Python
72 lines
2.3 KiB
Python
# ***************************************************************
|
|
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
|
# SPDX-FileType: SOURCE
|
|
# SPDX-License-Identifier: MIT
|
|
# ***************************************************************
|
|
|
|
import json
|
|
from sklearn.preprocessing import KBinsDiscretizer
|
|
|
|
# Check the expected results stored in datasets/tests.txt against
# scikit-learn's KBinsDiscretizer. Lines starting with "#" are comments.
with open("datasets/tests.txt") as f:
    data = f.readlines()

data = [x.strip() for x in data if x[0] != "#"]

# Every experiment takes four consecutive lines:
#   i     -> experiment type ("RANGE", anything else means explicit values)
#   i + 1 -> experiment definition
#   i + 2 -> expected discretized values, comma separated
#   i + 3 -> cut points to compare with the fitted bin edges, comma separated
errors = False
for i in range(0, len(data), 4):
    experiment_type = data[i]
    definition = data[i + 1]
    print("Experiment:", definition)
    if experiment_type == "RANGE":
        # Definition fields: from, to, step, number of bins, strategy
        from_, to_, step_, n_bins_, strategy_ = definition.split(",")
        X = [[float(x)] for x in range(int(from_), int(to_), int(step_))]
    else:
        # Definition: one strategy character, the number of bins and a JSON
        # list with the values, e.g. Q3[1.0, 2.0, 3.0]. Splitting at the
        # first "[" allows the number of bins to have more than one digit.
        header, bracket, rest = definition.partition("[")
        strategy_ = header[0]
        n_bins_ = header[1:]
        X = [[float(x)] for x in json.loads(bracket + rest)]

    # "Q" selects the quantile strategy; any other value selects uniform.
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    disc = KBinsDiscretizer(
        n_bins=int(n_bins_),
        encode="ordinal",
        strategy=strategy,
    )
    expected_data = data[i + 2]
    cuts_data = data[i + 3]
    disc.fit(X)
    #
    # Normalize the cutpoints to remove numerical errors such as 33.0000000001
    # instead of 33
    #
    edges = disc.bin_edges_[0]
    for j, edge in enumerate(edges):
        # Written back in place so transform() below uses the rounded edges.
        edges[j] = round(edge, 5)
    result = [int(x) for x in disc.transform(X).flatten()]
    expected = [int(x) for x in expected_data.split(",")]
    #
    # Check the Results
    #
    # Raised explicitly instead of using assert so the check is not removed
    # when Python runs with -O.
    if len(result) != len(expected):
        raise AssertionError(
            f"Discretized data has {len(result)} values, "
            f"expected {len(expected)}"
        )
    for j, (got, wanted) in enumerate(zip(result, expected)):
        if got != wanted:
            print("* Error at", j, "Expected=", wanted, "Result=", got)
            errors = True
    expected_cuts = disc.bin_edges_[0]
    computed_cuts = [float(x) for x in cuts_data.split(",")]
    if len(expected_cuts) != len(computed_cuts):
        raise AssertionError(
            f"The discretizer has {len(expected_cuts)} bin edges, "
            f"the data file has {len(computed_cuts)} cut points"
        )
    for j, (edge, cut) in enumerate(zip(expected_cuts, computed_cuts)):
        if round(edge, 5) != cut:
            print(
                "* Error at",
                j,
                "Expected=",
                edge,
                "Result=",
                cut,
            )
            errors = True
# Mismatches are only reported while looping; fail once at the end so every
# experiment is listed before the script aborts.
if errors:
    raise Exception("There were errors!")
print("*** All tests run succesfully! ***")
|