mirror of
https://github.com/rmontanana/mdlp.git
synced 2025-08-15 23:45:57 +00:00
Fix BinDisc quantile mistakes (#9)
* Fix BinDisc quantile mistakes * Fix FImdlp tests * Fix tests, samples and remove unneeded support files * Add copyright header to sources Fix coverage report Add coverage badge to README * Update sonar github action * Move sources to a folder and change ArffFiles files to library * Add recursive submodules to github action
This commit is contained in:
committed by
GitHub
parent
7b0673fd4b
commit
e36d9af8f9
71
tests/tests_do.py
Normal file
71
tests/tests_do.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import json
|
||||
from sklearn.preprocessing import KBinsDiscretizer
|
||||
|
||||
def _parse_experiment(experiment_type, header):
    """Build the discretizer inputs described by one experiment header.

    Two header layouts are understood:

    * ``RANGE`` experiments: ``from,to,step,n_bins,strategy`` (comma
      separated); the samples are ``range(from, to, step)``.
    * any other experiment type: ``<strategy letter><n_bins><JSON list>``,
      e.g. ``Q3[1.0, 2.0]``; the samples are the values of the JSON list.

    Args:
        experiment_type: first line of the experiment group.
        header: second line of the experiment group.

    Returns:
        A ``(X, n_bins, strategy)`` tuple where ``X`` is a list of
        single-feature rows (``[[float], ...]``), ``n_bins`` is an ``int``
        and ``strategy`` is ``"quantile"`` when the strategy letter is
        ``Q`` and ``"uniform"`` otherwise.

    Raises:
        ValueError: if the header cannot be parsed.
    """
    if experiment_type == "RANGE":
        from_, to_, step_, n_bins_, strategy_ = header.split(",")
        values = range(int(from_), int(to_), int(step_))
    else:
        # The bin count is everything between the strategy letter and the
        # opening bracket of the vector, so multi-digit counts are accepted.
        bracket = header.find("[")
        if bracket < 2:
            raise ValueError("Malformed vector experiment: {!r}".format(header))
        strategy_ = header[0]
        n_bins_ = header[1:bracket]
        values = json.loads(header[bracket:])
    strategy = "quantile" if strategy_.strip() == "Q" else "uniform"
    return [[float(x)] for x in values], int(n_bins_), strategy


# Each experiment in the data file is a group of four significant lines:
# experiment type, header, expected discretized values, expected cut points.
with open("datasets/tests.txt") as f:
    data = [line.strip() for line in f]

# Strip before filtering so that blank lines and indented comments cannot
# shift the four-line grouping.
data = [line for line in data if line and not line.startswith("#")]

if len(data) % 4:
    raise ValueError(
        "datasets/tests.txt must hold groups of 4 lines, found {} lines".format(
            len(data)
        )
    )

errors = False
for i in range(0, len(data), 4):
    experiment_type, header, expected_data, cuts_data = data[i : i + 4]
    print("Experiment:", header)
    X, n_bins, strategy = _parse_experiment(experiment_type, header)

    disc = KBinsDiscretizer(
        n_bins=n_bins,
        encode="ordinal",
        strategy=strategy,
    )
    disc.fit(X)
    #
    # Normalize the cutpoints to remove numerical errors such as 33.0000000001
    # instead of 33. This is done in place, before transform(), so the
    # discretized values are computed from the rounded edges.
    #
    edges = disc.bin_edges_[0]
    for j, edge in enumerate(edges):
        edges[j] = round(edge, 5)
    result = [int(x) for x in disc.transform(X).flatten()]
    expected = [int(x) for x in expected_data.split(",")]
    #
    # Check the Results
    #
    assert len(result) == len(expected)
    for j, (wanted, got) in enumerate(zip(expected, result)):
        if got != wanted:
            print("* Error at", j, "Expected=", wanted, "Result=", got)
            errors = True
    # For the cut points the fitted bin edges are reported as "Expected" and
    # the values read from the data file as "Result".
    expected_cuts = disc.bin_edges_[0]
    computed_cuts = [float(x) for x in cuts_data.split(",")]
    assert len(expected_cuts) == len(computed_cuts)
    for j, (edge, computed) in enumerate(zip(expected_cuts, computed_cuts)):
        if round(edge, 5) != computed:
            print(
                "* Error at",
                j,
                "Expected=",
                edge,
                "Result=",
                computed,
            )
            errors = True
# Mismatches are collected across all experiments and reported at the end so
# a single run lists every failing position.
if errors:
    raise Exception("There were errors!")
print("*** All tests run succesfully! ***")
|
Reference in New Issue
Block a user