mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-17 08:25:53 +00:00
101 lines
3.3 KiB
Python
101 lines
3.3 KiB
Python
from .TestBase import TestBase
|
|
from ..Experiments import Randomized
|
|
from ..Datasets import Datasets
|
|
|
|
|
|
class DatasetTest(TestBase):
    """Tests for ``Datasets`` loading/iteration and ``Randomized.seeds``."""

    def setUp(self):
        """Record the figures each known dataset is expected to produce."""
        # Chain up the same way tearDown does, so that any preparation done
        # by the base class is not silently skipped.
        super().setUp()
        # dataset name -> (rows of X, columns of X, max(y) + 1)
        self.datasets_values = {
            "balance-scale": (625, 4, 3),
            "balloons": (16, 4, 2),
            "iris": (150, 4, 3),
            "wine": (178, 13, 3),
        }

    def tearDown(self) -> None:
        """Switch back to ``.env.dist`` after every test."""
        self.set_env(".env.dist")
        return super().tearDown()

    def _assert_dataset_integrity(self, dt, dataset):
        """Load *dataset* through *dt* and check it against the known figures.

        The triple (rows of X, columns of X, max(y) + 1) must equal the entry
        stored for *dataset* in ``self.datasets_values``.
        """
        X, y = dt.load(dataset)
        rows, columns = X.shape
        # NOTE(review): the third figure is max(y) + 1; it reads as a class
        # count only if labels are zero-based integers -- confirm.
        label_span = max(y) + 1
        self.assertSequenceEqual(
            (rows, columns, label_span), self.datasets_values[dataset]
        )

    def test_Randomized(self):
        """``Randomized.seeds()`` yields the ten expected seeds."""
        expected = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
        self.assertSequenceEqual(Randomized.seeds(), expected)

    def test_Randomized_3_seeds(self):
        """With ``.env.arff`` active, ``Randomized.seeds()`` yields three."""
        self.set_env(".env.arff")
        expected = [271, 314, 171]
        self.assertSequenceEqual(Randomized.seeds(), expected)

    def test_load_dataframe(self):
        """Both ``dataframe`` modes of ``load`` return the same iris values."""
        self.set_env(".env.arff")
        dt = Datasets()
        X, y = dt.load("iris", dataframe=False)
        dataset = dt.load("iris", dataframe=True)
        class_name = dt.get_class_name()
        features = dt.get_features()
        self.assertListEqual(y.tolist(), dataset[class_name].tolist())
        # Column i of X must match the dataframe column named features[i].
        for column, feature in enumerate(features):
            self.assertListEqual(
                X[:, column].tolist(), dataset[feature].tolist()
            )

    def test_Datasets_iterator(self):
        """Iterating ``Datasets()`` yields the expected names per env file."""
        expected_by_env = {
            ".env.dist": ["balance-scale", "balloons"],
            ".env.surcov": ["iris", "wine"],
            ".env.arff": ["iris", "wine"],
        }
        for env_file, expected in expected_by_env.items():
            # subTest labels a failure with the env file that produced it.
            with self.subTest(env=env_file):
                self.set_env(env_file)
                dt = Datasets()
                computed = []
                for dataset in dt:
                    computed.append(dataset)
                    self._assert_dataset_integrity(dt, dataset)
                self.assertSequenceEqual(computed, expected)
        # tearDown repeats this; kept so the env is restored before returning.
        self.set_env(".env.dist")

    def test_load_dataset(self):
        """``load("balance-scale")`` returns X of (625, 4) and y of (625,)."""
        dt = Datasets()
        X, y = dt.load("balance-scale")
        self.assertSequenceEqual(X.shape, (625, 4))
        self.assertSequenceEqual(y.shape, (625,))

    def test_create_with_unknown_dataset(self):
        """Constructing ``Datasets`` with an unknown name raises ValueError."""
        with self.assertRaises(ValueError) as msg:
            Datasets("unknown")
        self.assertEqual(str(msg.exception), "Unknown dataset: unknown")

    def test_load_unknown_dataset(self):
        """Loading an unknown name raises ValueError with the same message."""
        dt = Datasets()
        with self.assertRaises(ValueError) as msg:
            dt.load("unknown")
        self.assertEqual(str(msg.exception), "Unknown dataset: unknown")

    def test_Datasets_subset(self):
        """``Datasets(name)`` iterates over exactly that one dataset."""
        selected_by_env = {
            ".env.dist": "balloons",
            ".env.surcov": "wine",
            ".env.arff": "iris",
        }
        for env_file, selected in selected_by_env.items():
            # subTest labels a failure with the env file that produced it.
            with self.subTest(env=env_file):
                self.set_env(env_file)
                dt = Datasets(selected)
                computed = []
                for dataset in dt:
                    computed.append(dataset)
                    self._assert_dataset_integrity(dt, dataset)
                self.assertSequenceEqual(computed, [selected])
        # tearDown repeats this; kept so the env is restored before returning.
        self.set_env(".env.dist")
|