diff --git a/benchmark/Experiments.py b/benchmark/Experiments.py index 3696ffe..6865f98 100644 --- a/benchmark/Experiments.py +++ b/benchmark/Experiments.py @@ -227,14 +227,11 @@ class Experiment: if not self.ignore_nan: print(res["test_score"]) raise ValueError("NaN in results") - results = [] - for item in res["test_score"]: - if not np.isnan(item): - results.append(item) + results = res["test_score"][~np.isnan(res["test_score"])] else: results = res["test_score"] - self.scores.append(results) - self.times.append(res["fit_time"]) + self.scores.extend(results) + self.times.extend(res["fit_time"]) for result_item in res["estimator"]: nodes_item, leaves_item, depth_item = Models.get_complexity( self.model_name, result_item diff --git a/benchmark/Models.py b/benchmark/Models.py index 68df171..8c03ae1 100644 --- a/benchmark/Models.py +++ b/benchmark/Models.py @@ -15,6 +15,19 @@ from xgboost import XGBClassifier import sklearn import xgboost +import random + + +class MockModel(SVC): + # Only used for testing + def predict(self, X): + if random.random() < 0.1: + return [float("NaN")] * len(X) + return super().predict(X) + + def nodes_leaves(self): + return 0, 0 + class Models: @staticmethod @@ -50,6 +63,7 @@ class Models: ), "GBC": GradientBoostingClassifier(random_state=random_state), "RandomForest": RandomForestClassifier(random_state=random_state), + "Mock": MockModel(random_state=random_state), } @staticmethod diff --git a/benchmark/tests/Experiment_test.py b/benchmark/tests/Experiment_test.py index 722052b..7ef9ff7 100644 --- a/benchmark/tests/Experiment_test.py +++ b/benchmark/tests/Experiment_test.py @@ -1,4 +1,6 @@ import json +from io import StringIO +from unittest.mock import patch from .TestBase import TestBase from ..Experiments import Experiment from ..Datasets import Datasets @@ -8,7 +10,9 @@ class ExperimentTest(TestBase): def setUp(self): self.exp = self.build_exp() - def build_exp(self, hyperparams=False, grid=False, model="STree"): + def build_exp( + self, hyperparams=False, grid=False, model="STree", ignore_nan=False + ): params = { "score_name": "accuracy", "model_name": model, @@ -21,7 +25,7 @@ class ExperimentTest(TestBase): "title": "Test", "progress_bar": False, "folds": 2, - "ignore_nan": False, + "ignore_nan": ignore_nan, } return Experiment(**params) @@ -157,3 +161,15 @@ class ExperimentTest(TestBase): self.assertEqual(computed["state_names"][key], value) for feature in expected["features"]: self.assertIn(feature, computed["features"]) + + @patch("sys.stdout", new_callable=StringIO) + def test_experiment_with_nan_not_ignored(self, mock_output): + exp = self.build_exp(model="Mock") + self.assertRaises(ValueError, exp.do_experiment) + output_text = mock_output.getvalue().splitlines() + expected = "[ nan 0.8974359]" + self.assertEqual(expected, output_text[0]) + + def test_experiment_with_nan_ignored(self): + self.exp = self.build_exp(model="Mock", ignore_nan=True) + self.exp.do_experiment()