From 1cc14b16a5394e98bd64ad0aa39562af5c89b94d Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 5 May 2024 21:42:00 +0200 Subject: [PATCH] feat: `Regressor.summarize_metrics` and `Classifier.summarize_metrics` (#729) Closes #713 ### Summary of Changes Add `Regressor.summarize_metrics` and `Classifier.summarize_metrics` to quickly check suitable metrics. --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- .../classical/classification/_classifier.py | 38 +++++++++++- .../ml/classical/regression/_regressor.py | 58 ++++++++++++++----- .../classification/test_classifier.py | 46 +++++++++++++++ .../ml/classical/regression/test_regressor.py | 46 +++++++++++++++ 4 files changed, 174 insertions(+), 14 deletions(-) diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index 1ad5792b3..c9a05cff3 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -92,7 +92,43 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: The sklearn Classifier. """ - # noinspection PyProtectedMember + # ------------------------------------------------------------------------------------------------------------------ + # Metrics + # ------------------------------------------------------------------------------------------------------------------ + + def summarize_metrics(self, validation_or_test_set: TabularDataset, positive_class: Any) -> Table: + """ + Summarize the classifier's metrics on the given data. + + Parameters + ---------- + validation_or_test_set: + The validation or test set. + positive_class: + The class to be considered positive. All other classes are considered negative. + + Returns + ------- + metrics: + A table containing the classifier's metrics. + + Raises + ------ + TypeError + If a table is passed instead of a tabular dataset. + """ + accuracy = self.accuracy(validation_or_test_set) + precision = self.precision(validation_or_test_set, positive_class) + recall = self.recall(validation_or_test_set, positive_class) + f1_score = self.f1_score(validation_or_test_set, positive_class) + + return Table( + { + "metric": ["accuracy", "precision", "recall", "f1_score"], + "value": [accuracy, precision, recall, f1_score], + }, + ) + def accuracy(self, validation_or_test_set: TabularDataset) -> float: """ Compute the accuracy of the classifier on the given data. diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 618f68ce7..1779bbb0e 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -90,10 +90,13 @@ def _get_sklearn_regressor(self) -> RegressorMixin: The sklearn Regressor. """ - # noinspection PyProtectedMember - def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float: + # ------------------------------------------------------------------------------------------------------------------ + # Metrics + # ------------------------------------------------------------------------------------------------------------------ + + def summarize_metrics(self, validation_or_test_set: TabularDataset) -> Table: """ - Compute the mean squared error (MSE) on the given data. + Summarize the regressor's metrics on the given data. 
Parameters
         ----------
@@ -102,15 +105,44 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
 
         Returns
         -------
-        mean_squared_error:
-            The calculated mean squared error (the average of the distance of each individual row squared).
+        metrics:
+            A table containing the regressor's metrics.
 
         Raises
         ------
         TypeError
             If a table is passed instead of a tabular dataset.
         """
-        from sklearn.metrics import mean_squared_error as sk_mean_squared_error
+        mean_absolute_error = self.mean_absolute_error(validation_or_test_set)
+        mean_squared_error = self.mean_squared_error(validation_or_test_set)
+
+        return Table(
+            {
+                "metric": ["mean_absolute_error", "mean_squared_error"],
+                "value": [mean_absolute_error, mean_squared_error],
+            },
+        )
+
+    def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
+        """
+        Compute the mean absolute error (MAE) of the regressor on the given data.
+
+        Parameters
+        ----------
+        validation_or_test_set:
+            The validation or test set.
+
+        Returns
+        -------
+        mean_absolute_error:
+            The calculated mean absolute error (the average of the absolute differences between the predicted and expected values).
+
+        Raises
+        ------
+        TypeError
+            If a table is passed instead of a tabular dataset.
+        """
+        from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
 
         if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
             raise PlainTableError
@@ -118,12 +150,12 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
         predicted = self.predict(validation_or_test_set.features).target
 
         _check_metrics_preconditions(predicted, expected)
-        return sk_mean_squared_error(expected._data, predicted._data)
+        return sk_mean_absolute_error(expected._data, predicted._data)
 
     # noinspection PyProtectedMember
-    def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
+    def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
         """
-        Compute the mean absolute error (MAE) of the regressor on the given data.
+        Compute the mean squared error (MSE) on the given data.
 
         Parameters
         ----------
@@ -132,15 +164,15 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
 
         Returns
         -------
-        mean_absolute_error:
-            The calculated mean absolute error (the average of the distance of each individual row).
+        mean_squared_error:
+            The calculated mean squared error (the average of the squared differences between the predicted and expected values).
 
         Raises
         ------
         TypeError
             If a table is passed instead of a tabular dataset.
""" - from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error + from sklearn.metrics import mean_squared_error as sk_mean_squared_error if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table): raise PlainTableError @@ -148,7 +180,7 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float: predicted = self.predict(validation_or_test_set.features).target _check_metrics_preconditions(predicted, expected) - return sk_mean_absolute_error(expected._data, predicted._data) + return sk_mean_squared_error(expected._data, predicted._data) # noinspection PyProtectedMember diff --git a/tests/safeds/ml/classical/classification/test_classifier.py b/tests/safeds/ml/classical/classification/test_classifier.py index 5cba32cc7..4eb86da73 100644 --- a/tests/safeds/ml/classical/classification/test_classifier.py +++ b/tests/safeds/ml/classical/classification/test_classifier.py @@ -336,6 +336,52 @@ def _get_sklearn_classifier(self) -> ClassifierMixin: pass +class TestSummarizeMetrics: + @pytest.mark.parametrize( + ("predicted", "expected", "result"), + [ + ( + [1, 2], + [1, 2], + Table( + { + "metric": ["accuracy", "precision", "recall", "f1_score"], + "value": [1.0, 1.0, 1.0, 1.0], + }, + ), + ), + ], + ) + def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None: + table = Table( + { + "predicted": predicted, + "expected": expected, + }, + ).to_tabular_dataset( + target_name="expected", + ) + + assert DummyClassifier().summarize_metrics(table, 1) == result + + @pytest.mark.parametrize( + "table", + [ + Table( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ], + ids=["table"], + ) + def test_should_raise_if_given_normal_table(self, table: Table) -> None: + with pytest.raises(PlainTableError): + DummyClassifier().summarize_metrics(table, 1) # type: ignore[arg-type] + + class TestAccuracy: def test_with_same_type(self) -> None: table = Table( diff --git a/tests/safeds/ml/classical/regression/test_regressor.py b/tests/safeds/ml/classical/regression/test_regressor.py index 1a073883f..90af36e63 100644 --- a/tests/safeds/ml/classical/regression/test_regressor.py +++ b/tests/safeds/ml/classical/regression/test_regressor.py @@ -343,6 +343,52 @@ def _get_sklearn_regressor(self) -> RegressorMixin: pass +class TestSummarizeMetrics: + @pytest.mark.parametrize( + ("predicted", "expected", "result"), + [ + ( + [1, 2], + [1, 2], + Table( + { + "metric": ["mean_absolute_error", "mean_squared_error"], + "value": [0.0, 0.0], + }, + ), + ), + ], + ) + def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None: + table = Table( + { + "predicted": predicted, + "expected": expected, + }, + ).to_tabular_dataset( + target_name="expected", + ) + + assert DummyRegressor().summarize_metrics(table) == result + + @pytest.mark.parametrize( + "table", + [ + Table( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ], + ids=["table"], + ) + def test_should_raise_if_given_normal_table(self, table: Table) -> None: + with pytest.raises(PlainTableError): + DummyRegressor().summarize_metrics(table) # type: ignore[arg-type] + + class TestMeanAbsoluteError: @pytest.mark.parametrize( ("predicted", "expected", "result"),