Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return raw outputs in TextClassificationPipeline #8328

Merged
merged 5 commits on
Aug 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/transformers/configuration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def __init__(self, **kwargs):
allowed_problem_types = ("regression", "single_label_classification", "multi_label_classification")
if self.problem_type is not None and self.problem_type not in allowed_problem_types:
raise ValueError(
f"The config parameter `problem_type` wasnot understood: received {self.problem_type}"
f"The config parameter `problem_type` was not understood: received {self.problem_type}"
"but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
)

Expand Down
91 changes: 83 additions & 8 deletions src/transformers/pipelines/text_classification.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Optional

import numpy as np

from ..file_utils import add_end_docstrings, is_tf_available, is_torch_available
from ..file_utils import ExplicitEnum, add_end_docstrings, is_tf_available, is_torch_available
from .base import PIPELINE_INIT_ARGS, Pipeline


Expand All @@ -11,11 +13,35 @@
from ..models.auto.modeling_auto import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING


def sigmoid(_outputs):
    """Element-wise logistic sigmoid of the model logits.

    Args:
        _outputs (`np.ndarray`): raw logits of any shape.

    Returns:
        `np.ndarray`: ``1 / (1 + exp(-x))`` applied element-wise.

    The naive form ``1.0 / (1.0 + np.exp(-_outputs))`` overflows ``np.exp``
    for large negative logits (emitting a RuntimeWarning and transiently
    producing ``inf`` before the division). Computing it as
    ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` is mathematically
    identical but never overflows.
    """
    # logaddexp(0, -x) == log(1 + exp(-x)), evaluated without overflow.
    return np.exp(-np.logaddexp(0.0, -_outputs))


def softmax(_outputs):
    """Softmax of the model logits over the last axis.

    Args:
        _outputs (`np.ndarray`): raw logits; the last axis is the class axis.

    Returns:
        `np.ndarray`: same shape as ``_outputs``, non-negative, summing to 1
        along the last axis.
    """
    # Subtract the per-row maximum before exponentiating so that no
    # exponent exceeds 0 — the standard overflow-safe softmax trick.
    stabilized = _outputs - np.max(_outputs, axis=-1, keepdims=True)
    exp_scores = np.exp(stabilized)
    return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)


class ClassificationFunction(ExplicitEnum):
    """Named post-processing functions applied to the model's output logits.

    The string values mirror what the pipeline's ``function_to_apply``
    argument accepts (string lookup is done case-insensitively via
    ``ClassificationFunction[name.upper()]`` in ``__call__``).
    NOTE(review): ``ExplicitEnum`` is a project helper from ``file_utils`` —
    presumably it raises a descriptive error on unknown values; confirm.
    """

    SIGMOID = "sigmoid"  # element-wise sigmoid; default for single-label / multi-label problems
    SOFTMAX = "softmax"  # softmax over the last axis; default for multi-class problems
    NONE = "none"  # leave the raw logits untouched


@add_end_docstrings(
PIPELINE_INIT_ARGS,
r"""
return_all_scores (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether to return all prediction scores or just the one of the predicted class.
function_to_apply (:obj:`str`, `optional`, defaults to :obj:`"default"`):
The function to apply to the model outputs in order to retrieve the scores. Accepts four different values:

- :obj:`"default"`: if the model has a single label, will apply the sigmoid function on the output. If the
model has several labels, will apply the softmax function on the output.
- :obj:`"sigmoid"`: Applies the sigmoid function on the output.
- :obj:`"softmax"`: Applies the softmax function on the output.
- :obj:`"none"`: Does not apply any function on the output.
""",
)
class TextClassificationPipeline(Pipeline):
Expand All @@ -35,7 +61,9 @@ class TextClassificationPipeline(Pipeline):
<https://huggingface.co/models?filter=text-classification>`__.
"""

def __init__(self, return_all_scores: bool = False, **kwargs):
task = "text-classification"

def __init__(self, return_all_scores: bool = None, function_to_apply: str = None, **kwargs):
super().__init__(**kwargs)

self.check_model_type(
Expand All @@ -44,15 +72,45 @@ def __init__(self, return_all_scores: bool = False, **kwargs):
else MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
)

self.return_all_scores = return_all_scores
if hasattr(self.model.config, "return_all_scores") and return_all_scores is None:
return_all_scores = self.model.config.return_all_scores

if hasattr(self.model.config, "function_to_apply") and function_to_apply is None:
function_to_apply = self.model.config.function_to_apply

def __call__(self, *args, **kwargs):
self.return_all_scores = return_all_scores if return_all_scores is not None else False
self.function_to_apply = function_to_apply if function_to_apply is not None else None

def __call__(
self,
*args,
return_all_scores: Optional[bool] = None,
function_to_apply: Optional[ClassificationFunction] = None,
**kwargs
):
"""
Classify the text(s) given as inputs.

Args:
args (:obj:`str` or :obj:`List[str]`):
One or several texts (or one list of prompts) to classify.
return_all_scores (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether to return scores for all labels.
function_to_apply (:obj:`str`, `optional`, defaults to :obj:`"default"`):
The function to apply to the model outputs in order to retrieve the scores. Accepts four different
values:

If this argument is not specified, then it will apply the following functions according to the number
of labels:

- If the model has a single label, will apply the sigmoid function on the output.
- If the model has several labels, will apply the softmax function on the output.

Possible values are:

- :obj:`"sigmoid"`: Applies the sigmoid function on the output.
- :obj:`"softmax"`: Applies the softmax function on the output.
- :obj:`"none"`: Does not apply any function on the output.

Return:
A list or a list of list of :obj:`dict`: Each result comes as list of dictionaries with the following keys:
Expand All @@ -64,11 +122,28 @@ def __call__(self, *args, **kwargs):
"""
outputs = super().__call__(*args, **kwargs)

if self.model.config.num_labels == 1:
scores = 1.0 / (1.0 + np.exp(-outputs))
return_all_scores = return_all_scores if return_all_scores is not None else self.return_all_scores
function_to_apply = function_to_apply if function_to_apply is not None else self.function_to_apply

if function_to_apply is None:
if self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
function_to_apply = ClassificationFunction.SIGMOID
elif self.model.config.problem_type == "single_label_classification" or self.model.config.num_labels > 1:
function_to_apply = ClassificationFunction.SOFTMAX

if isinstance(function_to_apply, str):
function_to_apply = ClassificationFunction[function_to_apply.upper()]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we try except with clean error message, or is the current exception good enough ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking the exception is good enough, but happy to update if you feel strongly

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No no I think it's fine, I didn't check what the actual message was, in the previous iteration it was explicit so I was wondering if it was ok here. I trust you there.


if function_to_apply == ClassificationFunction.SIGMOID:
scores = sigmoid(outputs)
elif function_to_apply == ClassificationFunction.SOFTMAX:
scores = softmax(outputs)
elif function_to_apply == ClassificationFunction.NONE:
scores = outputs
else:
scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
if self.return_all_scores:
raise ValueError(f"Unrecognized `function_to_apply` argument: {function_to_apply}")

if return_all_scores:
return [
[{"label": self.model.config.id2label[i], "score": score.item()} for i, score in enumerate(item)]
for item in scores
Expand Down