Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow parenthesized implicitly concatenated strings inside calls, to be more compatible with Black. #8590

Merged
merged 8 commits into from
Jun 18, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whatsnew/fragments/8552.false_positive
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Allow parenthesized implicitly concatenated strings when `check-str-concat-over-line-jumps` is enabled.

Closes #8552.
74 changes: 70 additions & 4 deletions pylint/checkers/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@
"Rb",
"RB",
}
_PAREN_IGNORE_TOKEN_TYPES = (
tokenize.NEWLINE,
tokenize.NL,
tokenize.COMMENT,
)
SINGLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?'''")
DOUBLE_QUOTED_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?\"\"\"")
QUOTE_DELIMITER_REGEX = re.compile(f"({'|'.join(_PREFIXES)})?(\"|')", re.DOTALL)
Expand Down Expand Up @@ -716,6 +721,7 @@ def __init__(self, linter: PyLinter) -> None:
tuple[int, int], tuple[str, tokenize.TokenInfo | None]
] = {}
"""Token position -> (token value, next token)."""
self._parenthesized_string_tokens: dict[tuple[int, int], bool] = {}

def process_module(self, node: nodes.Module) -> None:
self._unicode_literals = "unicode_literals" in node.future_imports
Expand Down Expand Up @@ -744,10 +750,62 @@ def process_tokens(self, tokens: list[tokenize.TokenInfo]) -> None:
# to match with astroid `.col_offset`
start = (start[0], len(line[: start[1]].encode(encoding)))
self.string_tokens[start] = (str_eval(token), next_token)
is_parenthesized = self._is_initial_string_token(
i, tokens
) and self._is_parenthesised(i, tokens)
jacobtylerwalls marked this conversation as resolved.
Show resolved Hide resolved
self._parenthesized_string_tokens[start] = is_parenthesized

if self.linter.config.check_quote_consistency:
self.check_for_consistent_string_delimiters(tokens)

def _is_initial_string_token(
    self, index: int, tokens: Sequence[tokenize.TokenInfo]
) -> bool:
    """Tell whether the token at ``index`` starts a run of adjacent strings.

    The token qualifies when the nearest token before it (as returned by
    ``_find_prev_token``) is not a string literal and the nearest token
    after it (as returned by ``_find_next_token``) is a string literal.

    :param index: Position in ``tokens`` of the token being examined.
    :param tokens: The token stream the index refers to.
    :returns: ``True`` only for the first token of such a run.
    """
    preceding = self._find_prev_token(index, tokens)
    follows_a_string = bool(preceding) and preceding.type == tokenize.STRING
    if follows_a_string:
        # A string sits right before this one, so this is not the first.
        return False

    following = self._find_next_token(index, tokens)
    if not following:
        return False
    # Only a directly following string literal makes this a concatenation.
    return following.type == tokenize.STRING

def _is_parenthesised(
    self, index: int, tokens: Sequence[tokenize.TokenInfo]
) -> bool:
    """Tell whether the string run around ``index`` is wrapped in parentheses.

    Starting from the token at ``index``, the search walks outwards in both
    directions, skipping the default ignorable token types and any string
    literal. The run counts as parenthesised when the first remaining token
    on the left is the operator ``(`` and the first remaining token on the
    right is the operator ``)``.

    The check is purely token based: any ``(`` operator qualifies, with no
    distinction as to what the parenthesis belongs to.

    :param index: Position in ``tokens`` of a string token.
    :param tokens: The token stream the index refers to.
    :returns: ``True`` when the nearest non-ignored neighbours are ``(``
        and ``)``.
    """
    # String literals are skipped as well, so the search passes over every
    # neighbouring literal before it looks at the delimiter.
    ignore = (*_PAREN_IGNORE_TOKEN_TYPES, tokenize.STRING)

    prev_token = self._find_prev_token(index, tokens, ignore=ignore)
    if (
        not prev_token
        or prev_token.type != tokenize.OP
        or prev_token.string != "("
    ):
        return False

    next_token = self._find_next_token(index, tokens, ignore=ignore)
    return bool(
        next_token
        and next_token.type == tokenize.OP
        and next_token.string == ")"
    )

def _find_prev_token(
    self,
    index: int,
    tokens: Sequence[tokenize.TokenInfo],
    *,
    ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
) -> tokenize.TokenInfo | None:
    """Return the closest token before ``index`` whose type is not ignored.

    :param index: Position in ``tokens`` to search backwards from
        (the token at ``index`` itself is not considered).
    :param tokens: The token stream to search.
    :param ignore: Token types to step over during the search.
    :returns: The first non-ignored token found walking towards the start
        of ``tokens``, or ``None`` when the start is reached first.
    """
    for position in range(index - 1, -1, -1):
        candidate = tokens[position]
        if candidate.type not in ignore:
            return candidate
    return None

def _find_next_token(
    self,
    index: int,
    tokens: Sequence[tokenize.TokenInfo],
    *,
    ignore: tuple[int, ...] = _PAREN_IGNORE_TOKEN_TYPES,
) -> tokenize.TokenInfo | None:
    """Return the closest token after ``index`` whose type is not ignored.

    :param index: Position in ``tokens`` to search forwards from
        (the token at ``index`` itself is not considered).
    :param tokens: The token stream to search.
    :param ignore: Token types to step over during the search.
    :returns: The first non-ignored token found walking towards the end
        of ``tokens``, or ``None`` when the end is reached first.
    """
    for position in range(index + 1, len(tokens)):
        candidate = tokens[position]
        if candidate.type not in ignore:
            return candidate
    return None

@only_required_for_messages("implicit-str-concat")
def visit_call(self, node: nodes.Call) -> None:
self.check_for_concatenated_strings(node.args, "call")
Expand Down Expand Up @@ -821,10 +879,18 @@ def check_for_concatenated_strings(
matching_token, next_token = self.string_tokens[token_index]
# We detect string concatenation: the AST Const is the
# combination of 2 string tokens
if matching_token != elt.value and next_token is not None:
if next_token.type == tokenize.STRING and (
next_token.start[0] == elt.lineno
or self.linter.config.check_str_concat_over_line_jumps
if (
matching_token != elt.value
and next_token is not None
and next_token.type == tokenize.STRING
):
if next_token.start[0] == elt.lineno or (
self.linter.config.check_str_concat_over_line_jumps
# Allow implicitly concatenated strings in parens.
jacobtylerwalls marked this conversation as resolved.
Show resolved Hide resolved
# See https://github.com/pylint-dev/pylint/issues/8552.
and not self._parenthesized_string_tokens.get(
(elt.lineno, elt.col_offset)
)
):
self.add_message(
"implicit-str-concat",
Expand Down
39 changes: 38 additions & 1 deletion tests/functional/i/implicit/implicit_str_concat_multiline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,46 @@
TEST_TUPLE = ('a', 'b' # [implicit-str-concat]
'c')

# See https://github.com/pylint-dev/pylint/issues/8552.
PARENTHESIZED_IS_OK = [
"a",
(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
" sed do eiusmod tempor incididunt ut labore et dolore "
),
]

# Single argument without trailing comma is OK:
print(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit," # [implicit-str-concat]
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
" sed do eiusmod tempor incididunt ut labore et dolore "
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
"exercitation ullamco laboris nisi ut aliquip ex ea "
)

# Implicitly concatenated strings on the same line always raise:
print(
"Lorem ipsum dolor sit amet, ""consectetur adipiscing elit," # [implicit-str-concat]
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
" sed do eiusmod tempor incididunt ut labore et dolore "
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
"exercitation ullamco laboris nisi ut aliquip ex ea "
)

# Explicitly wrapping in parens with a trailing comma is OK:
print(
(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
" sed do eiusmod tempor incididunt ut labore et dolore "
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
"exercitation ullamco laboris nisi ut aliquip ex ea "
),
)

# But NOT OK when there is a trailing comma and NOT wrapped in parens:
print(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit," # [implicit-str-concat]
" sed do eiusmod tempor incididunt ut labore et dolore "
"magna aliqua. Ut enim ad minim veniam, quis nostrud "
"exercitation ullamco laboris nisi ut aliquip ex ea ",
)
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
implicit-str-concat:3:0:None:None::Implicit string concatenation found in tuple:HIGH
implicit-str-concat:7:0:None:None::Implicit string concatenation found in call:HIGH
implicit-str-concat:25:0:None:None::Implicit string concatenation found in call:HIGH
implicit-str-concat:44:0:None:None::Implicit string concatenation found in call:HIGH