Refactor split_class in _parsing.py

Instead of using the tokenizer to find the end of the "class ... :" header, i.e. the point just before the beginning of the class body, use the position information from the AST node itself. This works because:
* node.lineno contains the line where the "class ... :" header is, even if the class has comments or decorators above it
* node.body[0].lineno contains the line where the body begins
bwhmather · May 3, 2024 · 85c7bf6 · 85c7bf6
1 parent 2025243
commit 85c7bf6
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 59 deletions.
diff --git a/src/ssort/_parsing.py b/src/ssort/_parsing.py
@@ -1,13 +1,23 @@
 import ast
 import warnings
-from io import StringIO
-from token import NAME
-from tokenize import generate_tokens
 
 from ssort._exceptions import ParseError
 from ssort._statements import Statement
 
 
+def _build_row_lengths_offsets(text):
+    # Build an index of row lengths and start offsets to enable fast string
+    # indexing using ast row/column coordinates.
+    row_lengths = []
+    row_offsets = [0]
+    for offset, char in enumerate(text):
+        if char == "\n":
+            row_lengths.append(offset - row_offsets[-1])
+            row_offsets.append(offset + 1)
+    row_lengths.append(len(text) - row_offsets[-1])
+    return row_lengths, row_offsets
+
+
 def _find_start(node):
     if (
         isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
@@ -29,17 +39,8 @@ def split(
     nodes,
     next_row=0,
     next_col=0,
-    indent=0,
 ):
-    # Build an index of row lengths and start offsets to enable fast string
-    # indexing using ast row/column coordinates.
-    row_lengths = []
-    row_offsets = [0]
-    for offset, char in enumerate(root_text):
-        if char == "\n":
-            row_lengths.append(offset - row_offsets[-1])
-            row_offsets.append(offset + 1)
-    row_lengths.append(len(root_text) - row_offsets[-1])
+    row_lengths, row_offsets = _build_row_lengths_offsets(root_text)
 
     nodes = iter(nodes)
 
@@ -106,53 +107,13 @@ def split_class(statement):
     text = statement.text
     text_padded = statement.text_padded()
 
-    # Build an index of row lengths and start offsets to enable fast string
-    # indexing using ast row/column coordinates.
-    row_lengths = []
-    row_offsets = [0]
-    for offset, char in enumerate(text_padded):
-        if char == "\n":
-            row_lengths.append(offset - row_offsets[-1])
-            row_offsets.append(offset + 1)
-    row_lengths.append(len(text_padded) - row_offsets[-1])
-
-    tokens = iter(generate_tokens(StringIO(text_padded).readline))
-
-    for token in tokens:
-        lineno, col_offset = token.start
-        if lineno == node.lineno and col_offset == node.col_offset:
-            assert token.string == "class"
-            break
-
-    token = next(tokens)
-    assert token.type == NAME
-
-    token = next(tokens)
-    if token.string == "[":
-        token = next(tokens)
-        depth = 1
-        while depth:
-            if token.string == "[":
-                depth += 1
-            if token.string == "]":
-                depth -= 1
-            token = next(tokens)
-    if token.string == "(":
-        token = next(tokens)
-        depth = 1
-        while depth:
-            if token.string == "(":
-                depth += 1
-            if token.string == ")":
-                depth -= 1
-            token = next(tokens)
-
-    assert token.string == ":"
-
-    if node.body[0].lineno == token.end[0]:
+    _, row_offsets = _build_row_lengths_offsets(text_padded)
+
+    if node.lineno == node.body[0].lineno:
         # All tokens are on the same line.  `split` won't know how to indent
         # them so we do it ourselves.
-        head_end_lineno, head_end_col = token.end
+        head_end_lineno = node.lineno
+        head_end_col = node.body[0].col_offset - 1
         head_end_row = head_end_lineno - 1
 
         head_end_offset = row_offsets[head_end_row] + head_end_col
@@ -180,7 +141,8 @@ def split_class(statement):
             )
 
     else:
-        head_end_lineno, head_end_col = token.end[0] + 1, 0
+        head_end_lineno = node.lineno + 1
+        head_end_col = 0
         head_end_row = head_end_lineno - 1
 
         head_end_offset = row_offsets[head_end_row] + head_end_col

diff --git a/tests/test_split.py b/tests/test_split.py
@@ -117,12 +117,24 @@ def test_split_class_decorators():
     assert actual == expected
 
 
+def test_split_class_decorator_single_line():
+    actual = _split_class("@decorator()\nclass A: key: int")
+    expected = "@decorator()\nclass A:", ["    key: int"]
+    assert actual == expected
+
+
 def test_split_class_leading_comment():
     actual = _split_class("# Comment.\nclass A:\n    pass")
     expected = "# Comment.\nclass A:", ["    pass"]
     assert actual == expected
 
 
+def test_split_class_leading_comment_single_line():
+    actual = _split_class("# Comment.\nclass A: pass")
+    expected = "# Comment.\nclass A:", ["    pass"]
+    assert actual == expected
+
+
 def test_split_class_multiple():
     actual = _split_class("def a():\n    pass\n\nclass A:\n    pass")
     expected = "\nclass A:", ["    pass"]