Skip to content

Commit b6bde9f

Browse files
authored
bpo-44667: Treat correctly lines ending with comments and no newlines in the Python tokenizer (GH-27499)
1 parent e63e631 commit b6bde9f

File tree

3 files changed

+15
-1
lines changed

3 files changed

+15
-1
lines changed

Lib/test/test_tokenize.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,6 +1458,16 @@ def test_pathological_trailing_whitespace(self):
14581458
# See http://bugs.python.org/issue16152
14591459
self.assertExactTypeEqual('@ ', token.AT)
14601460

1461+
def test_comment_at_the_end_of_the_source_without_newline(self):
1462+
# See http://bugs.python.org/issue44667
1463+
source = 'b = 1\n\n#test'
1464+
expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]
1465+
1466+
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
1467+
self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
1468+
for i in range(6):
1469+
self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
1470+
self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])
14611471

14621472
class UntokenizeTest(TestCase):
14631473

Lib/tokenize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def _tokenize(readline, encoding):
604604
pos += 1
605605

606606
# Add an implicit NEWLINE if the input doesn't end in one
607-
if last_line and last_line[-1] not in '\r\n':
607+
if last_line and last_line[-1] not in '\r\n' and not last_line.strip().startswith("#"):
608608
yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
609609
for indent in indents[1:]: # pop remaining indent levels
610610
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:func:`tokenize.tokenize` no longer incorrectly generates a ``NEWLINE``
2+
token if the source doesn't end with a newline character but the last line
3+
is a comment, as the function is already generating a ``NL`` token. Patch by
4+
Pablo Galindo.

0 commit comments

Comments
 (0)