Commit d7f46bc

Authored by Pablo Galindo
gh-105564: Don't include artificial newlines in the line attribute of tokens (#105565)
1 parent 1dd267a commit d7f46bc
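
In short: when the source does not end with a newline, the tokenizer appends one internally, and before this change that synthetic "\n" leaked into the line attribute of every token on the final line. A minimal sketch of the observable difference, using only the stdlib tokenize module on a Python build that includes this commit (it mirrors the updated test expectations below):

    import io
    import tokenize

    # "1+1" has no trailing newline; the C tokenizer adds one internally.
    for tok in tokenize.generate_tokens(io.StringIO("1+1").readline):
        print(tok)
    # With this fix, every token on line 1 reports line='1+1';
    # previously the artificial newline showed up as line='1+1\n'.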

4 files changed: 30 additions & 25 deletions

Lib/test/test_peg_generator/test_pegen.py

Lines changed: 17 additions & 17 deletions
@@ -552,34 +552,34 @@ def test_mutually_left_recursive(self) -> None:
 string="D",
 start=(1, 0),
 end=(1, 1),
-line="D A C A E\n",
+line="D A C A E",
 ),
 TokenInfo(
 type=NAME,
 string="A",
 start=(1, 2),
 end=(1, 3),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
 type=NAME,
 string="C",
 start=(1, 4),
 end=(1, 5),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
 type=NAME,
 string="A",
 start=(1, 6),
 end=(1, 7),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
-type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E\n"
+type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
 ),
 ],
 )

@@ -594,22 +594,22 @@ def test_mutually_left_recursive(self) -> None:
 string="B",
 start=(1, 0),
 end=(1, 1),
-line="B C A E\n",
+line="B C A E",
 ),
 TokenInfo(
 type=NAME,
 string="C",
 start=(1, 2),
 end=(1, 3),
-line="B C A E\n",
+line="B C A E",
 ),
 ],
 TokenInfo(
-type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E\n"
+type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
 ),
 ],
 TokenInfo(
-type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E\n"
+type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
 ),
 ],
 )

@@ -655,18 +655,18 @@ def test_lookahead(self) -> None:
 node,
 [
 TokenInfo(
-NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .\n"
+NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
 ),
 TokenInfo(
-OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 .\n"
+OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
 ),
 [
 TokenInfo(
 NUMBER,
 string="12",
 start=(1, 6),
 end=(1, 8),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 [
 [

@@ -675,14 +675,14 @@ def test_lookahead(self) -> None:
 string="+",
 start=(1, 9),
 end=(1, 10),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 TokenInfo(
 NUMBER,
 string="12",
 start=(1, 11),
 end=(1, 13),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 ]
 ],

@@ -734,9 +734,9 @@ def test_cut(self) -> None:
 self.assertEqual(
 node,
 [
-TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)\n"),
-TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)\n"),
-TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)\n"),
+TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
+TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
 ],
 )

Lib/test/test_tokenize.py

Lines changed: 8 additions & 8 deletions
@@ -1229,7 +1229,7 @@ def readline():
 # skip the initial encoding token and the end tokens
 tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
 extra_tokens=True))[:-2]
-expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
+expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
 self.assertEqual(tokens, expected_tokens,
 "bytes not decoded with encoding")

@@ -1638,8 +1638,8 @@ def test_comment_at_the_end_of_the_source_without_newline(self):
 TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
 TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
 TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
-TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test\n'),
-TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test\n'),
+TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
+TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
 TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
 ]

@@ -1653,7 +1653,7 @@ def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
 TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
 TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
 TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
-TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' \n'),
+TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
 TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
 ]

@@ -1889,10 +1889,10 @@ def readline(encoding):
 yield "1+1".encode(encoding)

 expected = [
-TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
-TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
-TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
-TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'),
+TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
+TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
+TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
+TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
 TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
 ]
 for encoding in ["utf-8", "latin-1", "utf-16"]:
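
Note the pattern in these expectations: a line that really ends in "\n" in the source (e.g. line='b = 1\n') keeps its newline; only the final line, where the newline is synthesized by the tokenizer, drops it. A small illustrative snippet of that distinction (not part of the patch):

    import io
    import tokenize

    source = "b = 1\n#test"  # real newline, then a comment at EOF
    toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
    assert toks[0].line == "b = 1\n"  # newline present in the source is kept
    comment = next(t for t in toks if t.type == tokenize.COMMENT)
    assert comment.line == "#test"    # tokenizer-added newline is dropped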
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Don't include artificial newlines in the ``line`` attribute of tokens in the
+APIs of the :mod:`tokenize` module. Patch by Pablo Galindo

Python/Python-tokenize.c

Lines changed: 3 additions & 0 deletions
@@ -206,6 +206,9 @@ tokenizeriter_next(tokenizeriterobject *it)
         line = PyUnicode_FromString("");
     } else {
         Py_ssize_t size = it->tok->inp - line_start;
+        if (size >= 1 && it->tok->implicit_newline) {
+            size -= 1;
+        }
         line = PyUnicode_DecodeUTF8(line_start, size, "replace");
     }
     if (line == NULL) {
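
The fix itself is the three added lines: implicit_newline, as the field name suggests, is set when the trailing "\n" of the current buffer line was inserted by the tokenizer rather than read from the source, so trimming one byte before PyUnicode_DecodeUTF8 makes the reported line attribute exactly the text present in the input. A quick round-trip check from Python (an illustrative snippet matching the test_newline_and_space case above):

    import io
    import tokenize

    source = "a\n "  # trailing space on line 2, no final newline
    toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
    nl = next(t for t in toks if t.type == tokenize.NL)
    assert nl.line == " "  # was ' \n' before this change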
