Skip to content

Commit 3a5d4cb

Browse files
committed
Issue #13748: Raw bytes literals can now be written with the rb prefix as well as br.
1 parent b63a450 commit 3a5d4cb

File tree

6 files changed

+54
-19
lines changed

6 files changed

+54
-19
lines changed

Doc/reference/lexical_analysis.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ String literals are described by the following lexical definitions:
412412

413413
.. productionlist::
414414
bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`)
415-
bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR"
415+
bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
416416
shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"'
417417
longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""'
418418
shortbytesitem: `shortbyteschar` | `bytesescapeseq`
@@ -446,6 +446,10 @@ or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
446446
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
447447
escapes in raw strings are not treated specially.
448448

449+
.. versionadded:: 3.3
450+
The ``'rb'`` prefix of raw bytes literals has been added as a synonym
451+
of ``'br'``.
452+
449453
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
450454
retained), except that three unescaped quotes in a row terminate the string. (A
451455
"quote" is the character used to open the string, i.e. either ``'`` or ``"``.)

Lib/test/test_strlit.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
33
There are four types of string literals:
44
5-
'abc' -- normal str
6-
r'abc' -- raw str
7-
b'xyz' -- normal bytes
8-
br'xyz' -- raw bytes
5+
'abc' -- normal str
6+
r'abc' -- raw str
7+
b'xyz' -- normal bytes
8+
br'xyz' | rb'xyz' -- raw bytes
99
1010
The difference between normal and raw strings is of course that in a
1111
raw string, \ escapes (while still used to determine the end of the
@@ -103,12 +103,25 @@ def test_eval_bytes_normal(self):
103103

104104
def test_eval_bytes_raw(self):
105105
self.assertEqual(eval(""" br'x' """), b'x')
106+
self.assertEqual(eval(""" rb'x' """), b'x')
106107
self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
108+
self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
107109
self.assertEqual(eval(""" br'\x01' """), byte(1))
110+
self.assertEqual(eval(""" rb'\x01' """), byte(1))
108111
self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
112+
self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
109113
self.assertRaises(SyntaxError, eval, """ br'\x81' """)
114+
self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
110115
self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
116+
self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
111117
self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
118+
self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
119+
self.assertRaises(SyntaxError, eval, """ bb'' """)
120+
self.assertRaises(SyntaxError, eval, """ rr'' """)
121+
self.assertRaises(SyntaxError, eval, """ brr'' """)
122+
self.assertRaises(SyntaxError, eval, """ bbr'' """)
123+
self.assertRaises(SyntaxError, eval, """ rrb'' """)
124+
self.assertRaises(SyntaxError, eval, """ rbb'' """)
112125

113126
def check_encoding(self, encoding, extra=""):
114127
modname = "xx_" + encoding.replace("-", "_")

Lib/test/tokenize_tests.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,12 @@ x = b'abc' + B'ABC'
114114
y = b"abc" + B"ABC"
115115
x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
116116
y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
117+
x = rb'abc' + rB'ABC' + Rb'ABC' + RB'ABC'
118+
y = rb"abc" + rB"ABC" + Rb"ABC" + RB"ABC"
117119
x = br'\\' + BR'\\'
120+
x = rb'\\' + RB'\\'
118121
x = br'\'' + ''
122+
x = rb'\'' + ''
119123
y = br'''
120124
foo bar \\
121125
baz''' + BR'''
@@ -124,6 +128,10 @@ y = Br"""foo
124128
bar \\ baz
125129
""" + bR'''spam
126130
'''
131+
y = rB"""foo
132+
bar \\ baz
133+
""" + Rb'''spam
134+
'''
127135

128136
# Indentation
129137
if 1:

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #13748: Raw bytes literals can now be written with the ``rb`` prefix
14+
as well as ``br``.
15+
1316
- Issue #12736: Use full unicode case mappings for upper, lower, and title case.
1417

1518
- Issue #12760: Add a create mode to open(). Patch by David Townshend.

Parser/tokenizer.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,13 +1412,15 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
14121412
/* Identifier (most frequent token!) */
14131413
nonascii = 0;
14141414
if (is_potential_identifier_start(c)) {
1415-
/* Process b"", r"" and br"" */
1416-
if (c == 'b' || c == 'B') {
1417-
c = tok_nextc(tok);
1418-
if (c == '"' || c == '\'')
1419-
goto letter_quote;
1420-
}
1421-
if (c == 'r' || c == 'R') {
1415+
/* Process b"", r"", br"" and rb"" */
1416+
int saw_b = 0, saw_r = 0;
1417+
while (1) {
1418+
if (!saw_b && (c == 'b' || c == 'B'))
1419+
saw_b = 1;
1420+
else if (!saw_r && (c == 'r' || c == 'R'))
1421+
saw_r = 1;
1422+
else
1423+
break;
14221424
c = tok_nextc(tok);
14231425
if (c == '"' || c == '\'')
14241426
goto letter_quote;

Python/ast.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3744,13 +3744,18 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
37443744
int rawmode = 0;
37453745
int need_encoding;
37463746
if (isalpha(quote)) {
3747-
if (quote == 'b' || quote == 'B') {
3748-
quote = *++s;
3749-
*bytesmode = 1;
3750-
}
3751-
if (quote == 'r' || quote == 'R') {
3752-
quote = *++s;
3753-
rawmode = 1;
3747+
while (!*bytesmode || !rawmode) {
3748+
if (quote == 'b' || quote == 'B') {
3749+
quote = *++s;
3750+
*bytesmode = 1;
3751+
}
3752+
else if (quote == 'r' || quote == 'R') {
3753+
quote = *++s;
3754+
rawmode = 1;
3755+
}
3756+
else {
3757+
break;
3758+
}
37543759
}
37553760
}
37563761
if (quote != '\'' && quote != '\"') {

0 commit comments

Comments
 (0)