Skip to content

Commit a9e4785

Browse files
committed
Fix string concatenation
The concatenation of a FormattedValue and an empty Constant should lead to the FormattedValue itself. Thus, we will not take any empty constants into account, just as in `_PyPegen_joined_str`.
1 parent 270b661 commit a9e4785

File tree

1 file changed

+58
-35
lines changed

1 file changed

+58
-35
lines changed

Parser/action_helpers.c

Lines changed: 58 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15111511
if (elem->kind == Constant_kind) {
15121512
asdl_seq_SET(flattened, current_pos++, elem);
15131513
} else {
1514-
for (j=0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
1514+
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
15151515
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
15161516
if (subvalue == NULL) {
15171517
return NULL;
@@ -1526,6 +1526,15 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15261526
int prev_is_constant = 0;
15271527
for (i = 0; i < n_flattened_elements; i++) {
15281528
expr_ty elem = asdl_seq_GET(flattened, i);
1529+
1530+
/* The concatenation of a FormattedValue and an empty Constant should
1531+
lead to the FormattedValue itself. Thus, we will not take any empty
1532+
constants into account, just as in `_PyPegen_joined_str` */
1533+
if (f_string_found && elem->kind == Constant_kind &&
1534+
PyUnicode_CheckExact(elem->v.Constant.value) &&
1535+
PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0)
1536+
continue;
1537+
15291538
if (!prev_is_constant || elem->kind != Constant_kind) {
15301539
n_elements++;
15311540
}
@@ -1545,44 +1554,57 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15451554

15461555
/* if the current elem and the following are constants,
15471556
fold them and all consequent constants */
1548-
if (elem->kind == Constant_kind && i+1 < n_flattened_elements
1549-
&& asdl_seq_GET(flattened, i+1)->kind == Constant_kind) {
1550-
expr_ty first_elem = elem;
1551-
1552-
/* When a string is getting concatenated, the kind of the string
1553-
is determined by the first string in the concatenation sequence.
1554-
1555-
u"abc" "def" -> u"abcdef"
1556-
"abc" u"abc" -> "abcabc" */
1557-
PyObject *kind = elem->v.Constant.kind;
1558-
1559-
_PyUnicodeWriter_Init(&writer);
1560-
expr_ty last_elem = elem;
1561-
for (j = i; j < n_flattened_elements; j++) {
1562-
expr_ty current_elem = asdl_seq_GET(flattened, j);
1563-
if (current_elem->kind == Constant_kind) {
1564-
if (_PyUnicodeWriter_WriteStr(&writer, current_elem->v.Constant.value)) {
1565-
_PyUnicodeWriter_Dealloc(&writer);
1566-
return NULL;
1557+
if (elem->kind == Constant_kind) {
1558+
if (i + 1 < n_flattened_elements &&
1559+
asdl_seq_GET(flattened, i + 1)->kind == Constant_kind) {
1560+
expr_ty first_elem = elem;
1561+
1562+
/* When a string is getting concatenated, the kind of the string
1563+
is determined by the first string in the concatenation
1564+
sequence.
1565+
1566+
u"abc" "def" -> u"abcdef"
1567+
"abc" u"abc" -> "abcabc" */
1568+
PyObject *kind = elem->v.Constant.kind;
1569+
1570+
_PyUnicodeWriter_Init(&writer);
1571+
expr_ty last_elem = elem;
1572+
for (j = i; j < n_flattened_elements; j++) {
1573+
expr_ty current_elem = asdl_seq_GET(flattened, j);
1574+
if (current_elem->kind == Constant_kind) {
1575+
if (_PyUnicodeWriter_WriteStr(
1576+
&writer, current_elem->v.Constant.value)) {
1577+
_PyUnicodeWriter_Dealloc(&writer);
1578+
return NULL;
1579+
}
1580+
last_elem = current_elem;
1581+
} else {
1582+
break;
15671583
}
1568-
last_elem = current_elem;
1569-
} else {
1570-
break;
15711584
}
1572-
}
1573-
i = j-1;
1585+
i = j - 1;
15741586

1575-
PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
1576-
if (concat_str == NULL) {
1577-
_PyUnicodeWriter_Dealloc(&writer);
1578-
return NULL;
1587+
PyObject *concat_str = _PyUnicodeWriter_Finish(&writer);
1588+
if (concat_str == NULL) {
1589+
_PyUnicodeWriter_Dealloc(&writer);
1590+
return NULL;
1591+
}
1592+
1593+
elem = _PyAST_Constant(concat_str, kind, first_elem->lineno,
1594+
first_elem->col_offset,
1595+
last_elem->end_lineno,
1596+
last_elem->end_col_offset, p->arena);
1597+
if (elem == NULL) {
1598+
Py_DECREF(concat_str);
1599+
return NULL;
1600+
}
15791601
}
15801602

1581-
elem = _PyAST_Constant(concat_str, kind, first_elem->lineno, first_elem->col_offset,
1582-
last_elem->end_lineno, last_elem->end_col_offset, p->arena);
1583-
if (elem == NULL) {
1584-
Py_DECREF(concat_str);
1585-
return NULL;
1603+
/* Drop all empty constant strings */
1604+
if (f_string_found &&
1605+
PyUnicode_CheckExact(elem->v.Constant.value) &&
1606+
PyUnicode_GET_LENGTH(elem->v.Constant.value) == 0) {
1607+
continue;
15861608
}
15871609
}
15881610

@@ -1594,7 +1616,8 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
15941616
expr_ty elem = asdl_seq_GET(values, 0);
15951617
assert(elem->kind == Constant_kind);
15961618
return elem;
1597-
}
1619+
}
15981620

1621+
assert(current_pos == n_elements);
15991622
return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena);
16001623
}

0 commit comments

Comments
 (0)