Skip to content

Commit 50b2261

Browse files
authored
gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473)
1 parent 53357b3 commit 50b2261

File tree

2 files changed: 22 additions and 19 deletions

2 files changed

+22
-19
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
+Reduce allocation size of :class:`list` from :meth:`str.split`
+and :meth:`str.rsplit`. Patch by Dong-hee Na.

Objects/unicodeobject.c

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9696,40 +9696,40 @@ split(PyObject *self,
96969696
const void *buf1, *buf2;
96979697
Py_ssize_t len1, len2;
96989698
PyObject* out;
9699-
9700-
if (maxcount < 0)
9701-
maxcount = PY_SSIZE_T_MAX;
9699+
len1 = PyUnicode_GET_LENGTH(self);
9700+
kind1 = PyUnicode_KIND(self);
9701+
if (maxcount < 0) {
9702+
maxcount = len1;
9703+
}
97029704

97039705
if (substring == NULL)
9704-
switch (PyUnicode_KIND(self)) {
9706+
switch (kind1) {
97059707
case PyUnicode_1BYTE_KIND:
97069708
if (PyUnicode_IS_ASCII(self))
97079709
return asciilib_split_whitespace(
97089710
self, PyUnicode_1BYTE_DATA(self),
9709-
PyUnicode_GET_LENGTH(self), maxcount
9711+
len1, maxcount
97109712
);
97119713
else
97129714
return ucs1lib_split_whitespace(
97139715
self, PyUnicode_1BYTE_DATA(self),
9714-
PyUnicode_GET_LENGTH(self), maxcount
9716+
len1, maxcount
97159717
);
97169718
case PyUnicode_2BYTE_KIND:
97179719
return ucs2lib_split_whitespace(
97189720
self, PyUnicode_2BYTE_DATA(self),
9719-
PyUnicode_GET_LENGTH(self), maxcount
9721+
len1, maxcount
97209722
);
97219723
case PyUnicode_4BYTE_KIND:
97229724
return ucs4lib_split_whitespace(
97239725
self, PyUnicode_4BYTE_DATA(self),
9724-
PyUnicode_GET_LENGTH(self), maxcount
9726+
len1, maxcount
97259727
);
97269728
default:
97279729
Py_UNREACHABLE();
97289730
}
97299731

9730-
kind1 = PyUnicode_KIND(self);
97319732
kind2 = PyUnicode_KIND(substring);
9732-
len1 = PyUnicode_GET_LENGTH(self);
97339733
len2 = PyUnicode_GET_LENGTH(substring);
97349734
if (kind1 < kind2 || len1 < len2) {
97359735
out = PyList_New(1);
@@ -9783,39 +9783,40 @@ rsplit(PyObject *self,
97839783
Py_ssize_t len1, len2;
97849784
PyObject* out;
97859785

9786-
if (maxcount < 0)
9787-
maxcount = PY_SSIZE_T_MAX;
9786+
len1 = PyUnicode_GET_LENGTH(self);
9787+
kind1 = PyUnicode_KIND(self);
9788+
if (maxcount < 0) {
9789+
maxcount = len1;
9790+
}
97889791

97899792
if (substring == NULL)
9790-
switch (PyUnicode_KIND(self)) {
9793+
switch (kind1) {
97919794
case PyUnicode_1BYTE_KIND:
97929795
if (PyUnicode_IS_ASCII(self))
97939796
return asciilib_rsplit_whitespace(
97949797
self, PyUnicode_1BYTE_DATA(self),
9795-
PyUnicode_GET_LENGTH(self), maxcount
9798+
len1, maxcount
97969799
);
97979800
else
97989801
return ucs1lib_rsplit_whitespace(
97999802
self, PyUnicode_1BYTE_DATA(self),
9800-
PyUnicode_GET_LENGTH(self), maxcount
9803+
len1, maxcount
98019804
);
98029805
case PyUnicode_2BYTE_KIND:
98039806
return ucs2lib_rsplit_whitespace(
98049807
self, PyUnicode_2BYTE_DATA(self),
9805-
PyUnicode_GET_LENGTH(self), maxcount
9808+
len1, maxcount
98069809
);
98079810
case PyUnicode_4BYTE_KIND:
98089811
return ucs4lib_rsplit_whitespace(
98099812
self, PyUnicode_4BYTE_DATA(self),
9810-
PyUnicode_GET_LENGTH(self), maxcount
9813+
len1, maxcount
98119814
);
98129815
default:
98139816
Py_UNREACHABLE();
98149817
}
98159818

9816-
kind1 = PyUnicode_KIND(self);
98179819
kind2 = PyUnicode_KIND(substring);
9818-
len1 = PyUnicode_GET_LENGTH(self);
98199820
len2 = PyUnicode_GET_LENGTH(substring);
98209821
if (kind1 < kind2 || len1 < len2) {
98219822
out = PyList_New(1);

0 commit comments

Comments
 (0)