Skip to content

Commit d3eaf0c

Browse files
authored
bpo-44945: Specialize BINARY_ADD (GH-27967)
1 parent 245f1f2 commit d3eaf0c

File tree

9 files changed

+253
-78
lines changed

9 files changed

+253
-78
lines changed

Include/internal/pycore_code.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *nam
307307
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
308308
int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
309309
int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
310+
/* Try to specialize a BINARY_ADD instruction for the given operands.
 * The adaptive instruction passes the left operand, the right operand and
 * the instruction to rewrite, and treats a negative return value as an
 * error. */
int _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr);
310311

311312
#define PRINT_SPECIALIZATION_STATS 0
312313
#define PRINT_SPECIALIZATION_STATS_DETAILED 0

Include/internal/pycore_long.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ static inline PyObject* _PyLong_GetZero(void)
3434
static inline PyObject* _PyLong_GetOne(void)
3535
{ return __PyLong_GetSmallInt_internal(1); }
3636

37+
/* Add two ints. Unlike the static long_add wrapper in Objects/longobject.c,
 * this does not run CHECK_BINOP on its arguments.
 * NOTE(review): presumably callers must guarantee that both arguments are
 * ints -- confirm against the call sites. */
PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right);
38+
3739
#ifdef __cplusplus
3840
}
3941
#endif

Include/opcode.h

Lines changed: 31 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/opcode.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ def jabs_op(name, op):
220220
del def_op, name_op, jrel_op, jabs_op
221221

222222
_specialized_instructions = [
223+
"BINARY_ADD_ADAPTIVE",
224+
"BINARY_ADD_INT",
225+
"BINARY_ADD_FLOAT",
226+
"BINARY_ADD_UNICODE",
227+
"BINARY_ADD_UNICODE_INPLACE_FAST",
223228
"BINARY_SUBSCR_ADAPTIVE",
224229
"BINARY_SUBSCR_LIST_INT",
225230
"BINARY_SUBSCR_TUPLE_INT",
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Specialize the BINARY_ADD instruction using the PEP 659 machinery. Adds five new instructions:
2+
3+
* BINARY_ADD_ADAPTIVE
4+
* BINARY_ADD_FLOAT
5+
* BINARY_ADD_INT
6+
* BINARY_ADD_UNICODE
7+
* BINARY_ADD_UNICODE_INPLACE_FAST

Objects/longobject.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,16 +3119,14 @@ x_sub(PyLongObject *a, PyLongObject *b)
31193119
return maybe_small_long(long_normalize(z));
31203120
}
31213121

3122-
static PyObject *
3123-
long_add(PyLongObject *a, PyLongObject *b)
3122+
PyObject *
3123+
_PyLong_Add(PyLongObject *a, PyLongObject *b)
31243124
{
3125-
PyLongObject *z;
3126-
3127-
CHECK_BINOP(a, b);
3128-
31293125
if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) {
31303126
return _PyLong_FromSTwoDigits(medium_value(a) + medium_value(b));
31313127
}
3128+
3129+
PyLongObject *z;
31323130
if (Py_SIZE(a) < 0) {
31333131
if (Py_SIZE(b) < 0) {
31343132
z = x_add(a, b);
@@ -3153,6 +3151,14 @@ long_add(PyLongObject *a, PyLongObject *b)
31533151
return (PyObject *)z;
31543152
}
31553153

3154+
/* Checked wrapper around _PyLong_Add: runs CHECK_BINOP on both operands,
 * then hands the actual addition to _PyLong_Add and returns its result.
 * NOTE(review): CHECK_BINOP is defined outside this view; presumably it
 * returns NotImplemented when an operand is not an int -- confirm. */
static PyObject *
3155+
long_add(PyLongObject *a, PyLongObject *b)
3156+
{
3157+
CHECK_BINOP(a, b);
3158+
return _PyLong_Add(a, b);
3159+
}
3160+
3161+
31563162
static PyObject *
31573163
long_sub(PyLongObject *a, PyLongObject *b)
31583164
{

Python/ceval.c

Lines changed: 114 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,6 +1435,12 @@ eval_frame_handle_pending(PyThreadState *tstate)
14351435

14361436
#define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg)
14371437

1438+
/* Record a hit for a specialized instruction whose counter lives in its
 * oparg: stores saturating_increment(oparg) through
 * UPDATE_PREV_INSTR_OPARG, which writes the byte at
 * ((uint8_t *)next_instr)[-1].
 * NOTE(review): saturating_increment is defined outside this view;
 * presumably it increments without wrapping around -- confirm. */
static inline void
1439+
record_hit_inline(_Py_CODEUNIT *next_instr, int oparg)
1440+
{
1441+
UPDATE_PREV_INSTR_OPARG(next_instr, saturating_increment(oparg));
1442+
}
1443+
14381444
#define GLOBALS() frame->f_globals
14391445
#define BUILTINS() frame->f_builtins
14401446
#define LOCALS() frame->f_locals
@@ -1980,28 +1986,120 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
19801986
}
19811987

19821988
TARGET(BINARY_ADD): {
1989+
PREDICTED(BINARY_ADD);
1990+
STAT_INC(BINARY_ADD, unquickened);
19831991
PyObject *right = POP();
19841992
PyObject *left = TOP();
1985-
PyObject *sum;
1986-
/* NOTE(vstinner): Please don't try to micro-optimize int+int on
1987-
CPython using bytecode, it is simply worthless.
1988-
See http://bugs.python.org/issue21955 and
1989-
http://bugs.python.org/issue10044 for the discussion. In short,
1990-
no patch shown any impact on a realistic benchmark, only a minor
1991-
speedup on microbenchmarks. */
1992-
if (PyUnicode_CheckExact(left) &&
1993-
PyUnicode_CheckExact(right)) {
1994-
sum = unicode_concatenate(tstate, left, right, frame, next_instr);
1995-
/* unicode_concatenate consumed the ref to left */
1993+
PyObject *sum = PyNumber_Add(left, right);
1994+
SET_TOP(sum);
1995+
Py_DECREF(left);
1996+
Py_DECREF(right);
1997+
if (sum == NULL) {
1998+
goto error;
1999+
}
2000+
DISPATCH();
2001+
}
2002+
2003+
TARGET(BINARY_ADD_ADAPTIVE): {
2004+
if (oparg == 0) {
2005+
PyObject *left = SECOND();
2006+
PyObject *right = TOP();
2007+
next_instr--;
2008+
if (_Py_Specialize_BinaryAdd(left, right, next_instr) < 0) {
2009+
goto error;
2010+
}
2011+
DISPATCH();
19962012
}
19972013
else {
1998-
sum = PyNumber_Add(left, right);
1999-
Py_DECREF(left);
2014+
STAT_INC(BINARY_ADD, deferred);
2015+
UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1);
2016+
STAT_DEC(BINARY_ADD, unquickened);
2017+
JUMP_TO_INSTRUCTION(BINARY_ADD);
20002018
}
2019+
}
2020+
2021+
TARGET(BINARY_ADD_UNICODE): {
2022+
PyObject *left = SECOND();
2023+
PyObject *right = TOP();
2024+
DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD);
2025+
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
2026+
STAT_INC(BINARY_ADD, hit);
2027+
record_hit_inline(next_instr, oparg);
2028+
PyObject *res = PyUnicode_Concat(left, right);
2029+
STACK_SHRINK(1);
2030+
SET_TOP(res);
2031+
Py_DECREF(left);
20012032
Py_DECREF(right);
2002-
SET_TOP(sum);
2003-
if (sum == NULL)
2033+
if (TOP() == NULL) {
2034+
goto error;
2035+
}
2036+
DISPATCH();
2037+
}
2038+
2039+
TARGET(BINARY_ADD_UNICODE_INPLACE_FAST): {
2040+
PyObject *left = SECOND();
2041+
PyObject *right = TOP();
2042+
DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD);
2043+
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
2044+
DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD);
2045+
int next_oparg = _Py_OPARG(*next_instr);
2046+
assert(_Py_OPCODE(*next_instr) == STORE_FAST);
2047+
/* In the common case, there are 2 references to the value
2048+
* stored in 'variable' when the v = v + ... is performed: one
2049+
* on the value stack (in 'v') and one still stored in the
2050+
* 'variable'. We try to delete the variable now to reduce
2051+
* the refcnt to 1.
2052+
*/
2053+
PyObject *var = GETLOCAL(next_oparg);
2054+
DEOPT_IF(var != left, BINARY_ADD);
2055+
STAT_INC(BINARY_ADD, hit);
2056+
record_hit_inline(next_instr, oparg);
2057+
GETLOCAL(next_oparg) = NULL;
2058+
Py_DECREF(left);
2059+
STACK_SHRINK(1);
2060+
PyUnicode_Append(&TOP(), right);
2061+
Py_DECREF(right);
2062+
if (TOP() == NULL) {
20042063
goto error;
2064+
}
2065+
DISPATCH();
2066+
}
2067+
2068+
TARGET(BINARY_ADD_FLOAT): {
2069+
PyObject *left = SECOND();
2070+
PyObject *right = TOP();
2071+
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD);
2072+
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
2073+
STAT_INC(BINARY_ADD, hit);
2074+
record_hit_inline(next_instr, oparg);
2075+
double dsum = ((PyFloatObject *)left)->ob_fval +
2076+
((PyFloatObject *)right)->ob_fval;
2077+
PyObject *sum = PyFloat_FromDouble(dsum);
2078+
SET_SECOND(sum);
2079+
Py_DECREF(right);
2080+
Py_DECREF(left);
2081+
STACK_SHRINK(1);
2082+
if (sum == NULL) {
2083+
goto error;
2084+
}
2085+
DISPATCH();
2086+
}
2087+
2088+
TARGET(BINARY_ADD_INT): {
2089+
PyObject *left = SECOND();
2090+
PyObject *right = TOP();
2091+
DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD);
2092+
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD);
2093+
STAT_INC(BINARY_ADD, hit);
2094+
record_hit_inline(next_instr, oparg);
2095+
PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
2096+
SET_SECOND(sum);
2097+
Py_DECREF(right);
2098+
Py_DECREF(left);
2099+
STACK_SHRINK(1);
2100+
if (sum == NULL) {
2101+
goto error;
2102+
}
20052103
DISPATCH();
20062104
}
20072105

@@ -4761,6 +4859,7 @@ MISS_WITH_CACHE(STORE_ATTR)
47614859
MISS_WITH_CACHE(LOAD_GLOBAL)
47624860
MISS_WITH_CACHE(LOAD_METHOD)
47634861
MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR)
4862+
MISS_WITH_OPARG_COUNTER(BINARY_ADD)
47644863

47654864
binary_subscr_dict_error:
47664865
{

0 commit comments

Comments
 (0)