Skip to content

Commit 82cbf4b

Browse files
committed
bpo-42093: Add opcode cache for LOAD_ATTR
1 parent de73d43 commit 82cbf4b

File tree

7 files changed

+296
-7
lines changed

7 files changed

+296
-7
lines changed

Doc/whatsnew/3.10.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,9 @@ Optimizations
252252
average.
253253
(Contributed by Victor Stinner in :issue:`41006`.)
254254

255+
* The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism.
256+
It is about 36% faster now. (Contributed by Pablo Galindo and Yury Selivanov
257+
in :issue:`42093`.)
255258

256259
Deprecated
257260
==========

Include/cpython/dictobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);
7171

7272
int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
7373
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
74+
Py_ssize_t _PyDict_GetItemHint(PyDictObject *, PyObject *, Py_ssize_t, PyObject **);
7475

7576
/* _PyDictView */
7677

Include/internal/pycore_code.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,16 @@ typedef struct {
1010
uint64_t builtins_ver; /* ma_version of builtin dict */
1111
} _PyOpcache_LoadGlobal;
1212

13+
/* Cache entry used by the LOAD_ATTR instruction. */
typedef struct {
14+
PyTypeObject *type;  /* type of the owner when the entry was filled */
15+
Py_ssize_t hint;  /* index returned by _PyDict_GetItemHint(), reused as the next hint */
16+
unsigned int tp_version_tag;  /* type->tp_version_tag when the entry was filled */
17+
} _PyOpCodeOpt_LoadAttr;
18+
1319
struct _PyOpcache {
1420
union {
1521
_PyOpcache_LoadGlobal lg;
22+
_PyOpCodeOpt_LoadAttr la;
1623
} u;
1724
char optimized;
1825
};
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism and
2+
it is about 36% faster now. Patch by Pablo Galindo and Yury Selivanov.

Objects/codeobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,8 @@ _PyCode_InitOpcache(PyCodeObject *co)
301301
unsigned char opcode = _Py_OPCODE(opcodes[i]);
302302
i++; // 'i' is now aligned to (next_instr - first_instr)
303303

304-
// TODO: LOAD_METHOD, LOAD_ATTR
305-
if (opcode == LOAD_GLOBAL) {
304+
// TODO: LOAD_METHOD
305+
if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
306306
opts++;
307307
co->co_opcache_map[i] = (unsigned char)opts;
308308
if (opts > 254) {

Objects/dictobject.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,6 +1437,71 @@ PyDict_GetItem(PyObject *op, PyObject *key)
14371437
return value;
14381438
}
14391439

1440+
Py_ssize_t
1441+
_PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
1442+
Py_ssize_t hint, PyObject **value)
1443+
{
1444+
Py_hash_t hash;
1445+
PyThreadState *tstate;
1446+
1447+
assert(*value == NULL);
1448+
assert(PyDict_CheckExact((PyObject*)mp));
1449+
assert(PyUnicode_CheckExact(key));
1450+
1451+
if (hint >= 0 && hint < _PyDict_KeysSize(mp->ma_keys)) {
1452+
PyObject *res = NULL;
1453+
1454+
PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
1455+
if (ep->me_key == key) {
1456+
if (mp->ma_keys->dk_lookup == lookdict_split) {
1457+
assert(mp->ma_values != NULL);
1458+
res = mp->ma_values[(size_t)hint];
1459+
}
1460+
else {
1461+
res = ep->me_value;
1462+
}
1463+
if (res != NULL) {
1464+
*value = res;
1465+
return hint;
1466+
}
1467+
}
1468+
}
1469+
1470+
if ((hash = ((PyASCIIObject *) key)->hash) == -1)
1471+
{
1472+
hash = PyObject_Hash(key);
1473+
if (hash == -1) {
1474+
PyErr_Clear();
1475+
return -1;
1476+
}
1477+
}
1478+
1479+
// We can arrive here with a NULL tstate during initialization: try
1480+
// running "python -Wi" for an example related to string interning
1481+
tstate = _PyThreadState_UncheckedGet();
1482+
Py_ssize_t ix = 0;
1483+
if (tstate != NULL && tstate->curexc_type != NULL) {
1484+
/* preserve the existing exception */
1485+
PyObject *err_type, *err_value, *err_tb;
1486+
PyErr_Fetch(&err_type, &err_value, &err_tb);
1487+
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
1488+
/* ignore errors */
1489+
PyErr_Restore(err_type, err_value, err_tb);
1490+
if (ix < 0) {
1491+
return -1;
1492+
}
1493+
}
1494+
else {
1495+
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
1496+
if (ix < 0) {
1497+
PyErr_Clear();
1498+
return -1;
1499+
}
1500+
}
1501+
1502+
return ix;
1503+
}
1504+
14401505
/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
14411506
This returns NULL *with* an exception set if an exception occurred.
14421507
It returns NULL *without* an exception set if the key wasn't present.

Python/ceval.c

Lines changed: 216 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ static long dxp[256];
111111
#else
112112
#define OPCACHE_MIN_RUNS 1024 /* create opcache once the code has been executed this many times */
113113
#endif
114+
#define OPCODE_CACHE_MAX_TRIES 20
114115
#define OPCACHE_STATS 0 /* Enable stats */
115116

116117
#if OPCACHE_STATS
@@ -120,6 +121,12 @@ static size_t opcache_code_objects_extra_mem = 0;
120121
static size_t opcache_global_opts = 0;
121122
static size_t opcache_global_hits = 0;
122123
static size_t opcache_global_misses = 0;
124+
125+
static size_t opcache_attr_opts = 0;
126+
static size_t opcache_attr_hits = 0;
127+
static size_t opcache_attr_misses = 0;
128+
static size_t opcache_attr_deopts = 0;
129+
static size_t opcache_attr_total = 0;
123130
#endif
124131

125132

@@ -365,6 +372,25 @@ _PyEval_Fini(void)
365372
opcache_global_opts);
366373

367374
fprintf(stderr, "\n");
375+
376+
fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
377+
opcache_attr_hits,
378+
(int) (100.0 * opcache_attr_hits /
379+
opcache_attr_total));
380+
381+
fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
382+
opcache_attr_misses,
383+
(int) (100.0 * opcache_attr_misses /
384+
opcache_attr_total));
385+
386+
fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
387+
opcache_attr_opts);
388+
389+
fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
390+
opcache_attr_deopts);
391+
392+
fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
393+
opcache_attr_total);
368394
#endif
369395
}
370396

@@ -1224,16 +1250,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
12241250
do { \
12251251
co_opcache = NULL; \
12261252
if (co->co_opcache != NULL) { \
1227-
unsigned char co_opt_offset = \
1253+
unsigned char co_opcache_offset = \
12281254
co->co_opcache_map[next_instr - first_instr]; \
1229-
if (co_opt_offset > 0) { \
1230-
assert(co_opt_offset <= co->co_opcache_size); \
1231-
co_opcache = &co->co_opcache[co_opt_offset - 1]; \
1255+
if (co_opcache_offset > 0) { \
1256+
assert(co_opcache_offset <= co->co_opcache_size); \
1257+
co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
12321258
assert(co_opcache != NULL); \
12331259
} \
12341260
} \
12351261
} while (0)
12361262

1263+
/* Stop using the opcode cache for the current instruction.
 *
 * Marks the cache entry as deoptimized (optimized = -1), clears the
 * instruction's slot in co_opcache_map and resets the local `co_opcache`
 * to NULL so the rest of the handler takes the uncached path.
 *
 * co_opcache_map is indexed by instruction (next_instr - first_instr), the
 * same index OPCACHE_CHECK() reads; the value stored there is the cache
 * offset.  The slot to clear is therefore the current instruction's slot,
 * not the slot whose index happens to equal the cache offset.
 *
 * Expects `co`, `co_opcache`, `next_instr` and `first_instr` to be in scope.
 */
#define OPCACHE_DEOPT() \
    do { \
        if (co_opcache != NULL) { \
            co_opcache->optimized = -1; \
            assert(co->co_opcache_map[next_instr - first_instr] <= \
                   co->co_opcache_size); \
            co->co_opcache_map[next_instr - first_instr] = 0; \
            co_opcache = NULL; \
        } \
    } while (0)
1274+
1275+
/* LOAD_ATTR flavour of OPCACHE_DEOPT(): if the current instruction has a
   cache entry, count the deoptimization in the stats (a no-op unless
   OPCACHE_STATS is enabled) and then call OPCACHE_DEOPT(). */
#define OPCACHE_DEOPT_LOAD_ATTR() \
1276+
do { \
1277+
if (co_opcache != NULL) { \
1278+
OPCACHE_STAT_ATTR_DEOPT(); \
1279+
OPCACHE_DEOPT(); \
1280+
} \
1281+
} while (0)
1282+
1283+
/* Decrement the cache entry's `optimized` counter and deoptimize the
   instruction via OPCACHE_DEOPT_LOAD_ATTR() once the counter reaches zero
   or below.  Does nothing when there is no cache entry. */
#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
1284+
do { \
1285+
if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
1286+
OPCACHE_DEOPT_LOAD_ATTR(); \
1287+
} \
1288+
} while (0)
1289+
12371290
#if OPCACHE_STATS
12381291

12391292
#define OPCACHE_STAT_GLOBAL_HIT() \
@@ -1251,12 +1304,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
12511304
if (co->co_opcache != NULL) opcache_global_opts++; \
12521305
} while (0)
12531306

1307+
#define OPCACHE_STAT_ATTR_HIT() \
1308+
do { \
1309+
if (co->co_opcache != NULL) opcache_attr_hits++; \
1310+
} while (0)
1311+
1312+
#define OPCACHE_STAT_ATTR_MISS() \
1313+
do { \
1314+
if (co->co_opcache != NULL) opcache_attr_misses++; \
1315+
} while (0)
1316+
1317+
#define OPCACHE_STAT_ATTR_OPT() \
1318+
do { \
1319+
if (co->co_opcache!= NULL) opcache_attr_opts++; \
1320+
} while (0)
1321+
1322+
#define OPCACHE_STAT_ATTR_DEOPT() \
1323+
do { \
1324+
if (co->co_opcache != NULL) opcache_attr_deopts++; \
1325+
} while (0)
1326+
1327+
#define OPCACHE_STAT_ATTR_TOTAL() \
1328+
do { \
1329+
if (co->co_opcache != NULL) opcache_attr_total++; \
1330+
} while (0)
1331+
12541332
#else /* OPCACHE_STATS */
12551333

12561334
#define OPCACHE_STAT_GLOBAL_HIT()
12571335
#define OPCACHE_STAT_GLOBAL_MISS()
12581336
#define OPCACHE_STAT_GLOBAL_OPT()
12591337

1338+
#define OPCACHE_STAT_ATTR_HIT()
1339+
#define OPCACHE_STAT_ATTR_MISS()
1340+
#define OPCACHE_STAT_ATTR_OPT()
1341+
#define OPCACHE_STAT_ATTR_DEOPT()
1342+
#define OPCACHE_STAT_ATTR_TOTAL()
1343+
12601344
#endif
12611345

12621346
/* Start of code */
@@ -3023,7 +3107,134 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
30233107
case TARGET(LOAD_ATTR): {
30243108
PyObject *name = GETITEM(names, oparg);
30253109
PyObject *owner = TOP();
3026-
PyObject *res = PyObject_GetAttr(owner, name);
3110+
3111+
PyTypeObject *type = Py_TYPE(owner);
3112+
PyObject *res;
3113+
PyObject **dictptr;
3114+
PyObject *dict;
3115+
_PyOpCodeOpt_LoadAttr *la;
3116+
3117+
OPCACHE_STAT_ATTR_TOTAL();
3118+
3119+
OPCACHE_CHECK();
3120+
if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
3121+
{
3122+
if (co_opcache->optimized > 0) {
3123+
/* Fast path -- cache hit makes LOAD_ATTR ~30% faster */
3124+
la = &co_opcache->u.la;
3125+
if (la->type == type && la->tp_version_tag == type->tp_version_tag)
3126+
{
3127+
assert(type->tp_dict != NULL);
3128+
assert(type->tp_dictoffset > 0);
3129+
3130+
dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
3131+
dict = *dictptr;
3132+
if (dict != NULL && PyDict_CheckExact(dict)) {
3133+
Py_ssize_t hint = la->hint;
3134+
Py_INCREF(dict);
3135+
res = NULL;
3136+
la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);
3137+
3138+
if (res != NULL) {
3139+
if (la->hint == hint && hint >= 0) {
3140+
/* Our hint has helped -- cache hit. */
3141+
OPCACHE_STAT_ATTR_HIT();
3142+
} else {
3143+
/* The hint we provided didn't work.
3144+
Maybe next time? */
3145+
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
3146+
}
3147+
3148+
Py_INCREF(res);
3149+
SET_TOP(res);
3150+
Py_DECREF(owner);
3151+
Py_DECREF(dict);
3152+
DISPATCH();
3153+
} else {
3154+
// This attribute can be missing sometimes -- we
3155+
// don't want to optimize this lookup.
3156+
OPCACHE_DEOPT_LOAD_ATTR();
3157+
Py_DECREF(dict);
3158+
}
3159+
} else {
3160+
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
3161+
OPCACHE_DEOPT_LOAD_ATTR();
3162+
}
3163+
} else {
3164+
// The type of the object has either been updated,
3165+
// or is different. Maybe it will stabilize?
3166+
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
3167+
}
3168+
3169+
OPCACHE_STAT_ATTR_MISS();
3170+
}
3171+
3172+
if (co_opcache != NULL && /* co_opcache can be NULL after a DEOPT() call. */
3173+
type->tp_getattro == PyObject_GenericGetAttr)
3174+
{
3175+
PyObject *descr;
3176+
Py_ssize_t ret;
3177+
3178+
if (type->tp_dictoffset > 0) {
3179+
if (type->tp_dict == NULL) {
3180+
if (PyType_Ready(type) < 0) {
3181+
Py_DECREF(owner);
3182+
SET_TOP(NULL);
3183+
goto error;
3184+
}
3185+
}
3186+
3187+
descr = _PyType_Lookup(type, name);
3188+
if (descr == NULL ||
3189+
descr->ob_type->tp_descr_get == NULL ||
3190+
!PyDescr_IsData(descr))
3191+
{
3192+
dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
3193+
dict = *dictptr;
3194+
3195+
if (dict != NULL && PyDict_CheckExact(dict)) {
3196+
Py_INCREF(dict);
3197+
res = NULL;
3198+
ret = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
3199+
if (res != NULL) {
3200+
Py_INCREF(res);
3201+
Py_DECREF(dict);
3202+
Py_DECREF(owner);
3203+
SET_TOP(res);
3204+
3205+
if (co_opcache->optimized == 0) {
3206+
// First time we optimize this opcode.
3207+
OPCACHE_STAT_ATTR_OPT();
3208+
co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
3209+
}
3210+
3211+
la = &co_opcache->u.la;
3212+
la->type = type;
3213+
la->tp_version_tag = type->tp_version_tag;
3214+
la->hint = ret;
3215+
3216+
DISPATCH();
3217+
}
3218+
Py_DECREF(dict);
3219+
} else {
3220+
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
3221+
OPCACHE_DEOPT_LOAD_ATTR();
3222+
}
3223+
} else {
3224+
// We failed to find an attribute without a data-like descriptor
3225+
OPCACHE_DEOPT_LOAD_ATTR();
3226+
}
3227+
} else {
3228+
// The object's class does not have a tp_dictoffset we can use
3229+
OPCACHE_DEOPT_LOAD_ATTR();
3230+
}
3231+
} else if (type->tp_getattro != PyObject_GenericGetAttr) {
3232+
OPCACHE_DEOPT_LOAD_ATTR();
3233+
}
3234+
}
3235+
3236+
/* slow path */
3237+
res = PyObject_GetAttr(owner, name);
30273238
Py_DECREF(owner);
30283239
SET_TOP(res);
30293240
if (res == NULL)

0 commit comments

Comments
 (0)