Skip to content

bpo-42093: Add opcode cache for LOAD_ATTR #22803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ Optimizations
average.
(Contributed by Victor Stinner in :issue:`41006`.)

* The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism.
It is about 36% faster now. (Contributed by Pablo Galindo and Yury Selivanov
in :issue:`42093`.)

Deprecated
==========
Expand Down
1 change: 1 addition & 0 deletions Include/cpython/dictobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);

int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
Py_ssize_t _PyDict_GetItemHint(PyDictObject *, PyObject *, Py_ssize_t, PyObject **);

/* _PyDictView */

Expand Down
7 changes: 7 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ typedef struct {
uint64_t builtins_ver; /* ma_version of builtin dict */
} _PyOpcache_LoadGlobal;

/* Per-instruction cache payload for LOAD_ATTR.  The LOAD_ATTR handler in
   ceval.c fills it in after a successful lookup in the owner's instance
   dict and consults it on later executions of the same instruction. */
typedef struct {
/* Py_TYPE() of the owner when the entry was filled; the handler stores it
   without taking a reference and only compares it by identity. */
PyTypeObject *type;
/* Value returned by _PyDict_GetItemHint() for the attribute name; passed
   back to that function as the entry index to try first. */
Py_ssize_t hint;
/* type->tp_version_tag at fill time; the cached entry is only used while
   the owner's type still reports the same tag. */
unsigned int tp_version_tag;
} _PyOpCodeOpt_LoadAttr;

/* One opcode-cache slot.  A code object owns an array of these and maps
   instructions to slots through co_opcache_map. */
struct _PyOpcache {
    /* Payload; which member is live depends on the opcode the slot
       belongs to. */
    union {
        _PyOpcache_LoadGlobal lg;   /* LOAD_GLOBAL */
        _PyOpCodeOpt_LoadAttr la;   /* LOAD_ATTR */
    } u;
    /* Optimization state.  For LOAD_ATTR: 0 means "not optimized yet",
       a positive value is the number of misses still tolerated, and -1
       marks a deoptimized slot.
       This must be explicitly signed: plain `char` is unsigned on some
       ABIs, where storing -1 would yield 255 and the `<= 0` / `> 0` tests
       in ceval.c would treat a deoptimized slot as a live one. */
    signed char optimized;
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism and
it is about 36% faster now. Patch by Pablo Galindo and Yury Selivanov.
4 changes: 2 additions & 2 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ _PyCode_InitOpcache(PyCodeObject *co)
unsigned char opcode = _Py_OPCODE(opcodes[i]);
i++; // 'i' is now aligned to (next_instr - first_instr)

// TODO: LOAD_METHOD, LOAD_ATTR
if (opcode == LOAD_GLOBAL) {
// TODO: LOAD_METHOD
if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
opts++;
co->co_opcache_map[i] = (unsigned char)opts;
if (opts > 254) {
Expand Down
65 changes: 65 additions & 0 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,71 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return value;
}

/* Look up `key` in `mp`, trying the entry at index `hint` first.

   mp     exact dict to search (asserted).
   key    exact str key (asserted); the fast path compares it by identity.
   hint   entry index remembered from an earlier call, or a negative value
          to go straight to the regular lookup.
   value  out-parameter; must point at NULL on entry (asserted).  On the
          fast path it receives the borrowed value found at `hint`;
          otherwise it is whatever the dict's lookup function stores.

   Returns `hint` when the hinted entry still holds `key` with a non-NULL
   value.  Otherwise returns the index reported by the dict's lookup
   function, or -1 if hashing fails or that function reports a negative
   index.  This function never leaves a new exception set: errors raised
   while hashing or looking up are cleared, and an exception that was
   already pending on entry is preserved. */
Py_ssize_t
_PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
                    Py_ssize_t hint, PyObject **value)
{
    Py_hash_t hash;
    PyThreadState *tstate;

    assert(*value == NULL);
    assert(PyDict_CheckExact((PyObject*)mp));
    assert(PyUnicode_CheckExact(key));

    /* Fast path.  The hint is an index into the entries array, so it has
       to be bounded by the number of entries in use (dk_nentries), not by
       the size of the keys object in bytes; a stale hint must never make
       us read past the populated entries. */
    if (hint >= 0 && hint < mp->ma_keys->dk_nentries) {
        PyObject *res = NULL;

        PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
        if (ep->me_key == key) {
            if (mp->ma_keys->dk_lookup == lookdict_split) {
                /* Split table: values live in ma_values, not in the entry. */
                assert(mp->ma_values != NULL);
                res = mp->ma_values[(size_t)hint];
            }
            else {
                res = ep->me_value;
            }
            if (res != NULL) {
                *value = res;
                return hint;
            }
        }
    }

    /* Slow path: regular hashed lookup.  Use the hash cached on the string
       object when there is one. */
    if ((hash = ((PyASCIIObject *) key)->hash) == -1)
    {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            PyErr_Clear();
            return -1;
        }
    }

    // We can arrive here with a NULL tstate during initialization: try
    // running "python -Wi" for an example related to string interning
    tstate = _PyThreadState_UncheckedGet();
    Py_ssize_t ix = 0;
    if (tstate != NULL && tstate->curexc_type != NULL) {
        /* preserve the existing exception */
        PyObject *err_type, *err_value, *err_tb;
        PyErr_Fetch(&err_type, &err_value, &err_tb);
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
        /* ignore errors */
        PyErr_Restore(err_type, err_value, err_tb);
        if (ix < 0) {
            return -1;
        }
    }
    else {
        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
        if (ix < 0) {
            PyErr_Clear();
            return -1;
        }
    }

    return ix;
}

/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
This returns NULL *with* an exception set if an exception occurred.
It returns NULL *without* an exception set if the key wasn't present.
Expand Down
221 changes: 216 additions & 5 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ static long dxp[256];
#else
#define OPCACHE_MIN_RUNS 1024 /* create opcache when code executed this time */
#endif
/* Value stored in co_opcache->optimized when a LOAD_ATTR is first optimized.
   OPCACHE_MAYBE_DEOPT_LOAD_ATTR() decrements it on a miss and deoptimizes
   the instruction once it drops to zero or below. */
#define OPCODE_CACHE_MAX_TRIES 20
#define OPCACHE_STATS 0 /* Enable stats */

#if OPCACHE_STATS
Expand All @@ -120,6 +121,12 @@ static size_t opcache_code_objects_extra_mem = 0;
static size_t opcache_global_opts = 0;
static size_t opcache_global_hits = 0;
static size_t opcache_global_misses = 0;

/* LOAD_ATTR opcode-cache counters.  They are bumped by the matching
   OPCACHE_STAT_ATTR_*() macros in the eval loop and printed to stderr by
   _PyEval_Fini(). */
static size_t opcache_attr_opts = 0;    /* instructions optimized for the first time */
static size_t opcache_attr_hits = 0;    /* lookups where the cached hint was still valid */
static size_t opcache_attr_misses = 0;  /* optimized lookups that fell out of the fast path */
static size_t opcache_attr_deopts = 0;  /* instructions for which caching was given up */
static size_t opcache_attr_total = 0;   /* LOAD_ATTR executions counted */
#endif


Expand Down Expand Up @@ -365,6 +372,25 @@ _PyEval_Fini(void)
opcache_global_opts);

fprintf(stderr, "\n");

fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
opcache_attr_hits,
(int) (100.0 * opcache_attr_hits /
opcache_attr_total));

fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
opcache_attr_misses,
(int) (100.0 * opcache_attr_misses /
opcache_attr_total));

fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
opcache_attr_opts);

fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
opcache_attr_deopts);

fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
opcache_attr_total);
#endif
}

Expand Down Expand Up @@ -1224,16 +1250,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
do { \
co_opcache = NULL; \
if (co->co_opcache != NULL) { \
unsigned char co_opt_offset = \
unsigned char co_opcache_offset = \
co->co_opcache_map[next_instr - first_instr]; \
if (co_opt_offset > 0) { \
assert(co_opt_offset <= co->co_opcache_size); \
co_opcache = &co->co_opcache[co_opt_offset - 1]; \
if (co_opcache_offset > 0) { \
assert(co_opcache_offset <= co->co_opcache_size); \
co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
assert(co_opcache != NULL); \
} \
} \
} while (0)

/* Permanently disable the opcode cache for the instruction being executed.

   Marks the slot as deoptimized (optimized = -1), clears this instruction's
   entry in co_opcache_map so that later OPCACHE_CHECK() calls leave
   co_opcache NULL, and resets co_opcache for the rest of the current
   handler.  Does nothing when co_opcache is already NULL.

   co_opcache_map is indexed by instruction position
   (next_instr - first_instr), exactly as in OPCACHE_CHECK(); the value
   stored there is the slot number and must not be used as an index into
   the map itself.

   Expects `co`, `co_opcache`, `next_instr` and `first_instr` to be in scope. */
#define OPCACHE_DEOPT() \
    do { \
        if (co_opcache != NULL) { \
            co_opcache->optimized = -1; \
            assert(co->co_opcache_map[next_instr - first_instr] \
                   <= co->co_opcache_size); \
            co->co_opcache_map[next_instr - first_instr] = 0; \
            co_opcache = NULL; \
        } \
    } while (0)

/* Give up caching for the current LOAD_ATTR: record the deoptimization in
   the stats, then disable the slot.  A NULL co_opcache makes this a no-op. */
#define OPCACHE_DEOPT_LOAD_ATTR() \
    do { \
        if (co_opcache == NULL) { \
            break; \
        } \
        OPCACHE_STAT_ATTR_DEOPT(); \
        OPCACHE_DEOPT(); \
    } while (0)

/* Charge one miss against the current LOAD_ATTR slot and deoptimize it once
   its remaining budget (co_opcache->optimized) is used up.  A NULL
   co_opcache makes this a no-op. */
#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
    do { \
        if (co_opcache != NULL) { \
            co_opcache->optimized--; \
            if (co_opcache->optimized <= 0) { \
                OPCACHE_DEOPT_LOAD_ATTR(); \
            } \
        } \
    } while (0)

#if OPCACHE_STATS

#define OPCACHE_STAT_GLOBAL_HIT() \
Expand All @@ -1251,12 +1304,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
if (co->co_opcache != NULL) opcache_global_opts++; \
} while (0)

/* Stats: count a LOAD_ATTR cache hit, but only for code objects that have
   an opcode cache. */
#define OPCACHE_STAT_ATTR_HIT() \
    do { \
        if (co->co_opcache == NULL) { \
            break; \
        } \
        ++opcache_attr_hits; \
    } while (0)

/* Stats: count a LOAD_ATTR cache miss, but only for code objects that have
   an opcode cache. */
#define OPCACHE_STAT_ATTR_MISS() \
    do { \
        if (co->co_opcache == NULL) { \
            break; \
        } \
        ++opcache_attr_misses; \
    } while (0)

/* Stats: count a LOAD_ATTR instruction being optimized, but only for code
   objects that have an opcode cache. */
#define OPCACHE_STAT_ATTR_OPT() \
    do { \
        if (co->co_opcache == NULL) { \
            break; \
        } \
        ++opcache_attr_opts; \
    } while (0)

/* Stats: count a LOAD_ATTR instruction being deoptimized, but only for code
   objects that have an opcode cache. */
#define OPCACHE_STAT_ATTR_DEOPT() \
    do { \
        if (co->co_opcache == NULL) { \
            break; \
        } \
        ++opcache_attr_deopts; \
    } while (0)

/* Stats: count one LOAD_ATTR execution, but only for code objects that have
   an opcode cache. */
#define OPCACHE_STAT_ATTR_TOTAL() \
    do { \
        if (co->co_opcache == NULL) { \
            break; \
        } \
        ++opcache_attr_total; \
    } while (0)

#else /* OPCACHE_STATS */

#define OPCACHE_STAT_GLOBAL_HIT()
#define OPCACHE_STAT_GLOBAL_MISS()
#define OPCACHE_STAT_GLOBAL_OPT()

/* OPCACHE_STATS is off: the LOAD_ATTR statistics hooks expand to nothing. */
#define OPCACHE_STAT_ATTR_HIT()
#define OPCACHE_STAT_ATTR_MISS()
#define OPCACHE_STAT_ATTR_OPT()
#define OPCACHE_STAT_ATTR_DEOPT()
#define OPCACHE_STAT_ATTR_TOTAL()

#endif

/* Start of code */
Expand Down Expand Up @@ -3023,7 +3107,134 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
case TARGET(LOAD_ATTR): {
PyObject *name = GETITEM(names, oparg);
PyObject *owner = TOP();
PyObject *res = PyObject_GetAttr(owner, name);

PyTypeObject *type = Py_TYPE(owner);
PyObject *res;
PyObject **dictptr;
PyObject *dict;
_PyOpCodeOpt_LoadAttr *la;

OPCACHE_STAT_ATTR_TOTAL();

OPCACHE_CHECK();
if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
{
if (co_opcache->optimized > 0) {
/* Fast path -- cache hit makes LOAD_ATTR ~30% faster */
la = &co_opcache->u.la;
if (la->type == type && la->tp_version_tag == type->tp_version_tag)
{
assert(type->tp_dict != NULL);
assert(type->tp_dictoffset > 0);

dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
dict = *dictptr;
if (dict != NULL && PyDict_CheckExact(dict)) {
Py_ssize_t hint = la->hint;
Py_INCREF(dict);
res = NULL;
la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);

if (res != NULL) {
if (la->hint == hint && hint >= 0) {
/* Our hint has helped -- cache hit. */
OPCACHE_STAT_ATTR_HIT();
} else {
/* The hint we provided didn't work.
Maybe next time? */
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
}

Py_INCREF(res);
SET_TOP(res);
Py_DECREF(owner);
Py_DECREF(dict);
DISPATCH();
} else {
// This attribute can be missing sometimes -- we
// don't want to optimize this lookup.
OPCACHE_DEOPT_LOAD_ATTR();
Py_DECREF(dict);
}
} else {
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// The type of the object has either been updated,
// or is different. Maybe it will stabilize?
OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
}

OPCACHE_STAT_ATTR_MISS();
}

if (co_opcache != NULL && /* co_opcache can be NULL after a DEOPT() call. */
type->tp_getattro == PyObject_GenericGetAttr)
{
PyObject *descr;
Py_ssize_t ret;

if (type->tp_dictoffset > 0) {
if (type->tp_dict == NULL) {
if (PyType_Ready(type) < 0) {
Py_DECREF(owner);
SET_TOP(NULL);
goto error;
}
}

descr = _PyType_Lookup(type, name);
if (descr == NULL ||
descr->ob_type->tp_descr_get == NULL ||
!PyDescr_IsData(descr))
{
dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
dict = *dictptr;

if (dict != NULL && PyDict_CheckExact(dict)) {
Py_INCREF(dict);
res = NULL;
ret = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
if (res != NULL) {
Py_INCREF(res);
Py_DECREF(dict);
Py_DECREF(owner);
SET_TOP(res);

if (co_opcache->optimized == 0) {
// First time we optimize this opcode.
OPCACHE_STAT_ATTR_OPT();
co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
}

la = &co_opcache->u.la;
la->type = type;
la->tp_version_tag = type->tp_version_tag;
la->hint = ret;

DISPATCH();
}
Py_DECREF(dict);
} else {
// There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// We failed to find an attribute without a data-like descriptor
OPCACHE_DEOPT_LOAD_ATTR();
}
} else {
// The object's class does not have a tp_dictoffset we can use
OPCACHE_DEOPT_LOAD_ATTR();
}
} else if (type->tp_getattro != PyObject_GenericGetAttr) {
OPCACHE_DEOPT_LOAD_ATTR();
}
}

/* slow path */
res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
SET_TOP(res);
if (res == NULL)
Expand Down