Skip to content

bpo-44338: Port LOAD_GLOBAL to PEP 659 adaptive interpreter #26638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 14, 2021
Merged
14 changes: 0 additions & 14 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,6 @@ struct PyCodeObject {
interpreter. */
union _cache_or_instruction *co_quickened;

/* Per-opcode just-in-time cache
*
* To reduce cache size, we use indirect mapping from opcode index to
* cache object:
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
*/

// co_opcache_map is indexed by (next_instr - first_instr).
// * 0 means there is no cache for this opcode.
// * n > 0 means there is cache in co_opcache[n-1].
unsigned char *co_opcache_map;
_PyOpcache *co_opcache;
int co_opcache_flag; // used to determine when to create a cache.
unsigned char co_opcache_size; // length of co_opcache.
};

/* Masks for co_flags above */
Expand Down
25 changes: 15 additions & 10 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ typedef struct {
uint32_t dk_version_or_hint;
} _PyLoadAttrCache;

/* Cache entry used for LOAD_GLOBAL (the `load_global` member of
 * SpecializedCacheEntry).  It consists of two uint32_t fields, i.e. 8 bytes,
 * which matches the sizeof(SpecializedCacheEntry) == 8 invariant.
 * NOTE(review): the fields presumably hold dict-keys version tags for the
 * module globals and for the builtins respectively -- confirm against the
 * specializer implementation. */
typedef struct {
uint32_t module_keys_version;
uint32_t builtin_keys_version;
} _PyLoadGlobalCache;

/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
Expand All @@ -62,6 +67,7 @@ typedef union {
_PyEntryZero zero;
_PyAdaptiveEntry adaptive;
_PyLoadAttrCache load_attr;
_PyLoadGlobalCache load_global;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
Expand Down Expand Up @@ -254,8 +260,6 @@ PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);

/* Private API */

int _PyCode_InitOpcache(PyCodeObject *co);

/* Getters for internal PyCodeObject data. */
PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
Expand Down Expand Up @@ -318,24 +322,25 @@ cache_backoff(_PyAdaptiveEntry *entry) {
/* Specialization functions */

int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);

#define SPECIALIZATION_STATS 0
#if SPECIALIZATION_STATS

typedef struct _specialization_stats {
typedef struct _stats {
uint64_t specialization_success;
uint64_t specialization_failure;
uint64_t loadattr_hit;
uint64_t loadattr_deferred;
uint64_t loadattr_miss;
uint64_t loadattr_deopt;
uint64_t hit;
uint64_t deferred;
uint64_t miss;
uint64_t deopt;
} SpecializationStats;

extern SpecializationStats _specialization_stats;
#define STAT_INC(name) _specialization_stats.name++
extern SpecializationStats _specialization_stats[256];
#define STAT_INC(opname, name) _specialization_stats[opname].name++
void _Py_PrintSpecializationStats(void);
#else
#define STAT_INC(name) ((void)0)
#define STAT_INC(opname, name) ((void)0)
#endif


Expand Down
3 changes: 3 additions & 0 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,4 +226,7 @@ def jabs_op(name, op):
"LOAD_ATTR_WITH_HINT",
"LOAD_ATTR_SLOT",
"LOAD_ATTR_MODULE",
"LOAD_GLOBAL_ADAPTIVE",
"LOAD_GLOBAL_MODULE",
"LOAD_GLOBAL_BUILTIN",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Implement adaptive specialization for LOAD_GLOBAL

Two specialized forms of LOAD_GLOBAL are added:

* LOAD_GLOBAL_MODULE

* LOAD_GLOBAL_BUILTIN
74 changes: 6 additions & 68 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
/* not set */
co->co_weakreflist = NULL;
co->co_extra = NULL;
co->co_opcache_map = NULL;
co->co_opcache = NULL;
co->co_opcache_flag = 0;
co->co_opcache_size = 0;

co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE;
co->co_quickened = NULL;
}
Expand Down Expand Up @@ -912,55 +909,6 @@ new_linesiterator(PyCodeObject *code)
return li;
}


/******************
* the opcache
******************/

/* Build the opcode-cache tables of a code object.
 *
 * co->co_opcache_map gets one byte per instruction slot and is indexed by
 * (next_instr - first_instr); a value of 0 means "no cache", a value n > 0
 * means the cache lives in co->co_opcache[n - 1].  Only LOAD_GLOBAL and
 * LOAD_ATTR instructions receive an entry, and at most 255 entries are
 * handed out because the map stores unsigned char values.
 *
 * Returns 0 on success and -1 when an allocation fails.  Whenever the map
 * is released here, co->co_opcache_map is reset to NULL so that
 * code_dealloc() cannot free it a second time.
 */
int
_PyCode_InitOpcache(PyCodeObject *co)
{
    Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);

    /* The map is indexed *after* the instruction pointer has been advanced,
       so the last instruction uses index co_size.  Allocate one extra slot
       to keep that index inside the buffer. */
    co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size + 1, 1);
    if (co->co_opcache_map == NULL) {
        return -1;
    }

    const _Py_CODEUNIT *opcodes = (const _Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
    Py_ssize_t opts = 0;

    for (Py_ssize_t i = 0; i < co_size;) {
        unsigned char opcode = _Py_OPCODE(opcodes[i]);
        i++;  // 'i' is now aligned to (next_instr - first_instr)

        // TODO: LOAD_METHOD
        if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
            opts++;
            co->co_opcache_map[i] = (unsigned char)opts;
            if (opts > 254) {
                /* 255 is the largest index an unsigned char can hold. */
                break;
            }
        }
    }

    if (opts) {
        co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
        if (co->co_opcache == NULL) {
            PyMem_Free(co->co_opcache_map);
            /* Do not leave a dangling pointer behind: code_dealloc() frees
               co_opcache_map whenever it is non-NULL. */
            co->co_opcache_map = NULL;
            return -1;
        }
    }
    else {
        /* Nothing to cache: drop the (all-zero) map again. */
        PyMem_Free(co->co_opcache_map);
        co->co_opcache_map = NULL;
        co->co_opcache = NULL;
    }

    co->co_opcache_size = (unsigned char)opts;
    return 0;
}


/******************
* "extra" frame eval info (see PEP 523)
******************/
Expand Down Expand Up @@ -1207,15 +1155,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
static void
code_dealloc(PyCodeObject *co)
{
if (co->co_opcache != NULL) {
PyMem_Free(co->co_opcache);
}
if (co->co_opcache_map != NULL) {
PyMem_Free(co->co_opcache_map);
}
co->co_opcache_flag = 0;
co->co_opcache_size = 0;

if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyCodeObjectExtra *co_extra = co->co_extra;
Expand Down Expand Up @@ -1442,12 +1381,11 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
res += co->co_ncellvars * sizeof(Py_ssize_t);
}

if (co->co_opcache != NULL) {
assert(co->co_opcache_map != NULL);
// co_opcache_map
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
// co_opcache
res += co->co_opcache_size * sizeof(_PyOpcache);
if (co->co_quickened != NULL) {
Py_ssize_t count = co->co_quickened[0].entry.zero.cache_count;
count += (PyBytes_GET_SIZE(co->co_code)+sizeof(SpecializedCacheEntry)-1)/
sizeof(SpecializedCacheEntry);
res += count * sizeof(SpecializedCacheEntry);
}

return PyLong_FromSsize_t(res);
Expand Down
Loading