Skip to content

Commit 762cb25

Browse files
ericsnowcurrentlymiss-islington
authored andcommitted
gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-124865)
Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785. (cherry picked from commit f2cb399) Co-authored-by: Eric Snow <[email protected]>
1 parent 046687c commit 762cb25

File tree

3 files changed

+91
-6
lines changed

3 files changed

+91
-6
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a crash caused by immortal interned strings being shared between
2+
sub-interpreters that use basic single-phase init. In that case, the string
3+
can be used by an interpreter that outlives the interpreter that created and
4+
interned it. For interpreters that share obmalloc state, also share the
5+
interned dict with the main interpreter.

Objects/object.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,6 +2218,42 @@ _Py_NewReferenceNoTotal(PyObject *op)
22182218

22192219

22202220
#ifdef Py_TRACE_REFS
2221+
/* Make sure the ref is associated with the right interpreter.
2222+
* This only needs special attention for heap-allocated objects
2223+
* that have been immortalized, and only when the object might
2224+
* outlive the interpreter where it was created. That means the
2225+
* object was necessarily created using a global allocator
2226+
* (i.e. from the main interpreter). Thus in that specific case
2227+
* we move the object over to the main interpreter's refchain.
2228+
*
2229+
* This was added for the sake of the immortal interned strings,
2230+
* where legacy subinterpreters share the main interpreter's
2231+
* interned dict (and allocator), and therefore the strings can
2232+
* outlive the subinterpreter.
2233+
*
2234+
* It may make sense to fold this into _Py_SetImmortalUntracked(),
2235+
* but that requires further investigation. In the meantime, it is
2236+
* up to the caller to know if this is needed. There should be
2237+
* very few cases.
2238+
*/
2239+
void
2240+
_Py_NormalizeImmortalReference(PyObject *op)
2241+
{
2242+
assert(_Py_IsImmortal(op));
2243+
PyInterpreterState *interp = _PyInterpreterState_GET();
2244+
if (!_PyRefchain_IsTraced(interp, op)) {
2245+
return;
2246+
}
2247+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
2248+
if (interp != main_interp
2249+
&& interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)
2250+
{
2251+
assert(!_PyRefchain_IsTraced(main_interp, op));
2252+
_PyRefchain_Remove(interp, op);
2253+
_PyRefchain_Trace(main_interp, op);
2254+
}
2255+
}
2256+
22212257
void
22222258
_Py_ForgetReference(PyObject *op)
22232259
{

Objects/unicodeobject.c

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
287287
}
288288
}
289289

290+
/* Return true if this interpreter should share the main interpreter's
291+
intern_dict. That's important for interpreters which load basic
292+
single-phase init extension modules (m_size == -1). There could be interned
293+
immortal strings that are shared between interpreters, due to the
294+
PyDict_Update(mdict, m_copy) call in import_find_extension().
295+
296+
It's not safe to deallocate those strings until all interpreters that
297+
potentially use them are freed. By storing them in the main interpreter, we
298+
ensure they get freed after all other interpreters are freed.
299+
*/
300+
static bool
301+
has_shared_intern_dict(PyInterpreterState *interp)
302+
{
303+
PyInterpreterState *main_interp = _PyInterpreterState_Main();
304+
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
305+
}
306+
290307
static int
291308
init_interned_dict(PyInterpreterState *interp)
292309
{
293310
assert(get_interned_dict(interp) == NULL);
294-
PyObject *interned = interned = PyDict_New();
295-
if (interned == NULL) {
296-
return -1;
311+
PyObject *interned;
312+
if (has_shared_intern_dict(interp)) {
313+
interned = get_interned_dict(_PyInterpreterState_Main());
314+
Py_INCREF(interned);
315+
}
316+
else {
317+
interned = PyDict_New();
318+
if (interned == NULL) {
319+
return -1;
320+
}
297321
}
298322
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
299323
return 0;
@@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp)
304328
{
305329
PyObject *interned = get_interned_dict(interp);
306330
if (interned != NULL) {
307-
PyDict_Clear(interned);
331+
if (!has_shared_intern_dict(interp)) {
332+
// only clear if the dict belongs to this interpreter
333+
PyDict_Clear(interned);
334+
}
308335
Py_DECREF(interned);
309336
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
310337
}
@@ -14939,6 +14966,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
1493914966
assert(*p);
1494014967
}
1494114968

14969+
#ifdef Py_TRACE_REFS
14970+
extern void _Py_NormalizeImmortalReference(PyObject *);
14971+
#endif
14972+
1494214973
static void
1494314974
immortalize_interned(PyObject *s)
1494414975
{
@@ -14954,6 +14985,10 @@ immortalize_interned(PyObject *s)
1495414985
#endif
1495514986
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
1495614987
_Py_SetImmortal(s);
14988+
#ifdef Py_TRACE_REFS
14989+
/* Make sure the ref is associated with the right interpreter. */
14990+
_Py_NormalizeImmortalReference(s);
14991+
#endif
1495714992
}
1495814993

1495914994
static /* non-null */ PyObject*
@@ -15140,6 +15175,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1514015175
}
1514115176
assert(PyDict_CheckExact(interned));
1514215177

15178+
if (has_shared_intern_dict(interp)) {
15179+
// the dict doesn't belong to this interpreter, skip the debug
15180+
// checks on it and just clear the pointer to it
15181+
clear_interned_dict(interp);
15182+
return;
15183+
}
15184+
1514315185
#ifdef INTERNED_STATS
1514415186
fprintf(stderr, "releasing %zd interned strings\n",
1514515187
PyDict_GET_SIZE(interned));
@@ -15658,8 +15700,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1565815700
{
1565915701
struct _Py_unicode_state *state = &interp->unicode;
1566015702

15661-
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15662-
assert(get_interned_dict(interp) == NULL);
15703+
if (!has_shared_intern_dict(interp)) {
15704+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
15705+
assert(get_interned_dict(interp) == NULL);
15706+
}
1566315707

1566415708
_PyUnicode_FiniEncodings(&state->fs_codec);
1566515709

0 commit comments

Comments
 (0)