Skip to content

[3.13] gh-128679: Fix tracemalloc.stop() race conditions #128897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions Lib/test/test_tracemalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from test.support.script_helper import (assert_python_ok, assert_python_failure,
interpreter_requires_environment)
from test import support
from test.support import os_helper
from test.support import force_not_colorized
from test.support import os_helper
from test.support import threading_helper

try:
import _testcapi
Expand Down Expand Up @@ -952,7 +953,6 @@ def check_env_var_invalid(self, nframe):
return
self.fail(f"unexpected output: {stderr!a}")


def test_env_var_invalid(self):
for nframe in INVALID_NFRAME:
with self.subTest(nframe=nframe):
Expand Down Expand Up @@ -1101,6 +1101,12 @@ def test_stop_untrack(self):
with self.assertRaises(RuntimeError):
self.untrack()

@unittest.skipIf(_testcapi is None, 'need _testcapi')
@threading_helper.requires_working_threading()
def test_tracemalloc_track_race(self):
# gh-128679: Test fix for tracemalloc.stop() race condition
_testcapi.tracemalloc_track_race()


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :func:`tracemalloc.stop` race condition. Fix :mod:`tracemalloc` to
support calling :func:`tracemalloc.stop` in one thread, while another thread
is tracing memory allocations. Patch by Victor Stinner.
99 changes: 99 additions & 0 deletions Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3386,6 +3386,104 @@ test_atexit(PyObject *self, PyObject *Py_UNUSED(args))
Py_RETURN_NONE;
}


static void
tracemalloc_track_race_thread(void *data)
{
PyTraceMalloc_Track(123, 10, 1);

PyThread_type_lock lock = (PyThread_type_lock)data;
PyThread_release_lock(lock);
}

// gh-128679: Test fix for tracemalloc.stop() race condition
static PyObject *
tracemalloc_track_race(PyObject *self, PyObject *args)
{
#define NTHREAD 50
PyObject *tracemalloc = NULL;
PyObject *stop = NULL;
PyThread_type_lock locks[NTHREAD];
memset(locks, 0, sizeof(locks));

// Call tracemalloc.start()
tracemalloc = PyImport_ImportModule("tracemalloc");
if (tracemalloc == NULL) {
goto error;
}
PyObject *start = PyObject_GetAttrString(tracemalloc, "start");
if (start == NULL) {
goto error;
}
PyObject *res = PyObject_CallNoArgs(start);
Py_DECREF(start);
if (res == NULL) {
goto error;
}
Py_DECREF(res);

stop = PyObject_GetAttrString(tracemalloc, "stop");
Py_CLEAR(tracemalloc);
if (stop == NULL) {
goto error;
}

// Start threads
for (size_t i = 0; i < NTHREAD; i++) {
PyThread_type_lock lock = PyThread_allocate_lock();
if (!lock) {
PyErr_NoMemory();
goto error;
}
locks[i] = lock;
PyThread_acquire_lock(lock, 1);

unsigned long thread;
thread = PyThread_start_new_thread(tracemalloc_track_race_thread,
(void*)lock);
if (thread == (unsigned long)-1) {
PyErr_SetString(PyExc_RuntimeError, "can't start new thread");
goto error;
}
}

// Call tracemalloc.stop() while threads are running
res = PyObject_CallNoArgs(stop);
Py_CLEAR(stop);
if (res == NULL) {
goto error;
}
Py_DECREF(res);

// Wait until threads complete with the GIL released
Py_BEGIN_ALLOW_THREADS
for (size_t i = 0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
PyThread_acquire_lock(lock, 1);
PyThread_release_lock(lock);
}
Py_END_ALLOW_THREADS

// Free threads locks
for (size_t i=0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
PyThread_free_lock(lock);
}
Py_RETURN_NONE;

error:
Py_CLEAR(tracemalloc);
Py_CLEAR(stop);
for (size_t i=0; i < NTHREAD; i++) {
PyThread_type_lock lock = locks[i];
if (lock) {
PyThread_free_lock(lock);
}
}
return NULL;
#undef NTHREAD
}

static PyMethodDef TestMethods[] = {
{"set_errno", set_errno, METH_VARARGS},
{"test_config", test_config, METH_NOARGS},
Expand Down Expand Up @@ -3532,6 +3630,7 @@ static PyMethodDef TestMethods[] = {
{"test_critical_sections", test_critical_sections, METH_NOARGS},
{"pyeval_getlocals", pyeval_getlocals, METH_NOARGS},
{"test_atexit", test_atexit, METH_NOARGS},
{"tracemalloc_track_race", tracemalloc_track_race, METH_NOARGS},
{NULL, NULL} /* sentinel */
};

Expand Down
110 changes: 68 additions & 42 deletions Python/tracemalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -538,12 +538,16 @@ tracemalloc_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
return NULL;

TABLES_LOCK();
if (ADD_TRACE(ptr, nelem * elsize) < 0) {
/* Failed to allocate a trace for the new memory block */
TABLES_UNLOCK();
alloc->free(alloc->ctx, ptr);
return NULL;

if (tracemalloc_config.tracing) {
if (ADD_TRACE(ptr, nelem * elsize) < 0) {
/* Failed to allocate a trace for the new memory block */
alloc->free(alloc->ctx, ptr);
ptr = NULL;
}
}
// else: gh-128679: tracemalloc.stop() was called by another thread

TABLES_UNLOCK();
return ptr;
}
Expand All @@ -559,11 +563,15 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size)
if (ptr2 == NULL)
return NULL;

TABLES_LOCK();
if (!tracemalloc_config.tracing) {
// gh-128679: tracemalloc.stop() was called by another thread
goto done;
}

if (ptr != NULL) {
/* an existing memory block has been resized */

TABLES_LOCK();

/* tracemalloc_add_trace() updates the trace if there is already
a trace at address ptr2 */
if (ptr2 != ptr) {
Expand All @@ -582,20 +590,19 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size)
allocating memory. */
Py_FatalError("tracemalloc_realloc() failed to allocate a trace");
}
TABLES_UNLOCK();
}
else {
/* new allocation */

TABLES_LOCK();
if (ADD_TRACE(ptr2, new_size) < 0) {
/* Failed to allocate a trace for the new memory block */
TABLES_UNLOCK();
alloc->free(alloc->ctx, ptr2);
return NULL;
ptr2 = NULL;
}
TABLES_UNLOCK();
}

done:
TABLES_UNLOCK();
return ptr2;
}

Expand All @@ -614,7 +621,12 @@ tracemalloc_free(void *ctx, void *ptr)
alloc->free(alloc->ctx, ptr);

TABLES_LOCK();
REMOVE_TRACE(ptr);

if (tracemalloc_config.tracing) {
REMOVE_TRACE(ptr);
}
// else: gh-128679: tracemalloc.stop() was called by another thread

TABLES_UNLOCK();
}

Expand Down Expand Up @@ -779,17 +791,15 @@ tracemalloc_clear_filename(void *value)

/* reentrant flag must be set to call this function and GIL must be held */
static void
tracemalloc_clear_traces(void)
tracemalloc_clear_traces_unlocked(void)
{
/* The GIL protects variables against concurrent access */
assert(PyGILState_Check());

TABLES_LOCK();
_Py_hashtable_clear(tracemalloc_traces);
_Py_hashtable_clear(tracemalloc_domains);
tracemalloc_traced_memory = 0;
tracemalloc_peak_traced_memory = 0;
TABLES_UNLOCK();

_Py_hashtable_clear(tracemalloc_tracebacks);

Expand Down Expand Up @@ -963,6 +973,10 @@ _PyTraceMalloc_Stop(void)
if (!tracemalloc_config.tracing)
return;

// Lock to synchronize with tracemalloc_free() which checks
// 'tracing' while holding the lock.
TABLES_LOCK();

/* stop tracing Python memory allocations */
tracemalloc_config.tracing = 0;

Expand All @@ -973,11 +987,13 @@ _PyTraceMalloc_Stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);

tracemalloc_clear_traces();
tracemalloc_clear_traces_unlocked();

/* release memory */
raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL;

TABLES_UNLOCK();
}


Expand Down Expand Up @@ -1307,20 +1323,19 @@ int
PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
size_t size)
{
int res;
PyGILState_STATE gil_state;
PyGILState_STATE gil_state = PyGILState_Ensure();
TABLES_LOCK();

if (!tracemalloc_config.tracing) {
/* tracemalloc is not tracing: do nothing */
return -2;
int res;
if (tracemalloc_config.tracing) {
res = tracemalloc_add_trace(domain, ptr, size);
}
else {
// gh-128679: tracemalloc.stop() was called by another thread
res = -2;
}

gil_state = PyGILState_Ensure();

TABLES_LOCK();
res = tracemalloc_add_trace(domain, ptr, size);
TABLES_UNLOCK();

PyGILState_Release(gil_state);
return res;
}
Expand All @@ -1329,16 +1344,20 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
int
PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
{
if (!tracemalloc_config.tracing) {
TABLES_LOCK();

int result;
if (tracemalloc_config.tracing) {
tracemalloc_remove_trace(domain, ptr);
result = 0;
}
else {
/* tracemalloc is not tracing: do nothing */
return -2;
result = -2;
}

TABLES_LOCK();
tracemalloc_remove_trace(domain, ptr);
TABLES_UNLOCK();

return 0;
return result;
}


Expand Down Expand Up @@ -1376,16 +1395,21 @@ _PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void* Py_UNUSED(ig
int res = -1;

TABLES_LOCK();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also read tracing without a lock before this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking tracing without the lock is a workaround for the Python finalization. TABLES_LOCK() can no longer be called after _PyTraceMalloc_Fini() (the lock is destroyed), whereas _PyTraceMalloc_TraceRef() is still called after _PyTraceMalloc_Fini().

tracing is checked again once we acquired the lock.

The issue was fixed properly in the main branch by calling (void)PyRefTracer_SetTracer(NULL, NULL); in _PyTraceMalloc_Stop(). Since this change is designed to be backported to Python 3.12, I didn't include this fix.

Once this change is merged in 3.13 and 3.12, we can remove the unprotected tracing read in 3.13 in _PyTraceMalloc_TraceRef() by adding (void)PyRefTracer_SetTracer(NULL, NULL);.

trace_t *trace = _Py_hashtable_get(tracemalloc_traces, TO_PTR(ptr));
if (trace != NULL) {
/* update the traceback of the memory block */
traceback_t *traceback = traceback_new();
if (traceback != NULL) {
trace->traceback = traceback;
res = 0;

if (tracemalloc_config.tracing) {
trace_t *trace = _Py_hashtable_get(tracemalloc_traces, TO_PTR(ptr));
if (trace != NULL) {
/* update the traceback of the memory block */
traceback_t *traceback = traceback_new();
if (traceback != NULL) {
trace->traceback = traceback;
res = 0;
}
}
/* else: cannot track the object, its memory block size is unknown */
}
/* else: cannot track the object, its memory block size is unknown */
// else: gh-128679: tracemalloc.stop() was called by another thread

TABLES_UNLOCK();

return res;
Expand Down Expand Up @@ -1418,7 +1442,9 @@ _PyTraceMalloc_ClearTraces(void)
return;
}
set_reentrant(1);
tracemalloc_clear_traces();
TABLES_LOCK();
tracemalloc_clear_traces_unlocked();
TABLES_UNLOCK();
set_reentrant(0);
}

Expand Down
Loading