Skip to content

Commit 5014d9f

Browse files
committed
gh-151475: Fix data race in faulthandler watchdog on free-threaded builds
Add a PyMutex that serializes calls to dump_traceback_later() and cancel_dump_traceback_later(). Without this mutex, concurrent arm/cancel calls from multiple threads can corrupt the cancel_event/running lock handshake, and the process aborts when it tries to release a lock that is not held.
1 parent a52f428 commit 5014d9f

3 files changed

Lines changed: 25 additions & 8 deletions

File tree

Include/internal/pycore_faulthandler.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ struct faulthandler_user_signal {
4848

4949

5050
struct _faulthandler_runtime_state {
51+
PyMutex mutex;
52+
5153
struct {
5254
int enabled;
5355
PyObject *file;
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a data race in :mod:`faulthandler` where concurrent calls to
2+
:func:`~faulthandler.dump_traceback_later` and
3+
:func:`~faulthandler.cancel_dump_traceback_later` could corrupt the
4+
watchdog lock handshake on free-threaded builds.

Modules/faulthandler.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ typedef struct {
5656
int all_threads;
5757
} fault_handler_t;
5858

59+
#define faulthandler_mutex _PyRuntime.faulthandler.mutex
5960
#define fatal_error _PyRuntime.faulthandler.fatal_error
6061
#define thread _PyRuntime.faulthandler.thread
6162

@@ -836,17 +837,31 @@ faulthandler_dump_traceback_later_impl(PyObject *module,
836837
return NULL;
837838
}
838839

840+
/* format the timeout before acquiring the lock (no shared state) */
841+
header = format_timeout(timeout_us);
842+
if (header == NULL) {
843+
Py_XDECREF(file);
844+
return PyErr_NoMemory();
845+
}
846+
header_len = strlen(header);
847+
848+
PyMutex_Lock(&faulthandler_mutex);
849+
839850
if (!thread.running) {
840851
thread.running = PyThread_allocate_lock();
841852
if (!thread.running) {
853+
PyMutex_Unlock(&faulthandler_mutex);
842854
Py_XDECREF(file);
855+
PyMem_Free(header);
843856
return PyErr_NoMemory();
844857
}
845858
}
846859
if (!thread.cancel_event) {
847860
thread.cancel_event = PyThread_allocate_lock();
848861
if (!thread.cancel_event || !thread.running) {
862+
PyMutex_Unlock(&faulthandler_mutex);
849863
Py_XDECREF(file);
864+
PyMem_Free(header);
850865
return PyErr_NoMemory();
851866
}
852867

@@ -855,14 +870,6 @@ faulthandler_dump_traceback_later_impl(PyObject *module,
855870
PyThread_acquire_lock(thread.cancel_event, 1);
856871
}
857872

858-
/* format the timeout */
859-
header = format_timeout(timeout_us);
860-
if (header == NULL) {
861-
Py_XDECREF(file);
862-
return PyErr_NoMemory();
863-
}
864-
header_len = strlen(header);
865-
866873
/* Cancel previous thread, if running */
867874
cancel_dump_traceback_later();
868875

@@ -885,11 +892,13 @@ faulthandler_dump_traceback_later_impl(PyObject *module,
885892
Py_CLEAR(thread.file);
886893
PyMem_Free(header);
887894
thread.header = NULL;
895+
PyMutex_Unlock(&faulthandler_mutex);
888896
PyErr_SetString(PyExc_RuntimeError,
889897
"unable to start watchdog thread");
890898
return NULL;
891899
}
892900

901+
PyMutex_Unlock(&faulthandler_mutex);
893902
Py_RETURN_NONE;
894903
}
895904

@@ -904,7 +913,9 @@ static PyObject *
904913
faulthandler_cancel_dump_traceback_later_py_impl(PyObject *module)
905914
/*[clinic end generated code: output=2cf303015d39c926 input=51ad64b6ca8412a4]*/
906915
{
916+
PyMutex_Lock(&faulthandler_mutex);
907917
cancel_dump_traceback_later();
918+
PyMutex_Unlock(&faulthandler_mutex);
908919
Py_RETURN_NONE;
909920
}
910921

0 commit comments

Comments
 (0)