@@ -313,10 +313,42 @@ void ddloader_logf(injected_ext *config, log_level level, const char *format, ..
313313 va_end (va );
314314}
315315
316- static void * ddloader_reap_child (void * arg ) {
317- pid_t pid = (pid_t )(intptr_t )arg ;
316+ typedef struct {
317+ pid_t pid ;
318+ void * self_handle ; // dlopen handle for this .so, closed by the thread
319+ } ddloader_reaper_arg ;
320+
321+ // Reaps the telemetry child process, then tail-calls dlclose() to release the
322+ // extra reference on this .so that was acquired before thread creation.
323+ // The tail call ensures dlclose() returns directly to libpthread's start_thread
324+ // without ever returning into this .so's code, which may be unmapped when
325+ // dlclose() releases the last reference and runs munmap.
326+ //
327+ // [[clang::musttail]] guarantees the tail call at the source level (compile
328+ // error if not possible). __attribute__((optimize("O2"))) is the GCC fallback
329+ // to enable sibling-call optimisation. Both are needed because musttail
330+ // requires a single CK_IntegralToPointer cast, while GCC -Wint-to-pointer-cast
331+ // requires the (intptr_t) intermediate; using __has_attribute lets us pick the
332+ // right form for each compiler.
333+ // musttail requires the callee to have a matching return type. Cast the
334+ // function pointer so the call expression itself returns void*. int and
335+ // void* share the same return register on every supported ABI, so the
336+ // cast is safe in practice (the dlclose return value is discarded anyway).
337+ #if defined(__has_attribute ) && __has_attribute (musttail )
338+ # define DDLOADER_MUSTTAIL __attribute__((musttail))
339+ #elif defined(__clang__ ) && __clang_major__ >= 13
340+ # define DDLOADER_MUSTTAIL [[clang::musttail]]
341+ #else
342+ # define DDLOADER_MUSTTAIL
343+ __attribute__((optimize ("O2" )))
344+ #endif
345+ static void * ddloader_reap_child (void * arg_ ) {
346+ ddloader_reaper_arg * arg = (ddloader_reaper_arg * )arg_ ;
347+ pid_t pid = arg -> pid ;
348+ void * handle = arg -> self_handle ;
349+ free (arg );
318350 waitpid (pid , NULL , 0 );
319- return NULL ;
351+ DDLOADER_MUSTTAIL return ( ( void * ( * )( void * )) dlclose )( handle ) ;
320352}
321353
322354/**
@@ -414,11 +446,20 @@ static void ddloader_telemetryf(telemetry_reason reason, injected_ext *config, c
414446 }
415447 if (pid > 0 ) {
416448 // reap the child in a background thread to avoid leaking it
449+ ddloader_reaper_arg * reaper_arg = malloc (sizeof (* reaper_arg ));
450+ reaper_arg -> pid = pid ;
451+ // Bump our own refcount so this .so stays mapped while the reaper
452+ // thread is running. The thread will tail-call dlclose() to release it.
453+ Dl_info info ;
454+ reaper_arg -> self_handle =
455+ (dladdr ((void * )ddloader_telemetryf , & info ) && info .dli_fname )
456+ ? dlopen (info .dli_fname , RTLD_LAZY )
457+ : NULL ;
417458 pthread_t reaper ;
418459 pthread_attr_t attr ;
419460 pthread_attr_init (& attr );
420461 pthread_attr_setdetachstate (& attr , PTHREAD_CREATE_DETACHED );
421- pthread_create (& reaper , & attr , ddloader_reap_child , ( void * )( intptr_t ) pid );
462+ pthread_create (& reaper , & attr , ddloader_reap_child , reaper_arg );
422463 pthread_attr_destroy (& attr );
423464 return ; // parent
424465 }
0 commit comments