From d6b57642f8f1cd6d43a6240f4a5470d4056fdbcd Mon Sep 17 00:00:00 2001
From: haixuanTao
Date: Mon, 8 Jun 2026 14:42:10 +0200
Subject: [PATCH] feat(cuda): accept a pre-linked CUBIN in load_module_bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`load_module_bytes` assumed PTX text. Also accept a CUBIN, detected by the
ELF magic (`\x7fELF`), loaded via `Ptx::from_binary`. This is required for
modules that reference symbols the driver JIT cannot resolve on its own —
e.g. libdevice `__nv_*` math — which a toolchain links into a
self-contained binary ahead of time. PTX text continues to load unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 crates/khal/src/backend/cuda.rs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/crates/khal/src/backend/cuda.rs b/crates/khal/src/backend/cuda.rs
index b734819..cf715e9 100644
--- a/crates/khal/src/backend/cuda.rs
+++ b/crates/khal/src/backend/cuda.rs
@@ -271,9 +271,16 @@ impl Backend for Cuda {
             }
         }
 
-        // Expect PTX text bytes.
-        let ptx_str = std::str::from_utf8(bytes).map_err(|_| CudaBackendError::InvalidPtx)?;
-        let ptx = cudarc::nvrtc::Ptx::from_src(ptx_str.to_string());
+        // Accept either PTX text or a pre-linked CUBIN (detected by ELF magic).
+        // A cubin is required when the module references symbols the driver JIT
+        // cannot resolve on its own (e.g. libdevice `__nv_*` math), which a
+        // toolchain links into a self-contained binary ahead of time.
+        let ptx = if bytes.starts_with(&[0x7f, b'E', b'L', b'F']) {
+            cudarc::nvrtc::Ptx::from_binary(bytes.to_vec())
+        } else {
+            let ptx_str = std::str::from_utf8(bytes).map_err(|_| CudaBackendError::InvalidPtx)?;
+            cudarc::nvrtc::Ptx::from_src(ptx_str.to_string())
+        };
         let module = self.ctx.load_module(ptx)?;
 
         // Cache the loaded module.