From 526bb51f24fde9eb24f24e0eb9d4d2016139e2a0 Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Thu, 7 May 2026 10:59:04 +0200 Subject: [PATCH 1/2] Defer AudioStream FFI native stream creation to first OnAudioRead Configuring the native stream from AudioSettings.speakerMode at construction time can disagree with what OnAudioFilterRead actually delivers (per-source mono routing, spatializers, mixer chains, headphone profiles), and the C# RemixAndResample path it covered for is heavy work on the OS-audio-scheduled FFI callback thread. Defer the NewAudioStreamRequest until the first audio callback so we configure with Unity's authoritative (channels, sampleRate) and let Rust's NativeAudioStream do the rate/channel conversion once. OnAudioStreamEvent becomes a memcpy into the ring buffer. Co-Authored-By: Claude Opus 4.7 (1M context) --- Runtime/Scripts/AudioStream.cs | 98 ++++++++++++++++------ Tests/EditMode/MediaStreamLifetimeTests.cs | 9 +- 2 files changed, 78 insertions(+), 29 deletions(-) diff --git a/Runtime/Scripts/AudioStream.cs b/Runtime/Scripts/AudioStream.cs index 834cc534..b16f6815 100644 --- a/Runtime/Scripts/AudioStream.cs +++ b/Runtime/Scripts/AudioStream.cs @@ -13,7 +13,14 @@ namespace LiveKit /// public sealed class AudioStream : IDisposable { - internal readonly FfiHandle Handle; + // FFI native stream is created lazily on the first OnAudioRead so we can pass + // Unity's actual delivered (channels, sampleRate) — not a system-speaker-mode + // guess. _handle is null until CreateFfiStream completes on the main thread. + private FfiHandle _handle; + internal FfiHandle Handle => _handle; + private readonly ulong _trackHandleId; + private bool _ffiCreatePosted; + private readonly AudioSource _audioSource; private AudioProbe _probe; private RingBuffer _buffer; @@ -21,7 +28,6 @@ public sealed class AudioStream : IDisposable private short[] _crossfadeScratch; private uint _numChannels; private uint _sampleRate; - private AudioResampler _resampler = new AudioResampler(); private readonly object _lock = new object(); private bool _disposed = false; @@ -56,15 +62,7 @@ public AudioStream(RemoteAudioTrack audioTrack, AudioSource source) if (!audioTrack.Participant.TryGetTarget(out var participant)) throw new InvalidOperationException("audiotrack's participant is invalid"); - using var request = FFIBridge.Instance.NewRequest(); - var newAudioStream = request.request; - newAudioStream.TrackHandle = (ulong)(audioTrack as ITrack).TrackHandle.DangerousGetHandle(); - newAudioStream.Type = AudioStreamType.AudioStreamNative; - - using var response = request.Send(); - FfiResponse res = response; - Handle = FfiHandle.FromOwnedHandle(res.NewAudioStream.Stream.Handle); - FfiClient.Instance.AudioStreamEventReceived += OnAudioStreamEvent; + _trackHandleId = (ulong)(audioTrack as ITrack).TrackHandle.DangerousGetHandle(); _audioSource = source; _probe = _audioSource.gameObject.AddComponent(); @@ -78,6 +76,45 @@ public AudioStream(RemoteAudioTrack audioTrack, AudioSource source) // (e.g. headphones unplugged). Without re-playing the source, OnAudioFilterRead // stops firing and the stream goes silent until the AudioStream is recreated. AudioSettings.OnAudioConfigurationChanged += OnAudioConfigurationChanged; + + // FFI stream creation is deferred to the first OnAudioRead. We subscribe to + // AudioStreamEventReceived only after the stream exists (CreateFfiStream). + } + + // Called on the main thread (posted from OnAudioRead via FfiClient._context) once + // we know Unity's actual playback (channels, sampleRate). Builds the native stream + // request so Rust delivers frames already at the right format — no resample on this + // side, no FFI reconfigure dance later. + private void CreateFfiStream(uint observedChannels, uint observedSampleRate) + { + lock (_lock) { if (_disposed) return; } + + FfiHandle newHandle; + try + { + using var request = FFIBridge.Instance.NewRequest(); + var req = request.request; + req.TrackHandle = _trackHandleId; + req.Type = AudioStreamType.AudioStreamNative; + req.SampleRate = observedSampleRate; + req.NumChannels = observedChannels; + + using var response = request.Send(); + FfiResponse res = response; + newHandle = FfiHandle.FromOwnedHandle(res.NewAudioStream.Stream.Handle); + } + catch (Exception ex) + { + Utils.Error($"AudioStream FFI create failed: {ex}"); + return; + } + + lock (_lock) + { + if (_disposed) { newHandle.Dispose(); return; } + _handle = newHandle; + FfiClient.Instance.AudioStreamEventReceived += OnAudioStreamEvent; + } } // Called on Unity audio thread @@ -91,6 +128,22 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) lock (_lock) { + // First OnAudioRead: post a one-shot CreateFfiStream to the main thread + // with Unity's actual (channels, sampleRate). Until the FFI stream exists + // we output silence — the priming window absorbs this gap. + if (_handle == null) + { + if (!_ffiCreatePosted) + { + _ffiCreatePosted = true; + uint observedCh = (uint)channels; + uint observedSr = (uint)sampleRate; + FfiClient.Instance._context?.Post(_ => CreateFfiStream(observedCh, observedSr), null); + } + Array.Clear(data, 0, data.Length); + return; + } + // Initialize or reinitialize buffer if audio format changed if (_buffer == null || channels != _numChannels || sampleRate != _sampleRate || data.Length != _tempBuffer.Length) { @@ -284,18 +337,13 @@ private void OnAudioStreamEvent(AudioStreamEvent e) lock (_lock) { - if (_numChannels == 0) + if (_buffer == null) return; unsafe { - using var uFrame = _resampler.RemixAndResample(frame, _numChannels, _sampleRate); - if (uFrame != null) - { - var data = new Span(uFrame.Data.ToPointer(), uFrame.Length); - _buffer?.Write(data); - } - + var data = new ReadOnlySpan(frame.Data.ToPointer(), frame.Length); + _buffer.Write(data); } } } @@ -315,8 +363,11 @@ private void Dispose(bool disposing) // Remove long-lived delegate references first so this instance can become collectible // as soon as user code drops it. This also prevents late native callbacks from - // touching partially disposed state. - FfiClient.Instance.AudioStreamEventReceived -= OnAudioStreamEvent; + // touching partially disposed state. AudioStreamEventReceived was only subscribed + // after CreateFfiStream succeeded; -= against an unsubscribed handler is a no-op, + // but the explicit guard documents the lifecycle. + if (_handle != null) + FfiClient.Instance.AudioStreamEventReceived -= OnAudioStreamEvent; MonoBehaviourContext.OnApplicationPauseEvent -= OnApplicationPause; AudioSettings.OnAudioConfigurationChanged -= OnAudioConfigurationChanged; @@ -339,9 +390,8 @@ private void Dispose(bool disposing) _buffer = null; _tempBuffer = null; _crossfadeScratch = null; - _resampler?.Dispose(); - _resampler = null; - Handle.Dispose(); + _handle?.Dispose(); + _handle = null; } _disposed = true; diff --git a/Tests/EditMode/MediaStreamLifetimeTests.cs b/Tests/EditMode/MediaStreamLifetimeTests.cs index 0209bfa2..f2ac7320 100644 --- a/Tests/EditMode/MediaStreamLifetimeTests.cs +++ b/Tests/EditMode/MediaStreamLifetimeTests.cs @@ -124,8 +124,7 @@ public void AudioStream_Dispose_UnsubscribesAndReleasesOwnedResources() StringAssert.Contains("_probe.AudioRead -= OnAudioRead;", source); StringAssert.Contains("AudioSettings.OnAudioConfigurationChanged -= OnAudioConfigurationChanged;", source); StringAssert.Contains("_buffer?.Dispose();", source); - StringAssert.Contains("_resampler?.Dispose();", source); - StringAssert.Contains("Handle.Dispose();", source); + StringAssert.Contains("_handle?.Dispose();", source); } [Test] @@ -133,10 +132,10 @@ public void AudioStream_AudioFrames_AreDisposedAfterProcessing() { var source = ReadSource(AudioStreamPaths); - // Both the inbound native frame and the remixed output frame should be scoped so their - // handles are released after each callback rather than accumulating over time. + // Native frames carry an FFI handle that must be released after each callback so they + // do not accumulate. With the Rust side delivering frames already at Unity's rate, we + // no longer wrap a resampled output frame. StringAssert.Contains("using var frame = new AudioFrame(e.FrameReceived.Frame);", source); - StringAssert.Contains("using var uFrame = _resampler.RemixAndResample(frame, _numChannels, _sampleRate);", source); } [Test] From 36efa755a7bd920851a6589996892006173debf0 Mon Sep 17 00:00:00 2001 From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com> Date: Thu, 7 May 2026 11:00:21 +0200 Subject: [PATCH 2/2] Recreate native stream when Unity audio config rate/channels change When the system output device switches mid-call (e.g. headphones unplugged), Unity tears down its audio engine and OnAudioFilterRead resumes with a (channels, sampleRate) that may differ from what Rust was configured for. Detect the mismatch in OnAudioRead, post a CreateOrRecreateFfiStream to the main thread, and gate OnAudioStreamEvent on a _pendingFfiRequest flag so we drop in-flight frames at the old format. After the new stream is built, swap _handle, clear the buffer, reset priming, and dispose the old handle so its Rust task exits. The handle-id filter remains as a second line of defense for old-stream stragglers arriving during the brief overlap. Co-Authored-By: Claude Opus 4.7 (1M context) --- Runtime/Scripts/AudioStream.cs | 59 ++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/Runtime/Scripts/AudioStream.cs b/Runtime/Scripts/AudioStream.cs index b16f6815..4d9ab588 100644 --- a/Runtime/Scripts/AudioStream.cs +++ b/Runtime/Scripts/AudioStream.cs @@ -15,11 +15,15 @@ public sealed class AudioStream : IDisposable { // FFI native stream is created lazily on the first OnAudioRead so we can pass // Unity's actual delivered (channels, sampleRate) — not a system-speaker-mode - // guess. _handle is null until CreateFfiStream completes on the main thread. + // guess. _handle is null until CreateOrRecreateFfiStream completes on the main + // thread. The same path runs again whenever Unity's delivered format changes + // mid-stream (e.g. after a system audio device switch). private FfiHandle _handle; internal FfiHandle Handle => _handle; private readonly ulong _trackHandleId; - private bool _ffiCreatePosted; + private uint _ffiNumChannels; + private uint _ffiSampleRate; + private bool _pendingFfiRequest; private readonly AudioSource _audioSource; private AudioProbe _probe; @@ -81,11 +85,12 @@ public AudioStream(RemoteAudioTrack audioTrack, AudioSource source) // AudioStreamEventReceived only after the stream exists (CreateFfiStream). } - // Called on the main thread (posted from OnAudioRead via FfiClient._context) once - // we know Unity's actual playback (channels, sampleRate). Builds the native stream - // request so Rust delivers frames already at the right format — no resample on this - // side, no FFI reconfigure dance later. - private void CreateFfiStream(uint observedChannels, uint observedSampleRate) + // Called on the main thread (posted from OnAudioRead via FfiClient._context) when + // either there is no FFI stream yet or Unity's delivered (channels, sampleRate) no + // longer matches what we asked Rust for. Builds a fresh native stream and swaps it + // in atomically. The old handle is disposed AFTER the swap so any in-flight frames + // from the old stream fail the handle-id filter in OnAudioStreamEvent. + private void CreateOrRecreateFfiStream(uint observedChannels, uint observedSampleRate) { lock (_lock) { if (_disposed) return; } @@ -105,16 +110,30 @@ private void CreateFfiStream(uint observedChannels, uint observedSampleRate) } catch (Exception ex) { - Utils.Error($"AudioStream FFI create failed: {ex}"); + Utils.Error($"AudioStream FFI (re)create failed: {ex}"); + lock (_lock) { _pendingFfiRequest = false; } return; } + FfiHandle oldHandle; + bool firstCreate; lock (_lock) { if (_disposed) { newHandle.Dispose(); return; } + oldHandle = _handle; + firstCreate = oldHandle == null; _handle = newHandle; - FfiClient.Instance.AudioStreamEventReceived += OnAudioStreamEvent; + _ffiNumChannels = observedChannels; + _ffiSampleRate = observedSampleRate; + _buffer?.Clear(); + _isPrimed = false; + _skipCooldown = 0; + _pendingFfiRequest = false; + + if (firstCreate) + FfiClient.Instance.AudioStreamEventReceived += OnAudioStreamEvent; } + oldHandle?.Dispose(); } // Called on Unity audio thread @@ -128,17 +147,18 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) lock (_lock) { - // First OnAudioRead: post a one-shot CreateFfiStream to the main thread - // with Unity's actual (channels, sampleRate). Until the FFI stream exists - // we output silence — the priming window absorbs this gap. - if (_handle == null) + // Single gate covering first-create and runtime format changes (e.g. after a + // system audio device switch). When the FFI stream is missing or what we asked + // Rust for no longer matches what Unity is delivering, post a (re)create to the + // main thread and output silence until it lands. The priming window absorbs this. + if (_handle == null || channels != _ffiNumChannels || sampleRate != _ffiSampleRate) { - if (!_ffiCreatePosted) + if (!_pendingFfiRequest) { - _ffiCreatePosted = true; + _pendingFfiRequest = true; uint observedCh = (uint)channels; uint observedSr = (uint)sampleRate; - FfiClient.Instance._context?.Post(_ => CreateFfiStream(observedCh, observedSr), null); + FfiClient.Instance._context?.Post(_ => CreateOrRecreateFfiStream(observedCh, observedSr), null); } Array.Clear(data, 0, data.Length); return; @@ -337,7 +357,12 @@ private void OnAudioStreamEvent(AudioStreamEvent e) lock (_lock) { - if (_buffer == null) + // _pendingFfiRequest gates writes during a (re)create: between the moment + // OnAudioRead detects a format mismatch and the swap landing, Rust is still + // emitting frames at the OLD format. Drop them to avoid corrupting the buffer. + // The handle-id filter above is the second line of defense for stragglers + // arriving from the old stream after the swap. + if (_buffer == null || _pendingFfiRequest) return; unsafe