Assert `@allocated == 0` for the dense path instead of `check_allocs`, and inline the 5-arg `mul!` in place of the `_eh_accum_stage2!` helpers.

phjmsycc · phjmsycc · commit 22ad48d873b1 · 2025-09-12T16:02:29.000+09:00
diff --git a/src/perform_step/low_order.jl b/src/perform_step/low_order.jl
@@ -104,27 +104,13 @@ end
         # By linearity: 0.5*(gtmp1+gtmp2)*W.dW == 0.5*(gtmp2*W.dW) + 0.5*(gtmp1*W.dW).
         # Avoid forming (gtmp1 + gtmp2), which would allocate a temporary SparseMatrixCSC.
         # Use 5-arg mul! to accumulate directly into the cached vector (allocation-free).
-        _eh_accum_stage2!(nrtmp, gtmp2, W.dW)
+        mul!(nrtmp, gtmp2, W.dW, convert(eltype(nrtmp), 0.5), convert(eltype(nrtmp), 0.5))
     end
 
     dto2 = dt / 2
     @.. u = uprev + dto2 * (ftmp1 + ftmp2) + nrtmp
 end
 
-@inline function _eh_accum_stage2!(y, g2, dW)
-    mul!(y, g2, dW, convert(eltype(y), 0.5), convert(eltype(y), 0.5))
-    return nothing
-end
-
-@inline function _eh_accum_stage2!(
-        y::StridedVector{T},
-        g2::StridedMatrix{T},
-        dW::StridedVector{T}
-) where {T <: LinearAlgebra.BlasFloat}
-    LinearAlgebra.BLAS.gemv!('N', T(0.5), g2, dW, T(0.5), y)
-    return nothing
-end
-
 @muladd function perform_step!(integrator, cache::RandomEMConstantCache)
     @unpack t, dt, uprev, u, W, p, f = integrator
     u = uprev .+ dt .* integrator.f(uprev, p, t, W.curW)
diff --git a/test/nondiag_noise_eulerheun_test.jl b/test/nondiag_noise_eulerheun_test.jl
@@ -94,10 +94,13 @@ using DiffEqBase: @..
         integ = init(prob, EulerHeun(); dt = 0.01, adaptive = false, save_on = false)
 
         cache = integ.cache
-        allocs = AllocCheck.check_allocs(
-            StochasticDiffEq.perform_step!, (typeof(integ), typeof(cache))
-        )
-        @test isempty(allocs)
+
+        # Dense + BLAS: assert `@allocated == 0` at runtime rather than `check_allocs`.
+        # `check_allocs` flags the throw-only branches in LinearAlgebra's generic
+        # gemv!/mul!, although the BLAS hot path itself is allocation-free at runtime.
+        # `check_allocs` is still used for the sparse/non-diagonal tests.
+        StochasticDiffEq.perform_step!(integ, cache) # warm-up
+        @test @allocated(StochasticDiffEq.perform_step!(integ, cache)) == 0
     end
 
     make_integrator_dense(16)