Merge pull request #630 from phjmsycc/master

ChrisRackauckas · web-flow · commit f90d49da486f · 2025-09-12T12:15:38.000Z
Fix: Eliminate per-step allocations in EulerHeun with sparse non-diagonal noise by avoiding temporary `gtmp1 + gtmp2`
diff --git a/Project.toml b/Project.toml
@@ -67,6 +67,7 @@ UnPack = "0.1, 1.0"
 julia = "1.10"
 
 [extras]
+AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
 DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
 DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
 LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
@@ -80,4 +81,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DiffEqCallbacks", "DiffEqDevTools", "SDEProblemLibrary", "LinearSolve", "ModelingToolkit", "Pkg", "SafeTestsets", "SparseArrays", "Statistics", "OrdinaryDiffEq", "Test"]
+test = ["AllocCheck", "DiffEqCallbacks", "DiffEqDevTools", "SDEProblemLibrary", "LinearSolve", "ModelingToolkit", "Pkg", "SafeTestsets", "SparseArrays", "Statistics", "OrdinaryDiffEq", "Test"]
diff --git a/src/perform_step/low_order.jl b/src/perform_step/low_order.jl
@@ -98,10 +98,13 @@ end
     integrator.g(gtmp2, tmp, p, t+dt)
 
     if is_diagonal_noise(integrator.sol.prob)
-        @.. nrtmp=(1/2)*W.dW*(gtmp1+gtmp2)
+        @.. nrtmp = W.dW * (gtmp1 + gtmp2) / 2
     else
-        @.. gtmp1 = (1/2)*(gtmp1+gtmp2)
-        mul!(nrtmp, gtmp1, W.dW)
+        # nrtmp still holds gtmp1 * W.dW from stage 1, so by linearity
+        # 0.5 * (gtmp1 + gtmp2) * W.dW == 0.5 * (gtmp2 * W.dW) + 0.5 * nrtmp.
+        # Avoid forming (gtmp1 + gtmp2): it would allocate a temporary SparseMatrixCSC.
+        # 5-arg mul!(C, A, B, α, β) computes C = A*B*α + C*β in place (allocation-free).
+        mul!(nrtmp, gtmp2, W.dW, convert(eltype(nrtmp), 0.5), convert(eltype(nrtmp), 0.5))
     end
 
     dto2 = dt / 2
diff --git a/test/nondiag_noise_eulerheun_test.jl b/test/nondiag_noise_eulerheun_test.jl
@@ -0,0 +1,108 @@
+using StochasticDiffEq, DiffEqNoiseProcess, SparseArrays, LinearAlgebra,
+      AllocCheck
+using DiffEqBase: @..
+
+@testset "EulerHeun sparse noise: no per-step alloc" begin
+
+    # Drift term (in place): writes 0.999 * u into du, elementwise via `@..`
+    f!(du, u, p, t) = (@.. du = 0.999 * u)
+
+    # Sparsity prototype: N dense 2×2 blocks of ones along the diagonal of a 2N×2N
+    # matrix, so g! only has to overwrite nzval of an already existing pattern.
+    function sparse_proto(N)
+        nentries = 4N
+        rows = Vector{Int}(undef, nentries)
+        cols = similar(rows)
+        vals = ones(Float64, nentries)
+
+        # (row, column) offsets of the four block entries, column by column
+        offsets = ((0, 0), (1, 0), (0, 1), (1, 1))
+        @inbounds for blk in 1:N
+            corner = 2blk - 1      # first row/column of this block
+            filled = 4 * (blk - 1) # triplets written by earlier blocks
+            for (k, (drow, dcol)) in enumerate(offsets)
+                rows[filled + k] = corner + drow
+                cols[filled + k] = corner + dcol
+            end
+        end
+        return sparse(rows, cols, vals, 2N, 2N)
+    end
+
+    @inline function ensure_pattern!(G::SparseMatrixCSC{T, Int}, N) where {T}
+        # Assign one(T) to all four entries of each 2×2 diagonal block, column by
+        # column (both rows of column 2i - 1, then both rows of column 2i).
+        # NOTE(review): this helper is not called anywhere in this test file.
+        unit = one(T)
+        @inbounds for blk in 1:N, col in (2blk - 1):(2blk), row in (2blk - 1):(2blk)
+            G[row, col] = unit
+        end
+        return nothing
+    end
+
+    # Dense g!: fill every 2×2 diagonal block with two identical columns
+    # (0.12 * u[2i - 1], 1.8 * u[2i]); entries outside the blocks are left untouched.
+    function g!(G::StridedMatrix{T}, u, p, t) where {T}
+        gain_top, gain_bottom = T(0.12), T(1.8)
+        @inbounds for blk in 1:(p.N)
+            top, bottom = 2blk - 1, 2blk
+            upper, lower = gain_top * u[top], gain_bottom * u[bottom]
+            for col in (top, bottom)
+                G[top, col] = upper
+                G[bottom, col] = lower
+            end
+        end
+        return nothing
+    end
+
+    # Sparse g!: same values as the dense method, written straight into nzval.
+    # Assumes the first two stored entries of columns 2i - 1 and 2i are the block rows.
+    function g!(G::SparseMatrixCSC{T}, u, p, t) where {T}
+        gain_top, gain_bottom = T(0.12), T(1.8)
+        colstart, stored = G.colptr, G.nzval
+        @inbounds for blk in 1:(p.N)
+            upper, lower = gain_top * u[2blk - 1], gain_bottom * u[2blk]
+            for col in (2blk - 1, 2blk)
+                slot = colstart[col]
+                stored[slot] = upper
+                stored[slot + 1] = lower
+            end
+        end
+        return nothing
+    end
+
+    function make_integrator_sparse(N)
+        # EulerHeun integrator for non-diagonal noise with a sparse noise-rate prototype.
+        noise = SimpleWienerProcess!(0.0, zeros(2N); save_everystep = false)
+        prob = SDEProblem(f!, g!, ones(2N), (0.0, 1.0), (; N);
+            noise_rate_prototype = sparse_proto(N), noise = noise)
+        integ = init(prob, EulerHeun(); dt = 0.01, adaptive = false, save_on = false)
+
+        # Analyse `perform_step!` for the concrete integrator/cache types with AllocCheck
+        # and require that it reports nothing.
+        cache = integ.cache
+        step_argtypes = (typeof(integ), typeof(cache))
+        findings = AllocCheck.check_allocs(StochasticDiffEq.perform_step!, step_argtypes)
+        @test isempty(findings)
+    end
+
+    function make_integrator_dense(N)
+        # EulerHeun integrator for non-diagonal noise with a dense noise-rate prototype.
+        noise = SimpleWienerProcess!(0.0, zeros(2N); save_everystep = false)
+        prob = SDEProblem(f!, g!, ones(2N), (0.0, 1.0), (; N);
+            noise_rate_prototype = zeros(2N, 2N), noise = noise)
+        integ = init(prob, EulerHeun(); dt = 0.01, adaptive = false, save_on = false)
+        cache = integ.cache
+
+        # Dense+BLAS is checked at runtime with `@allocated` instead of `check_allocs`:
+        # `check_allocs` flags throw-only branches in LinearAlgebra's generic
+        # gemv!/mul!, while the BLAS hot path is allocation-free at runtime.
+        # `check_allocs` remains in use for the sparse/non-diagonal test.
+        StochasticDiffEq.perform_step!(integ, cache) # warm-up call, not measured
+
+        bytes = @allocated StochasticDiffEq.perform_step!(integ, cache)
+        @test bytes == 0
+    end
+
+    make_integrator_dense(16)
+    make_integrator_sparse(16)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -106,6 +106,9 @@ const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
         @time @safetestset "Non-diagonal SDE Tests" begin
             include("nondiagonal_tests.jl")
         end
+        @time @safetestset "Non-diagonal EulerHeun sparse alloc" begin
+            include("nondiag_noise_eulerheun_test.jl")
+        end
         @time @safetestset "No Index Tests" begin
             include("noindex_tests.jl")
         end