Skip to content

Commit f90d49d

Browse files
Merge pull request #630 from phjmsycc/master
Fix: Eliminate per-step allocations in EulerHeun with sparse non-diagonal noise by avoiding temporary `gtmp1 + gtmp2`
2 parents 89d9c5c + 22ad48d commit f90d49d

File tree

4 files changed

+119
-4
lines changed

4 files changed

+119
-4
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ UnPack = "0.1, 1.0"
6767
julia = "1.10"
6868

6969
[extras]
70+
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
7071
DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
7172
DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
7273
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
@@ -80,4 +81,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
8081
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8182

8283
[targets]
83-
test = ["DiffEqCallbacks", "DiffEqDevTools", "SDEProblemLibrary", "LinearSolve", "ModelingToolkit", "Pkg", "SafeTestsets", "SparseArrays", "Statistics", "OrdinaryDiffEq", "Test"]
84+
test = ["AllocCheck", "DiffEqCallbacks", "DiffEqDevTools", "SDEProblemLibrary", "LinearSolve", "ModelingToolkit", "Pkg", "SafeTestsets", "SparseArrays", "Statistics", "OrdinaryDiffEq", "Test"]

src/perform_step/low_order.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,13 @@ end
9898
integrator.g(gtmp2, tmp, p, t+dt)
9999

100100
if is_diagonal_noise(integrator.sol.prob)
101-
@.. nrtmp=(1/2)*W.dW*(gtmp1+gtmp2)
101+
@.. nrtmp = W.dW * (gtmp1 + gtmp2) / 2
102102
else
103-
@.. gtmp1 = (1/2)*(gtmp1+gtmp2)
104-
mul!(nrtmp, gtmp1, W.dW)
103+
# nrtmp already contains gtmp1 * W.dW from stage 1.
104+
# By linearity: 0.5*(gtmp1+gtmp2)*W.dW == 0.5*(gtmp2*W.dW) + 0.5*(gtmp1*W.dW).
105+
# Avoid forming (gtmp1 + gtmp2), which would allocate a temporary SparseMatrixCSC.
106+
# Use 5-arg mul! to accumulate directly into the cached vector (allocation-free).
107+
mul!(nrtmp, gtmp2, W.dW, convert(eltype(nrtmp), 0.5), convert(eltype(nrtmp), 0.5))
105108
end
106109

107110
dto2 = dt / 2
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
using StochasticDiffEq, DiffEqNoiseProcess, SparseArrays, LinearAlgebra,
2+
AllocCheck
3+
using DiffEqBase: @..
4+
5+
@testset "EulerHeun sparse noise: no per-step alloc" begin
6+
7+
# Simple linear drift
8+
f!(du, u, p, t) = (@.. du = 0.999 * u)
9+
10+
# 2×2 identical-column block structure; g! only writes nzval of an existing sparsity pattern
11+
function sparse_proto(N)
12+
I = Vector{Int}(undef, 4N)
13+
J = similar(I)
14+
V = ones(Float64, 4N)
15+
@inbounds for i in 1:N
16+
I[4i - 3] = 2i - 1
17+
J[4i - 3] = 2i - 1
18+
19+
I[4i - 2] = 2i
20+
J[4i - 2] = 2i - 1
21+
22+
I[4i - 1] = 2i - 1
23+
J[4i - 1] = 2i
24+
25+
I[4i] = 2i
26+
J[4i] = 2i
27+
end
28+
sparse(I, J, V, 2N, 2N)
29+
end
30+
31+
@inline function ensure_pattern!(G::SparseMatrixCSC{T, Int}, N) where {T}
32+
s = one(T)
33+
@inbounds for i in 1:N
34+
G[2i - 1, 2i - 1] = s
35+
G[2i, 2i - 1] = s
36+
G[2i - 1, 2i] = s
37+
G[2i, 2i] = s
38+
end
39+
return nothing
40+
end
41+
42+
# Dense g!
43+
function g!(G::StridedMatrix{T}, u, p, t) where {T}
44+
c012 = T(0.12)
45+
c18 = T(1.8)
46+
@inbounds for i in 1:(p.N)
47+
off = 2i - 1
48+
G[off, off] = c012 * u[2i - 1]
49+
G[off + 1, off] = c18 * u[2i]
50+
51+
G[off, off + 1] = c012 * u[2i - 1]
52+
G[off + 1, off + 1] = c18 * u[2i]
53+
end
54+
return nothing
55+
end
56+
57+
# Sparse g!
58+
function g!(G::SparseMatrixCSC{T}, u, p, t) where {T}
59+
c012 = T(0.12)
60+
c18 = T(1.8)
61+
@inbounds for i in 1:(p.N)
62+
off = G.colptr[2i - 1]
63+
G.nzval[off] = c012 * u[2i - 1]
64+
G.nzval[off + 1] = c18 * u[2i]
65+
66+
off = G.colptr[2i]
67+
G.nzval[off] = c012 * u[2i - 1]
68+
G.nzval[off + 1] = c18 * u[2i]
69+
end
70+
return nothing
71+
end
72+
73+
function make_integrator_sparse(N)
74+
A = sparse_proto(N)
75+
p = (; N)
76+
W = SimpleWienerProcess!(0.0, zeros(2N); save_everystep = false)
77+
prob = SDEProblem(
78+
f!, g!, ones(2N), (0.0, 1.0), p; noise_rate_prototype = A, noise = W)
79+
integ = init(prob, EulerHeun(); dt = 0.01, adaptive = false, save_on = false)
80+
81+
cache = integ.cache
82+
allocs = AllocCheck.check_allocs(
83+
StochasticDiffEq.perform_step!, (typeof(integ), typeof(cache))
84+
)
85+
@test isempty(allocs)
86+
end
87+
88+
function make_integrator_dense(N)
89+
A = zeros(2N, 2N)
90+
p = (; N)
91+
W = SimpleWienerProcess!(0.0, zeros(2N); save_everystep = false)
92+
prob = SDEProblem(
93+
f!, g!, ones(2N), (0.0, 1.0), p; noise_rate_prototype = A, noise = W)
94+
integ = init(prob, EulerHeun(); dt = 0.01, adaptive = false, save_on = false)
95+
96+
cache = integ.cache
97+
98+
# Dense+BLAS: assert with `@allocated == 0` (not `check_allocs`).
99+
# `check_allocs` flags throw-only branches in LinearAlgebra’s generic gemv!/mul!,
100+
# while the BLAS hot path is allocation-free at runtime. We keep `check_allocs`
101+
# for the sparse/non-diagonal tests.
102+
StochasticDiffEq.perform_step!(integ, cache) # warm-up
103+
@test @allocated(StochasticDiffEq.perform_step!(integ, cache)) == 0
104+
end
105+
106+
make_integrator_dense(16)
107+
make_integrator_sparse(16)
108+
end

test/runtests.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ const is_APPVEYOR = Sys.iswindows() && haskey(ENV, "APPVEYOR")
106106
@time @safetestset "Non-diagonal SDE Tests" begin
107107
include("nondiagonal_tests.jl")
108108
end
109+
@time @safetestset "Non-diagonal EulerHeun sparse alloc" begin
110+
include("nondiag_noise_eulerheun_test.jl")
111+
end
109112
@time @safetestset "No Index Tests" begin
110113
include("noindex_tests.jl")
111114
end

0 commit comments

Comments
 (0)