@@ -195,6 +195,7 @@ function henon(dz,z,p,t)
195195 dq₂ = p₂
196196
197197 dz .= [dp₁, dp₂, dq₁, dq₂]
198+ return nothing
198199end
199200
200201u₀ = [0.1, 0.0, 0.0, 0.5]
@@ -240,7 +241,7 @@ function generate_ics(E,n)
240241 # figures in the Henon-Heiles 1964 article
241242 qrange = range(-0.4, stop = 1.0, length = n)
242243 prange = range(-0.5, stop = 0.5, length = n)
243- z0 = Vector{Vector{Float64 }}()
244+ z0 = Vector{Vector{typeof(E) }}()
244245 for q in qrange
245246 V = H([0,0],[0,q],nothing)
246247 V ≥ E && continue
@@ -269,11 +270,28 @@ plot(sim, vars=(3,4), tspan=(0,10))
269270
270271## Part 4: Parallelized GPU Ensemble Solving
271272
273+ To use GPU parallelization, all inputs (initial conditions, tspan,
274+ etc.) must be `Float32`, and the function definition must be
275+ written in in-place form, avoid bounds checking, and
276+ return `nothing`.
277+
272278```julia
273279using DiffEqGPU
274280
275- z0 = generate_ics(0.125, 100)
276- sim = solve(ensprob, Vern9(), EnsembleGPUArray(), trajectories=length(z0))
281+ function henon_gpu(dz,z,p,t)  # in-place RHS: writes the derivative of state z into dz; p and t are unused
282+ @inbounds begin  # disable bounds checking on the indexing below
283+ dz[1] = -z[3]*(1 + 2z[4])  # presumably dp₁, assuming the same [p₁, p₂, q₁, q₂] ordering as `henon`
284+ dz[2] = -z[4]-(z[3]^2 - z[4]^2)  # presumably dp₂
285+ dz[3] = z[1]  # presumably dq₁ = p₁
286+ dz[4] = z[2]  # presumably dq₂ = p₂
287+ end
288+ return nothing  # result is delivered via dz; return nothing explicitly
289+ end
290+
291+ z0 = generate_ics(0.125f0, 50)  # Float32 energy literal; generate_ics types its output vectors with typeof(E)
292+ prob_gpu = ODEProblem(henon_gpu, Float32.(u₀), (0.f0, 1000.f0))  # initial state and time span both given as Float32
293+ ensprob = EnsembleProblem(prob_gpu, prob_func=prob_func)  # prob_func is defined elsewhere in the document
294+ sim = solve(ensprob, Tsit5(), EnsembleGPUArray(), trajectories=length(z0))  # one trajectory per entry of z0
277295```
278296# Problem 6: Training Neural Stochastic Differential Equations with GPU acceleration (I)
279297
0 commit comments