Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ jobs:
continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Pin each parallel test worker to a distinct GPU tile instead of
# oversubscribing device 0 (see test/runtests.jl).
ONEAPI_TEST_SPREAD_GPUS: '1'
# Synchronize after every command-list submission to work around the
# Aurora LTS NEO dropped-tail corruption (see lib/level-zero/cmdlist.jl).
ONEAPI_SYNC_EACH_SUBMISSION: '1'
runs-on: [self-hosted, linux, X64]
strategy:
matrix:
Expand All @@ -30,5 +36,13 @@ jobs:
- uses: julia-actions/cache@v3
- uses: julia-actions/julia-buildpkg@latest
continue-on-error: true
- uses: julia-actions/julia-runtest@latest
# Disable AVX512-FP16 host codegen on the Aurora Sapphire Rapids nodes. Under concurrent
# oneMKL load, the native AVX512-FP16 path silently miscomputes *host* Float16 results (e.g.
# the GPUArrays `A .* B .+ c` broadcast reference), so tests fail even though the GPU result
# is correct. A single-process run is clean and MXCSR is clean; only the native-FP16 path is
# affected, not Float32. `-C native,-avx512fp16` routes Float16 through Float32 and
# propagates to the Pkg.test subprocess and its parallel workers via Base.julia_cmd().
# `julia-runtest` cannot pass a cpu-target, so invoke Pkg.test() directly. See
# repro_bcast_mkl.jl.
- name: Run tests (AVX512-FP16 disabled)
continue-on-error: true
run: julia -C "native,-avx512fp16" --color=yes --project=. -e 'import Pkg; Pkg.test()'
12 changes: 6 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NEO_jll = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
NEO_LTS_jll = "a724f90f-ce79-56dd-a1bd-b9de5a61085f"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand All @@ -26,8 +26,8 @@ SPIRV_Tools_jll = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d"
oneAPI_Level_Zero_Loader_jll = "13eca655-d68d-5b81-8367-6d99d727ab01"
oneAPI_Level_Zero_Headers_LTS_jll = "d79c0b2e-896c-561b-aab9-323701ec0314"
oneAPI_Level_Zero_Loader_LTS_jll = "f6e5cbb4-ba2a-56dc-92a2-9d66f5656ccd"
oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36"

[compat]
Expand All @@ -41,16 +41,16 @@ GPUCompiler = "1.6"
GPUToolbox = "0.1, 0.2, 0.3, 1"
KernelAbstractions = "0.9.39"
LLVM = "6, 7, 8, 9"
NEO_jll = "=25.44.36015"
NEO_LTS_jll = "=25.18.33578"
Preferences = "1"
SPIRVIntrinsics = "0.5"
SPIRV_LLVM_Translator_jll = "21"
SPIRV_Tools_jll = "2025.4.0"
SpecialFunctions = "1.3, 2"
StaticArrays = "1"
julia = "1.10"
oneAPI_Level_Zero_Loader_jll = "1.25"
oneAPI_Level_Zero_Loader_LTS_jll = "=1.24"
oneAPI_Support_jll = "0.9.2"

[extras]
libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
libigc_LTS_jll = "9a8258a1-e827-5686-bee9-144461246960"
6 changes: 3 additions & 3 deletions deps/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Ninja_jll = "76642167-d241-5cee-8c94-7a494e8cb7b7"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d"
oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3"
oneAPI_Level_Zero_Headers_LTS_jll = "d79c0b2e-896c-561b-aab9-323701ec0314"
oneAPI_Support_Headers_LTS_jll = "0e9de0da-c0b6-5d6c-9871-5c996d414ca7"

[compat]
oneAPI_Support_Headers_jll = "=2025.2.0"
oneAPI_Support_Headers_LTS_jll = "=2025.3.1"
5 changes: 3 additions & 2 deletions deps/build_local.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ if haskey(ENV, "BUILDKITE")
run(`buildkite-agent annotate 'Using a locally-built support library; A bump of oneAPI_Support_jll is required before releasing this packages.' --style 'warning' --context 'ctx-deps'`)
end

using Scratch, Preferences, CMake_jll, Ninja_jll, oneAPI_Level_Zero_Headers_jll
using Scratch, Preferences, CMake_jll, Ninja_jll
import oneAPI_Level_Zero_Headers_LTS_jll as oneAPI_Level_Zero_Headers_jll

oneAPI = Base.UUID("8f75cd03-7ff8-4ecb-9b8f-daf728133b1b")

Expand Down Expand Up @@ -62,7 +63,7 @@ if !isfile(joinpath(conda_dir, "condarc-julia.yml"))
touch(joinpath(conda_dir, "conda-meta", "history"))
end
Conda.add_channel("https://software.repos.intel.com/python/conda/", conda_dir)
Conda.add(["dpcpp_linux-64=2025.2.0", "mkl-devel-dpcpp=2025.2.0"], conda_dir)
Conda.add(["dpcpp_linux-64=2025.3.1", "mkl-devel-dpcpp=2025.3.1"], conda_dir)

Conda.list(conda_dir)

Expand Down
26 changes: 24 additions & 2 deletions deps/generate_interfaces.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using oneAPI_Support_Headers_jll
import oneAPI_Support_Headers_LTS_jll as oneAPI_Support_Headers_jll

include("generate_helpers.jl")

Expand Down Expand Up @@ -337,12 +337,34 @@ function generate_headers(library::String, filename::Vector{String}, output::Str
end
end

# Dedup: when two signatures map to the same C function name (because MKL
# added an overload), keep the one with more parameters — typically the
# newer signature (e.g. set_csr_data gained an `nnz` arg in MKL 2025.3.1).
# Without this the generated onemkl.cpp has duplicate function definitions
# and won't compile.
_fn_name(h) = (pos = findfirst('(', h); strip(split(strip(h[1:pos-1]))[end]))
_param_cnt(h) = (pos = findfirst('(', h); ep = findnext(')', h, pos); count(==(','), h[pos+1:ep-1]) + 1)
keep_idx = Dict{String,Int}()
keep_pc = Dict{String,Int}()
for (i, sig) in enumerate(signatures)
(sig[2] in blacklist) && continue
fn = _fn_name(sig[1])
pc = _param_cnt(sig[1])
if !haskey(keep_idx, fn) || pc > keep_pc[fn]
keep_idx[fn] = i
keep_pc[fn] = pc
end
end
keep_set = Set(values(keep_idx))

path_oneapi_headers = joinpath(@__DIR__, output)
oneapi_headers = open(path_oneapi_headers, "w")

for (header, name_routine, version, type_routine, template) in signatures
for (i, (header, name_routine, version, type_routine, template)) in enumerate(signatures)
# Blacklist
(name_routine in blacklist) && continue
# Dedup
(i in keep_set) || continue

# Pass scalars (e.g. alpha/beta inputs) as references instead of values
for type in ("short", "float", "double", "float _Complex", "double _Complex")
Expand Down
Loading