Skip to content

Commit d65855e

Browse files
committed
[slimtensor] integration into backend
Pull Request resolved: #16565 ghstack-source-id: 335418194 @exported-using-ghexport Differential Revision: [D90606409](https://our.internmc.facebook.com/intern/diff/D90606409/)
1 parent 7cc86fb commit d65855e

39 files changed

Lines changed: 3356 additions & 9368 deletions

backends/aoti/CMakeLists.txt

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,37 +25,57 @@ endif()
2525
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
2626
find_package_torch()
2727

28-
# Common AOTI functionality - combines all AOTI common components
29-
set(_aoti_common_sources common_shims.cpp)
30-
add_library(aoti_common STATIC ${_aoti_common_sources})
28+
# ==============================================================================
29+
# AOTI common shims using ETensor (for Metal backend)
30+
# TODO(gasoonjia): Remove this after metal migration
31+
# ==============================================================================
32+
add_library(aoti_common_shims STATIC common_shims.cpp)
3133
target_include_directories(
32-
aoti_common
34+
aoti_common_shims
3335
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
3436
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
3537
)
3638
target_compile_options(
37-
aoti_common
39+
aoti_common_shims
3840
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/EHsc /GR>
3941
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
4042
)
4143
target_compile_definitions(
42-
aoti_common PRIVATE $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
43-
)
44-
# Ensure symbols are exported properly
45-
if(APPLE)
46-
target_link_options(aoti_common PUBLIC -Wl,-export_dynamic)
47-
else()
48-
target_link_options(
49-
aoti_common PUBLIC $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wl,--export-dynamic>
50-
)
51-
endif()
44+
aoti_common_shims PUBLIC $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
45+
)
46+
target_link_libraries(aoti_common_shims PUBLIC extension_tensor ${CMAKE_DL_LIBS})
5247

53-
# Link against ExecuTorch libraries and standard libraries
54-
target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS})
55-
executorch_target_link_options_shared_lib(aoti_common)
48+
install(
49+
TARGETS aoti_common_shims
50+
EXPORT ExecuTorchTargets
51+
DESTINATION ${CMAKE_INSTALL_LIBDIR}
52+
)
53+
54+
# ==============================================================================
55+
# AOTI common shims using SlimTensor (for CUDA backend)
56+
# Uses SlimTensor for all tensor operations
57+
# TODO(gasoonjia): Replace aoti_common_shims with this one after metal migration
58+
# ==============================================================================
59+
add_library(aoti_common_shims_slim STATIC common_shims_slim.cpp)
60+
target_include_directories(
61+
aoti_common_shims_slim
62+
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
63+
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/..>
64+
)
65+
target_compile_options(
66+
aoti_common_shims_slim
67+
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/EHsc /GR>
68+
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fexceptions -frtti -fPIC>
69+
)
70+
target_compile_definitions(
71+
aoti_common_shims_slim PUBLIC $<$<PLATFORM_ID:Windows>:EXPORT_AOTI_FUNCTIONS>
72+
)
73+
target_link_libraries(
74+
aoti_common_shims_slim PUBLIC slimtensor extension_tensor ${CMAKE_DL_LIBS}
75+
)
5676

5777
install(
58-
TARGETS aoti_common
78+
TARGETS aoti_common_shims_slim
5979
EXPORT ExecuTorchTargets
6080
DESTINATION ${CMAKE_INSTALL_LIBDIR}
6181
)

backends/aoti/targets.bzl

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def define_common_targets():
3333
],
3434
)
3535

36-
# AOTI common shims functionality
36+
# AOTI common shims functionality using ETensor
37+
# TODO(gasoonjia): Remove this after metal migration
3738
runtime.cxx_library(
3839
name = "common_shims",
3940
srcs = [
@@ -89,6 +90,7 @@ def define_common_targets():
8990

9091
# SlimTensor-based common shims library
9192
# Uses SlimTensor for all tensor operations
93+
# TODO(gasoonjia): Replace common_shims with this one after metal migration
9294
runtime.cxx_library(
9395
name = "common_shims_slim",
9496
srcs = [
@@ -97,10 +99,27 @@ def define_common_targets():
9799
headers = [
98100
"common_shims_slim.h",
99101
"export.h",
102+
"utils.h",
100103
],
101104
visibility = ["@EXECUTORCH_CLIENTS"],
102105
exported_deps = [
103106
"//executorch/runtime/core:core",
107+
"//executorch/runtime/core/exec_aten:lib",
104108
"//executorch/backends/aoti/slim/core:slimtensor",
105109
],
106110
)
111+
112+
# Common AOTI functionality for SlimTensor-based backends (combining common_shims_slim and delegate_handle)
113+
# All CUDA backend code should depend on this target
114+
# TODO(gasoonjia): Replace aoti_common with this one after metal migration
115+
runtime.cxx_library(
116+
name = "aoti_common_slim",
117+
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
118+
link_whole = True,
119+
supports_python_dlopen = True,
120+
visibility = ["PUBLIC"],
121+
exported_deps = [
122+
":common_shims_slim",
123+
":delegate_handle",
124+
],
125+
)

backends/cuda/CMakeLists.txt

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,18 @@ install(
9999

100100
# CUDA-specific AOTI shim symbols (dynamically linked). Uses
101101
# common_shims_slim.cpp for SlimTensor-based shim implementations
102-
set(_aoti_cuda_shim_sources
103-
runtime/shims/memory.cpp runtime/guard.cpp runtime/shims/cuda_guard.cpp
104-
runtime/shims/int4mm.cu ${EXECUTORCH_ROOT}/backends/aoti/common_shims.cpp
105-
${EXECUTORCH_ROOT}/backends/aoti/common_shims_slim.cpp
106-
)
102+
set(_aoti_cuda_shim_sources
103+
runtime/shims/memory.cpp runtime/shims/cuda_guard.cpp
104+
runtime/shims/int4mm.cu
105+
${EXECUTORCH_ROOT}/backends/aoti/common_shims_slim.cpp
106+
${EXECUTORCH_ROOT}/backends/aoti/slim/cuda/guard.cpp
107+
)
107108

108109
add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})
109110

111+
# Define CUDA_AVAILABLE to use SlimTensor on GPU in common_shims_slim.h
112+
target_compile_definitions(aoti_cuda_shims PRIVATE CUDA_AVAILABLE=1)
113+
110114
# Define export macros for shared library
111115
if(MSVC)
112116
target_compile_definitions(aoti_cuda_shims PRIVATE EXPORT_AOTI_FUNCTIONS)

backends/cuda/runtime/TARGETS

Lines changed: 12 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,6 @@ load("//tools/build/buck:nvcc_flags.bzl", "get_nvcc_arch_args")
33

44
oncall("executorch")
55

6-
runtime.cxx_library(
7-
name = "guard",
8-
srcs = [
9-
"guard.cpp",
10-
],
11-
headers = [
12-
"guard.h",
13-
"utils.h",
14-
],
15-
visibility = ["PUBLIC"],
16-
deps = [
17-
"//executorch/runtime/platform:platform",
18-
],
19-
exported_deps = [
20-
"//executorch/runtime/core:core",
21-
"//executorch/runtime/core/exec_aten:lib",
22-
],
23-
external_deps = [
24-
("cuda", None, "cuda-lazy"),
25-
],
26-
)
27-
286
runtime.cxx_library(
297
name = "cuda_platform",
308
srcs = [
@@ -71,14 +49,12 @@ runtime.cxx_library(
7149
runtime.cxx_library(
7250
name = "runtime_shims",
7351
srcs = [
74-
"guard.cpp",
7552
"shims/cuda_guard.cpp",
7653
"shims/int4mm.cu",
7754
"shims/memory.cpp",
7855
"shims/tensor_attribute.cpp",
7956
],
8057
headers = [
81-
"guard.h",
8258
"shims/cuda_guard.h",
8359
"shims/int4mm.cuh",
8460
"shims/int4mm.h",
@@ -91,43 +67,18 @@ runtime.cxx_library(
9167
supports_python_dlopen = True,
9268
# Constructor needed for backend registration.
9369
compiler_flags = ["-Wno-global-constructors"],
70+
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
9471
visibility = ["PUBLIC"],
9572
deps = [
9673
":tensor_maker",
97-
"//executorch/backends/aoti:common_shims",
98-
"//executorch/runtime/core:core",
99-
"//executorch/runtime/core/exec_aten:lib",
100-
"//executorch/runtime/platform:platform",
101-
"//executorch/backends/cuda/runtime:cuda_platform",
102-
],
103-
nvcc_flags = get_nvcc_arch_args() + [
104-
"-_NVCC_HOST_COMPILER_FLAG_",
105-
"gcc",
106-
],
107-
external_deps = [
108-
("cuda", None, "cuda-lazy"),
109-
],
110-
)
111-
112-
runtime.cxx_library(
113-
name = "runtime_shims_slim",
114-
srcs = [
115-
"shims/memory_slim.cpp",
116-
],
117-
headers = [
118-
"shims/memory_slim.h",
119-
],
120-
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
121-
link_whole = True,
122-
supports_python_dlopen = True,
123-
visibility = ["@EXECUTORCH_CLIENTS"],
124-
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
125-
deps = [
74+
"//executorch/backends/aoti:aoti_common_slim",
12675
"//executorch/backends/aoti/slim/core:slimtensor",
12776
"//executorch/backends/aoti/slim/factory:empty",
12877
"//executorch/backends/aoti/slim/factory:from_blob",
129-
"//executorch/backends/aoti:common_shims",
78+
"//executorch/backends/aoti/slim/cuda:guard",
13079
"//executorch/runtime/core:core",
80+
"//executorch/runtime/core/exec_aten:lib",
81+
"//executorch/runtime/core/exec_aten/util:tensor_util",
13182
"//executorch/runtime/platform:platform",
13283
],
13384
nvcc_flags = get_nvcc_arch_args() + [
@@ -149,10 +100,16 @@ runtime.cxx_library(
149100
supports_python_dlopen = True,
150101
# Constructor needed for backend registration.
151102
compiler_flags = ["-Wno-global-constructors"],
103+
preprocessor_flags = ["-DCUDA_AVAILABLE=1"],
152104
visibility = ["PUBLIC"],
153105
deps = [
154106
":runtime_shims",
155-
"//executorch/backends/aoti:aoti_common",
107+
"//executorch/backends/aoti:aoti_common_slim",
108+
"//executorch/backends/aoti/slim/core:slimtensor",
109+
"//executorch/backends/aoti/slim/factory:empty",
110+
"//executorch/backends/aoti/slim/factory:from_blob",
111+
"//executorch/backends/aoti/slim/factory:from_etensor",
112+
"//executorch/extension/tensor:tensor",
156113
"//executorch/runtime/backend:interface",
157114
"//executorch/runtime/core/exec_aten/util:tensor_util",
158115
],

0 commit comments

Comments
 (0)