Skip to content

Commit bfdbaf4

Browse files
ratnampa and fengyuan14 authored
[Windows][PT2.6] Split larger Other Kernels lib (#1096)
torch_xpu_ops_sycl_kernels grows to around 1.83 GB on Windows, so this PR splits it to reduce the library size. New libs introduced in this PR: torch_xpu_ops_sycl_tensor_kernels, torch_xpu_ops_sycl_norm_loss_kernels, torch_xpu_ops_sycl_poly_kernels, torch_xpu_ops_sycl_dist_kernels --------- Co-authored-by: Feng Yuan <[email protected]>
1 parent 0d189dd commit bfdbaf4

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

cmake/BuildFlags.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
122122
set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "-options '${SYCL_OFFLINE_COMPILER_CG_OPTIONS}'")
123123

124124
if(WIN32)
125-
set(AOT_TARGETS "ats-m150,lnl-m,mtl-u,mtl-h")
125+
set(AOT_TARGETS "ats-m150,mtl-u,mtl-h,xe2-lpg,xe2-hpg")
126126
else()
127127
set(AOT_TARGETS "pvc,xe-lpg,ats-m150")
128128
endif()

src/BuildOnWindows.cmake

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,6 @@
33
set(TORCH_XPU_OPS_LIBRARIES)
44
set(SYCL_LINK_LIBRARIES_KEYWORD PRIVATE)
55

6-
# Walk around cyclic dependence
7-
# libtorch_xpu.so links to libtorch_xpu_ops.a
8-
# Load libtorch_xpu_ops_aten.so explicitly by torch/__init__.py:_load_dll_libraries (Break cycle)
9-
# libtorch_xpu_ops_aten.so links to libtorch_xpu_ops_sycl_unary_binary_kernels.so and libtorch_xpu_ops_sycl_kernels.so
10-
# libtorch_xpu_ops_sycl_unary_binary_kernels.so and libtorch_xpu_ops_sycl_kernels.so links to libtorch_xpu.so
116
add_library(
127
torch_xpu_ops
138
STATIC
@@ -21,7 +16,6 @@ add_library(
2116
${ATen_XPU_NATIVE_CPP_SRCS}
2217
${ATen_XPU_GEN_SRCS})
2318
install(TARGETS torch_xpu_ops_aten DESTINATION "${TORCH_INSTALL_LIB_DIR}")
24-
# target_compile_definitions(torch_xpu_ops_aten PRIVATE CAFFE2_BUILD_MAIN_LIB)
2519
target_compile_definitions(torch_xpu_ops_aten PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
2620
target_link_libraries(torch_xpu_ops_aten PUBLIC torch_xpu)
2721
target_link_libraries(torch_xpu_ops_aten PUBLIC torch_cpu)
@@ -48,8 +42,11 @@ else()
4842
set(ATen_XPU_SYCL_REDUCE_SRCS)
4943
set(ATen_XPU_SYCL_ACTIVATION_SRCS)
5044
set(ATen_XPU_SYCL_FOREACH_SRCS)
45+
set(ATen_XPU_SYCL_TENSOR_SRCS)
46+
set(ATen_XPU_SYCL_NORM_LOSS_SRCS)
47+
set(ATen_XPU_SYCL_POLY_SRCS)
48+
set(ATen_XPU_SYCL_DISTRIBUTION_SRCS)
5149
set(ATen_XPU_SYCL_OTHERS_SRCS)
52-
5350
foreach(sycl_src ${ATen_XPU_SYCL_SRCS})
5451
string(REGEX MATCH "Binary" IS_BINARY ${sycl_src})
5552
string(REGEX MATCH "Unary" IS_UNARY ${sycl_src})
@@ -63,6 +60,13 @@ else()
6360
string(REGEX MATCH "Activation" IS_ACTIVATION ${sycl_src})
6461
string(REGEX MATCH "Foreach" IS_FOREACH ${sycl_src})
6562
string(REGEX MATCH "Reduce" IS_REDUCE ${sycl_src})
63+
string(REGEX MATCH "Tensor" IS_TENSOR ${sycl_src})
64+
string(REGEX MATCH "Norm" IS_NORM ${sycl_src})
65+
string(REGEX MATCH "Loss" IS_LOSS ${sycl_src})
66+
string(REGEX MATCH "Polynomial" IS_POLY ${sycl_src})
67+
# Move the Resize kernels into the Norm and Loss lib to resolve symbols.
68+
string(REGEX MATCH "Resize" IS_RESIZE ${sycl_src})
69+
string(REGEX MATCH "Distribution" IS_DISTRIBUTION ${sycl_src})
6670
6771
if(NOT IS_FOREACH STREQUAL "")
6872
list(APPEND ATen_XPU_SYCL_FOREACH_SRCS ${sycl_src})
@@ -74,11 +78,18 @@ else()
7478
list(APPEND ATen_XPU_SYCL_REDUCE_SRCS ${sycl_src})
7579
elseif(NOT IS_ACTIVATION STREQUAL "")
7680
list(APPEND ATen_XPU_SYCL_ACTIVATION_SRCS ${sycl_src})
81+
elseif(NOT IS_TENSOR STREQUAL "")
82+
list(APPEND ATen_XPU_SYCL_TENSOR_SRCS ${sycl_src})
83+
elseif(NOT IS_DISTRIBUTION STREQUAL "")
84+
list(APPEND ATen_XPU_SYCL_DISTRIBUTION_SRCS ${sycl_src})
85+
elseif(NOT IS_NORM STREQUAL "" OR NOT IS_LOSS STREQUAL "" OR NOT IS_RESIZE STREQUAL "")
86+
list(APPEND ATen_XPU_SYCL_NORM_LOSS_SRCS ${sycl_src})
87+
elseif(NOT IS_POLY STREQUAL "")
88+
list(APPEND ATen_XPU_SYCL_POLY_SRCS ${sycl_src})
7789
else()
7890
list(APPEND ATen_XPU_SYCL_OTHERS_SRCS ${sycl_src})
7991
endif()
8092
endforeach()
81-
8293
# Binary kernel lib
8394
set(sycl_binary_lib torch_xpu_ops_sycl_binary_kernels)
8495
sycl_add_library(
@@ -148,7 +159,63 @@ else()
148159
149160
# Decouple from the PyTorch CMake definition.
150161
install(TARGETS ${sycl_foreach_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
162+
163+
# Tensor kernel lib
# Builds ${sycl_tensor_lib} as a SHARED library from ATen_XPU_SYCL_TENSOR_SRCS
# (filled by the classification loop from sources matching "Tensor" that no
# earlier branch claimed), makes torch_xpu_ops_aten link against it (PUBLIC),
# links it against torch_xpu (PUBLIC) and records it in TORCH_XPU_OPS_LIBRARIES.
# NOTE(review): TORCH_XPU_BUILD_MAIN_LIB is the same definition used for
# torch_xpu_ops_aten; presumably it selects the export side of the XPU API
# macros - confirm.
164+
set(sycl_tensor_lib torch_xpu_ops_sycl_tensor_kernels)
165+
sycl_add_library(
166+
${sycl_tensor_lib}
167+
SHARED
168+
SYCL_SOURCES ${ATen_XPU_SYCL_TENSOR_SRCS})
169+
target_compile_definitions(${sycl_tensor_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
170+
target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_tensor_lib})
171+
target_link_libraries(${sycl_tensor_lib} PUBLIC torch_xpu)
172+
list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_tensor_lib})
151173
174+
# Decouple from the PyTorch CMake definition: install this target here, into
# TORCH_INSTALL_LIB_DIR.
175+
install(TARGETS ${sycl_tensor_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
176+
177+
# Norm and Loss kernel lib
# Builds ${sycl_norm_loss_lib} as a SHARED library from
# ATen_XPU_SYCL_NORM_LOSS_SRCS (filled by the classification loop from sources
# matching "Norm", "Loss" or "Resize" that no earlier branch claimed), makes
# torch_xpu_ops_aten link against it (PUBLIC), links it against torch_xpu
# (PUBLIC) and records it in TORCH_XPU_OPS_LIBRARIES.
# NOTE(review): TORCH_XPU_BUILD_MAIN_LIB is the same definition used for
# torch_xpu_ops_aten; presumably it selects the export side of the XPU API
# macros - confirm.
178+
set(sycl_norm_loss_lib torch_xpu_ops_sycl_norm_loss_kernels)
179+
sycl_add_library(
180+
${sycl_norm_loss_lib}
181+
SHARED
182+
SYCL_SOURCES ${ATen_XPU_SYCL_NORM_LOSS_SRCS})
183+
target_compile_definitions(${sycl_norm_loss_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
184+
target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_norm_loss_lib})
185+
target_link_libraries(${sycl_norm_loss_lib} PUBLIC torch_xpu)
186+
list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_norm_loss_lib})
187+
188+
# Decouple from the PyTorch CMake definition: install this target here, into
# TORCH_INSTALL_LIB_DIR.
189+
install(TARGETS ${sycl_norm_loss_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
190+
191+
# Polynomial kernel lib
# Builds ${sycl_poly_lib} as a SHARED library from ATen_XPU_SYCL_POLY_SRCS
# (filled by the classification loop from sources matching "Polynomial" that no
# earlier branch claimed), makes torch_xpu_ops_aten link against it (PUBLIC),
# links it against torch_xpu (PUBLIC) and records it in TORCH_XPU_OPS_LIBRARIES.
# NOTE(review): TORCH_XPU_BUILD_MAIN_LIB is the same definition used for
# torch_xpu_ops_aten; presumably it selects the export side of the XPU API
# macros - confirm.
192+
set(sycl_poly_lib torch_xpu_ops_sycl_poly_kernels)
193+
sycl_add_library(
194+
${sycl_poly_lib}
195+
SHARED
196+
SYCL_SOURCES ${ATen_XPU_SYCL_POLY_SRCS})
197+
target_compile_definitions(${sycl_poly_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
198+
target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_poly_lib})
199+
target_link_libraries(${sycl_poly_lib} PUBLIC torch_xpu)
200+
list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_poly_lib})
201+
202+
# Decouple from the PyTorch CMake definition: install this target here, into
# TORCH_INSTALL_LIB_DIR.
203+
install(TARGETS ${sycl_poly_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
204+
205+
# Distribution kernel lib
# Builds ${sycl_dist_lib} as a SHARED library from
# ATen_XPU_SYCL_DISTRIBUTION_SRCS (filled by the classification loop from
# sources matching "Distribution" that no earlier branch claimed), makes
# torch_xpu_ops_aten link against it (PUBLIC), links it against torch_xpu
# (PUBLIC) and records it in TORCH_XPU_OPS_LIBRARIES.
# NOTE(review): TORCH_XPU_BUILD_MAIN_LIB is the same definition used for
# torch_xpu_ops_aten; presumably it selects the export side of the XPU API
# macros - confirm.
206+
set(sycl_dist_lib torch_xpu_ops_sycl_dist_kernels)
207+
sycl_add_library(
208+
${sycl_dist_lib}
209+
SHARED
210+
SYCL_SOURCES ${ATen_XPU_SYCL_DISTRIBUTION_SRCS})
211+
target_compile_definitions(${sycl_dist_lib} PRIVATE TORCH_XPU_BUILD_MAIN_LIB)
212+
target_link_libraries(torch_xpu_ops_aten PUBLIC ${sycl_dist_lib})
213+
target_link_libraries(${sycl_dist_lib} PUBLIC torch_xpu)
214+
list(APPEND TORCH_XPU_OPS_LIBRARIES ${sycl_dist_lib})
215+
216+
# Decouple from the PyTorch CMake definition: install this target here, into
# TORCH_INSTALL_LIB_DIR.
217+
install(TARGETS ${sycl_dist_lib} DESTINATION "${TORCH_INSTALL_LIB_DIR}")
218+
152219
# Other kernel lib
153220
set(sycl_lib torch_xpu_ops_sycl_kernels)
154221
sycl_add_library(

0 commit comments

Comments
 (0)