Skip to content

Commit d08c20e

Browse files
authored
[SYCL] Fix the sub group size of Intel (ggml-org#8106)
* use warp_size macro for all sycl kernels
* fix mask of permute_sub_group_by_xor
* fix rms_norm with correct warp number
* fix rms_norm_f32/group_norm_f32
* move norm to norm.cpp file
* fix quantize bug
* fix mmvq's batch size
1 parent 5fac350 commit d08c20e

File tree

9 files changed

+587
-509
lines changed

9 files changed

+587
-509
lines changed

ggml/src/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -486,9 +486,11 @@ if (GGML_SYCL)
486486
add_compile_options(-I./) #include DPCT
487487

488488
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
489-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
490489
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
491490
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
491+
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
492+
else()
493+
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
492494
endif()
493495

494496
file(GLOB GGML_HEADERS_SYCL "ggml-sycl/*.hpp")

0 commit comments

Comments
 (0)