-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL] Add more aspect information for intel_gpu_* in device config file #14188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
4257d8c
f2fb7c6
435b7bd
f3a52b3
0b1adb1
e08a6f5
0684382
18ea9a9
fd966aa
39c2720
ae3534b
579be18
2f3306a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -160,9 +160,40 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>; | |
|
||
// TODO: The aspects listed for the intel_gpu targets right now are incomplete; | ||
// only the fp16/fp64/atomic64 aspects are listed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May be update comment to include SG aspects? Also, is the list of GPUs complete? Thanks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree about SG sizes comment update.
Likely no, we have almost 40 lines describing different Intel GPU architectures in #13976, not counting aliases I think that we need some kind of an integration test here between clang driver, SYCL headers and device config file. All those places list known targets and we want them to be in sync. However, I wouldn't block this commit by lack of such test, but that is something we need to have going forward. The fact that this file is expanded incrementally is expected, I think |
||
def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>; | ||
def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>; | ||
def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>; | ||
defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64]; | ||
defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64]; | ||
defvar Sg8_16_32 = [8, 16, 32]; | ||
defvar Sg16_32 = [16, 32]; | ||
defvar IntelBaseAspects = [AspectExt_intel_esimd]; | ||
class IntelTargetInfo<string Name, list<Aspect> Aspects, list<int> subGroupSizesList> | ||
: TargetInfo<Name, IntelBaseAspects # Aspects, subGroupSizesList>; | ||
// Note: only the "canonical" target names are listed here - see | ||
// SYCL::gen::resolveGenDevice(). | ||
def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_pvc_vg", Fp16Fp64Atomic64, Sg16_32>; | ||
def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>; | ||
def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>; | ||
def : IntelTargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>; | ||
|
||
// | ||
// CUDA / NVPTX device aspects | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
; With ESIMD, the reqd_sub_group_size of a kernel will be 1. Normally, | ||
; no device can handled compiling for this reqd_sub_group_size, but | ||
; for ESIMD, this is an exception. This test makes sure that | ||
; ESIMD kernels are not filtered out when using filtering | ||
; (e.g. -o intel_gpu_dg1,%t-dg1.table) and also ensures that | ||
; non ESIMD kernels with reqd_sub_group_size=1 are still filtered out. | ||
|
||
; RUN: sycl-post-link %s -symbols -split=auto \ | ||
; RUN: -o intel_gpu_dg1,%t-dg1.table | ||
|
||
; RUN: FileCheck %s -input-file=%t-dg1.table -check-prefix=CHECK-TABLE | ||
; RUN: FileCheck %s -input-file=%t-dg1_esimd_0.sym -check-prefix=CHECK-SYM -implicit-check-not=reqd_sub_group_size_kernel_1 | ||
|
||
; CHECK-TABLE: _esimd_0.sym | ||
; CHECK-SYM: esimd_kernel | ||
|
||
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
target triple = "spir64-unknown-unknown" | ||
|
||
define spir_kernel void @esimd_kernel(ptr addrspace(1) noundef align 8 %_arg_out) #0 !sycl_explicit_simd !69 !intel_reqd_sub_group_size !68 !sycl_used_aspects !67 { | ||
entry: | ||
ret void | ||
} | ||
|
||
define spir_kernel void @reqd_sub_group_size_kernel_1(ptr addrspace(1) noundef align 8 %_arg_out) #0 !intel_reqd_sub_group_size !68 { | ||
entry: | ||
ret void | ||
} | ||
|
||
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" } | ||
|
||
!llvm.module.flags = !{!0, !1} | ||
!opencl.spir.version = !{!2} | ||
!spirv.Source = !{!3} | ||
!llvm.ident = !{!64} | ||
|
||
!0 = !{i32 1, !"wchar_size", i32 4} | ||
!1 = !{i32 7, !"frame-pointer", i32 2} | ||
!2 = !{i32 1, i32 2} | ||
!3 = !{i32 4, i32 100000} | ||
!9 = !{!"ext_intel_esimd", i32 53} | ||
!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"} | ||
!67 = !{!9} | ||
!68 = !{i32 1} | ||
!69 = !{} | ||
!78 = !{i32 8} | ||
!79 = !{i32 16} | ||
!80 = !{i32 32} | ||
!81 = !{i32 64} |
Uh oh!
There was an error while loading. Please reload this page.