Skip to content

Commit f51e43b

Browse files
authored
[SYCL] Add more aspect information for intel_gpu_* in device config file (#14188)
1 parent 633a806 commit f51e43b

File tree

6 files changed

+110
-15
lines changed

6 files changed

+110
-15
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,6 +1567,15 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
15671567
getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
15681568
<< "-device" << Target;
15691569
}
1570+
// ocloc has different names for some of the newer architectures;
1571+
// translate them to the apropriate value here.
1572+
DepInfo =
1573+
llvm::StringSwitch<StringRef>(DepInfo)
1574+
.Cases("pvc_vg", "12_61_7", "pvc_xt_c0_vg")
1575+
.Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "12_70_4", "mtl_s")
1576+
.Cases("mtl_h", "12_71_4", "mtl_p")
1577+
.Cases("arl_h", "12_74_4", "xe_lpgplus_b0")
1578+
.Default(DepInfo);
15701579
CmdArgs.push_back("-device");
15711580
CmdArgs.push_back(Args.MakeArgString(DepInfo));
15721581
}

clang/test/Driver/sycl-oneapi-gpu-intelgpu.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,27 +123,27 @@
123123
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_60_7 -### %s 2>&1 | \
124124
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc -DMAC_STR=PVC
125125
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc_vg -### %s 2>&1 | \
126-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG
126+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG
127127
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_61_7 -### %s 2>&1 | \
128-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG
128+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=pvc_xt_c0_vg -DMAC_STR=PVC_VG
129129
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_u -### %s 2>&1 | \
130-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U
130+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U
131131
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_s -### %s 2>&1 | \
132-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U
132+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U
133133
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_u -### %s 2>&1 | \
134-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U
134+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U
135135
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_s -### %s 2>&1 | \
136-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U
136+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U
137137
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_70_4 -### %s 2>&1 | \
138-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_u -DMAC_STR=MTL_U
138+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_s -DMAC_STR=MTL_U
139139
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_mtl_h -### %s 2>&1 | \
140-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H
140+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H
141141
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_71_4 -### %s 2>&1 | \
142-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_h -DMAC_STR=MTL_H
142+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=mtl_p -DMAC_STR=MTL_H
143143
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_arl_h -### %s 2>&1 | \
144-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H
144+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H
145145
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_12_74_4 -### %s 2>&1 | \
146-
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=arl_h -DMAC_STR=ARL_H
146+
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=xe_lpgplus_b0 -DMAC_STR=ARL_H
147147
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21 -### %s 2>&1 | \
148148
// RUN: FileCheck %s --check-prefixes=DEVICE,MACRO -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21
149149
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_20_1_4 -### %s 2>&1 | \

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,40 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>;
160160

161161
// TODO: The aspects listed for the intel_gpu targets right now are incomplete;
162162
// only the fp16/fp64/atomic64 aspects are listed.
163-
def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>;
164-
def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>;
165-
def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>;
163+
defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64];
164+
defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64];
165+
defvar Sg8_16_32 = [8, 16, 32];
166+
defvar Sg16_32 = [16, 32];
167+
defvar IntelBaseAspects = [AspectExt_intel_esimd];
168+
class IntelTargetInfo<string Name, list<Aspect> Aspects, list<int> subGroupSizesList>
169+
: TargetInfo<Name, IntelBaseAspects # Aspects, subGroupSizesList>;
170+
// Note: only the "canonical" target names are listed here - see
171+
// SYCL::gen::resolveGenDevice().
172+
def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>;
173+
def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>;
174+
def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>;
175+
def : IntelTargetInfo<"intel_gpu_pvc_vg", Fp16Fp64Atomic64, Sg16_32>;
176+
def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>;
177+
def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>;
178+
def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>;
179+
def : IntelTargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>;
180+
def : IntelTargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>;
181+
def : IntelTargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>;
182+
def : IntelTargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>;
183+
def : IntelTargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>;
184+
def : IntelTargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>;
185+
def : IntelTargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>;
186+
def : IntelTargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>;
187+
def : IntelTargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>;
188+
def : IntelTargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>;
189+
def : IntelTargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>;
190+
def : IntelTargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>;
191+
def : IntelTargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>;
192+
def : IntelTargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>;
193+
def : IntelTargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>;
194+
def : IntelTargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>;
195+
def : IntelTargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>;
196+
def : IntelTargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>;
166197

167198
//
168199
// CUDA / NVPTX device aspects
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; With ESIMD, the reqd_sub_group_size of a kernel will be 1. Normally,
2+
; no device can handled compiling for this reqd_sub_group_size, but
3+
; for ESIMD, this is an exception. This test makes sure that
4+
; ESIMD kernels are not filtered out when using filtering
5+
; (e.g. -o intel_gpu_dg1,%t-dg1.table) and also ensures that
6+
; non ESIMD kernels with reqd_sub_group_size=1 are still filtered out.
7+
8+
; RUN: sycl-post-link %s -symbols -split=auto \
9+
; RUN: -o intel_gpu_dg1,%t-dg1.table
10+
11+
; RUN: FileCheck %s -input-file=%t-dg1.table -check-prefix=CHECK-TABLE
12+
; RUN: FileCheck %s -input-file=%t-dg1_esimd_0.sym -check-prefix=CHECK-SYM -implicit-check-not=reqd_sub_group_size_kernel_1
13+
14+
; CHECK-TABLE: _esimd_0.sym
15+
; CHECK-SYM: esimd_kernel
16+
17+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
18+
target triple = "spir64-unknown-unknown"
19+
20+
define spir_kernel void @esimd_kernel(ptr addrspace(1) noundef align 8 %_arg_out) #0 !sycl_explicit_simd !69 !intel_reqd_sub_group_size !68 !sycl_used_aspects !67 {
21+
entry:
22+
ret void
23+
}
24+
25+
define spir_kernel void @reqd_sub_group_size_kernel_1(ptr addrspace(1) noundef align 8 %_arg_out) #0 !intel_reqd_sub_group_size !68 {
26+
entry:
27+
ret void
28+
}
29+
30+
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" }
31+
32+
!llvm.module.flags = !{!0, !1}
33+
!opencl.spir.version = !{!2}
34+
!spirv.Source = !{!3}
35+
!llvm.ident = !{!64}
36+
37+
!0 = !{i32 1, !"wchar_size", i32 4}
38+
!1 = !{i32 7, !"frame-pointer", i32 2}
39+
!2 = !{i32 1, i32 2}
40+
!3 = !{i32 4, i32 100000}
41+
!9 = !{!"ext_intel_esimd", i32 53}
42+
!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"}
43+
!67 = !{!9}
44+
!68 = !{i32 1}
45+
!69 = !{}
46+
!78 = !{i32 8}
47+
!79 = !{i32 16}
48+
!80 = !{i32 32}
49+
!81 = !{i32 64}

llvm/tools/sycl-post-link/sycl-post-link.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,11 @@ bool isTargetCompatibleWithModule(const std::optional<std::string> &Target,
710710
}
711711

712712
// Check if module sub group size is compatible with the target.
713-
if (ModuleReqs.SubGroupSize.has_value() &&
713+
// For ESIMD, the reqd_sub_group_size will be 1; this is not
714+
// a supported by any backend (e.g. no backend can support a kernel
715+
// with sycl::reqd_sub_group_size(1)), but for ESIMD, this is
716+
// a special case.
717+
if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() &&
714718
!is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize))
715719
return false;
716720

sycl/include/sycl/ext/oneapi/experimental/architectures.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
// device::get_info<ext::oneapi::experimental::info::device::architecture>
1818
// - alias of architecture if this is Intel GPU architecture in format
1919
// intel_gpu_<intel_gpu_arch_version>
20+
// - supported aspects of architecture in
21+
// llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
2022
//
2123
// Important note about keeping architecture IDs below unique:
2224
// - the architecture ID must be a hex number with 16 digits

0 commit comments

Comments
 (0)