intel · AlexeySachkov · Jul 11, 2024 · Jun 14, 2024 · Jun 20, 2024 · Jun 26, 2024
@@ -1553,6 +1553,15 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
         getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
             << "-device" << Target;
       }
+      // ocloc has different names for some of the newer architectures;
+      // translate them to the apropriate value here.
+      DepInfo =
+          llvm::StringSwitch<StringRef>(DepInfo)
+              .Cases("pvc_vg", "12_61_7", "pvc_xt_c0_vg")
+              .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "12_70_4", "mtl_s")
+              .Cases("mtl_h", "12_71_4", "mtl_p")
+              .Cases("arl_h", "12_74_4", "xe_lpgplus_b0")
+              .Default(DepInfo);
       CmdArgs.push_back("-device");
       CmdArgs.push_back(Args.MakeArgString(DepInfo));
     }

@@ -160,9 +160,40 @@ def : TargetInfo<"x86_64", [], [], "", "", 1>;
 
 // TODO: The aspects listed for the intel_gpu targets right now are incomplete; 
 // only the fp16/fp64/atomic64 aspects are listed.
-def : TargetInfo<"intel_gpu_cfl", [AspectFp16, AspectFp64, AspectAtomic64], [8, 16, 32]>;
-def : TargetInfo<"intel_gpu_tgllp", [AspectFp16, AspectAtomic64], [8, 16, 32]>;
-def : TargetInfo<"intel_gpu_pvc", [AspectFp16, AspectFp64, AspectAtomic64], [16, 32]>;
+defvar Fp16Fp64Atomic64 = [AspectFp16, AspectFp64, AspectAtomic64];
+defvar Fp16Atomic64 = [AspectFp16, AspectAtomic64];
+defvar Sg8_16_32 = [8, 16, 32];
+defvar Sg16_32 = [16, 32];
+defvar IntelBaseAspects = [AspectExt_intel_esimd];
+class IntelTargetInfo<string Name, list<Aspect> Aspects, list<int> subGroupSizesList>
+: TargetInfo<Name, IntelBaseAspects # Aspects, subGroupSizesList>;
+// Note: only the "canonical" target names are listed here - see
+// SYCL::gen::resolveGenDevice().
+def : IntelTargetInfo<"intel_gpu_arl_h", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_mtl_h", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_mtl_u", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_pvc_vg", Fp16Fp64Atomic64, Sg16_32>;
+def : IntelTargetInfo<"intel_gpu_pvc", Fp16Fp64Atomic64, Sg16_32>;
+def : IntelTargetInfo<"intel_gpu_acm_g12", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_acm_g11", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_acm_g10", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_dg1", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_adl_n", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_adl_p", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_adl_s", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_rkl", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_tgllp", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_ehl", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_icllp", Fp16Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_cml", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_aml", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_whl", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_glk", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_apl", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_cfl", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_kbl", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_skl", Fp16Fp64Atomic64, Sg8_16_32>;
+def : IntelTargetInfo<"intel_gpu_bdw", Fp16Fp64Atomic64, Sg8_16_32>;
 
 //
 // CUDA / NVPTX device aspects

@@ -0,0 +1,49 @@
+; With ESIMD, the reqd_sub_group_size of a kernel will be 1. Normally,
+; no device can handled compiling for this reqd_sub_group_size, but 
+; for ESIMD, this is an exception. This test makes sure that 
+; ESIMD kernels are not filtered out when using filtering 
+; (e.g. -o intel_gpu_dg1,%t-dg1.table) and also ensures that 
+; non ESIMD kernels with reqd_sub_group_size=1 are still filtered out.
+
+; RUN: sycl-post-link %s -symbols -split=auto \
+; RUN: -o intel_gpu_dg1,%t-dg1.table
+
+; RUN: FileCheck %s -input-file=%t-dg1.table -check-prefix=CHECK-TABLE
+; RUN: FileCheck %s -input-file=%t-dg1_esimd_0.sym -check-prefix=CHECK-SYM -implicit-check-not=reqd_sub_group_size_kernel_1
+
+; CHECK-TABLE: _esimd_0.sym
+; CHECK-SYM: esimd_kernel
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_kernel void @esimd_kernel(ptr addrspace(1) noundef align 8 %_arg_out) #0 !sycl_explicit_simd !69 !intel_reqd_sub_group_size !68 !sycl_used_aspects !67 {
+entry:
+  ret void
+}
+
+define spir_kernel void @reqd_sub_group_size_kernel_1(ptr addrspace(1) noundef align 8 %_arg_out) #0 !intel_reqd_sub_group_size !68 {
+entry:
+  ret void
+}
+
+attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="double.cpp" "sycl-optlevel"="3" "uniform-work-group-size"="true" }
+
+!llvm.module.flags = !{!0, !1}
+!opencl.spir.version = !{!2}
+!spirv.Source = !{!3}
+!llvm.ident = !{!64}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 2}
+!3 = !{i32 4, i32 100000}
+!9 = !{!"ext_intel_esimd", i32 53}
+!64 = !{!"clang version 19.0.0git (/ws/llvm/clang a7f3a637bdd6299831f903bbed9e8d069fea5c86)"}
+!67 = !{!9}
+!68 = !{i32 1}
+!69 = !{}
+!78 = !{i32 8}
+!79 = !{i32 16}
+!80 = !{i32 32}
+!81 = !{i32 64}
@@ -1050,7 +1050,11 @@ bool isTargetCompatibleWithModule(const std::optional<std::string> &Target,
   }
 
   // Check if module sub group size is compatible with the target.
-  if (ModuleReqs.SubGroupSize.has_value() &&
+  // For ESIMD, the reqd_sub_group_size will be 1; this is not
+  // a supported by any backend (e.g. no backend can support a kernel
+  // with sycl::reqd_sub_group_size(1)), but for ESIMD, this is
+  // a special case.
+  if (!IrMD.isESIMD() && ModuleReqs.SubGroupSize.has_value() &&
       !is_contained(TargetInfo.subGroupSizes, *ModuleReqs.SubGroupSize))
     return false;
 

@@ -17,6 +17,8 @@
 //     device::get_info<ext::oneapi::experimental::info::device::architecture>
 //   - alias of architecture if this is Intel GPU architecture in format
 //     intel_gpu_<intel_gpu_arch_version>
+//   - supported aspects ofarchitecture in
+//     llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td 
 //
 // Important note about keeping architecture IDs below unique:
 //   - the architecture ID must be a hex number with 16 digits