[mlir][spirv] Make EntryPointABIAttr.local_size optional

Hardcode84 · Hardcode84 · commit 4df95441089a · 2022-03-11T22:25:23.000+03:00
* It doesn't required by OpenCL/Intel Level Zero and can be set programmatically. * Add GPU to spirv lowering in case when attribute is not present. * Set higher benefit to WorkGroupSizeConversion pattern so it will always try to lower first from the attribute. Differential Revision: https://reviews.llvm.org/D120399
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td b/mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td
@@ -27,7 +27,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
 // points in the generated SPIR-V module:
 // 1) WorkGroup Size.
 def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
-    StructFieldAttr<"local_size", I32ElementsAttr>
+    StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
 ]>;
 
 def SPV_ExtensionArrayAttr : TypedArrayAttrBase<
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp
@@ -55,7 +55,8 @@ class SingleDimLaunchConfigConversion : public OpConversionPattern<SourceOp> {
 /// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
 class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
 public:
-  using OpConversionPattern<gpu::BlockDimOp>::OpConversionPattern;
+  WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
+      : OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
 
   LogicalResult
   matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
@@ -159,6 +160,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
     gpu::BlockDimOp op, OpAdaptor adaptor,
     ConversionPatternRewriter &rewriter) const {
   auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
+  if (!workGroupSizeAttr)
+    return failure();
+
   auto val = workGroupSizeAttr
                  .getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
   auto convertedType =
@@ -366,6 +370,7 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
       GPUModuleEndConversion, GPUReturnOpConversion,
       LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
       LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
+      LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
       LaunchConfigConversion<gpu::ThreadIdOp,
                              spirv::BuiltIn::LocalInvocationId>,
       SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,
diff --git a/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp
@@ -120,6 +120,9 @@ StringRef spirv::getEntryPointABIAttrName() { return "spv.entry_point_abi"; }
 
 spirv::EntryPointABIAttr
 spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
+  if (localSize.empty())
+    return spirv::EntryPointABIAttr::get(nullptr, context);
+
   assert(localSize.size() == 3);
   return spirv::EntryPointABIAttr::get(
       DenseElementsAttr::get<int32_t>(
diff --git a/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp b/mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp
@@ -136,10 +136,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp,
 
   // Specifies the spv.ExecutionModeOp.
   auto localSizeAttr = entryPointAttr.local_size();
-  SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
-  builder.create<spirv::ExecutionModeOp>(
-      funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
-  funcOp->removeAttr(entryPointAttrName);
+  if (localSizeAttr) {
+    auto values = localSizeAttr.getValues<int32_t>();
+    SmallVector<int32_t, 3> localSize(values);
+    builder.create<spirv::ExecutionModeOp>(
+        funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
+    funcOp->removeAttr(entryPointAttrName);
+  }
   return success();
 }
 
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -223,6 +223,78 @@ module attributes {gpu.container_module} {
 
 // -----
 
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_x
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL:  spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_x() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
+      %0 = gpu.block_dim x
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_y
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL:  spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_y() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
+      %0 = gpu.block_dim y
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @builtin() {
+    %c0 = arith.constant 1 : index
+    gpu.launch_func @kernels::@builtin_workgroup_size_z
+        blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
+    return
+  }
+
+  // CHECK-LABEL:  spv.module @{{.*}}
+  // CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
+  gpu.module @kernels {
+    gpu.func @builtin_workgroup_size_z() kernel
+      attributes {spv.entry_point_abi = {}} {
+      // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
+      // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
+      // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
+      %0 = gpu.block_dim z
+      gpu.return
+    }
+  }
+}
+
+// -----
+
 module attributes {gpu.container_module} {
   // CHECK-LABEL:  spv.module @{{.*}} Logical GLSL450
   // CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")