Skip to content

Commit 4df9544

Browse files
committed
[mlir][spirv] Make EntryPointABIAttr.local_size optional
* It is not required by OpenCL/Intel Level Zero and can be set programmatically. * Add a GPU-to-SPIR-V lowering for the case when the attribute is not present. * Set a higher benefit on the WorkGroupSizeConversion pattern so that lowering from the attribute is always tried first. Differential Revision: https://reviews.llvm.org/D120399
1 parent bd0bddc commit 4df9544

File tree

5 files changed

+89
-6
lines changed

5 files changed

+89
-6
lines changed

mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
2727
// points in the generated SPIR-V module:
2828
// 1) WorkGroup Size.
2929
def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
30-
StructFieldAttr<"local_size", I32ElementsAttr>
30+
StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
3131
]>;
3232

3333
def SPV_ExtensionArrayAttr : TypedArrayAttrBase<

mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ class SingleDimLaunchConfigConversion : public OpConversionPattern<SourceOp> {
5555
/// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
5656
class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
5757
public:
58-
using OpConversionPattern<gpu::BlockDimOp>::OpConversionPattern;
58+
WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
59+
: OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
5960

6061
LogicalResult
6162
matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
@@ -159,6 +160,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
159160
gpu::BlockDimOp op, OpAdaptor adaptor,
160161
ConversionPatternRewriter &rewriter) const {
161162
auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
163+
if (!workGroupSizeAttr)
164+
return failure();
165+
162166
auto val = workGroupSizeAttr
163167
.getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
164168
auto convertedType =
@@ -366,6 +370,7 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
366370
GPUModuleEndConversion, GPUReturnOpConversion,
367371
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
368372
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
373+
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
369374
LaunchConfigConversion<gpu::ThreadIdOp,
370375
spirv::BuiltIn::LocalInvocationId>,
371376
SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,

mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ StringRef spirv::getEntryPointABIAttrName() { return "spv.entry_point_abi"; }
120120

121121
spirv::EntryPointABIAttr
122122
spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
123+
if (localSize.empty())
124+
return spirv::EntryPointABIAttr::get(nullptr, context);
125+
123126
assert(localSize.size() == 3);
124127
return spirv::EntryPointABIAttr::get(
125128
DenseElementsAttr::get<int32_t>(

mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp,
136136

137137
// Specifies the spv.ExecutionModeOp.
138138
auto localSizeAttr = entryPointAttr.local_size();
139-
SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
140-
builder.create<spirv::ExecutionModeOp>(
141-
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
142-
funcOp->removeAttr(entryPointAttrName);
139+
if (localSizeAttr) {
140+
auto values = localSizeAttr.getValues<int32_t>();
141+
SmallVector<int32_t, 3> localSize(values);
142+
builder.create<spirv::ExecutionModeOp>(
143+
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
144+
funcOp->removeAttr(entryPointAttrName);
145+
}
143146
return success();
144147
}
145148

mlir/test/Conversion/GPUToSPIRV/builtins.mlir

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,78 @@ module attributes {gpu.container_module} {
223223

224224
// -----
225225

226+
module attributes {gpu.container_module} {
227+
func @builtin() {
228+
%c0 = arith.constant 1 : index
229+
gpu.launch_func @kernels::@builtin_workgroup_size_x
230+
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
231+
return
232+
}
233+
234+
// CHECK-LABEL: spv.module @{{.*}}
235+
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
236+
gpu.module @kernels {
237+
gpu.func @builtin_workgroup_size_x() kernel
238+
attributes {spv.entry_point_abi = {}} {
239+
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
240+
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
241+
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
242+
%0 = gpu.block_dim x
243+
gpu.return
244+
}
245+
}
246+
}
247+
248+
// -----
249+
250+
module attributes {gpu.container_module} {
251+
func @builtin() {
252+
%c0 = arith.constant 1 : index
253+
gpu.launch_func @kernels::@builtin_workgroup_size_y
254+
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
255+
return
256+
}
257+
258+
// CHECK-LABEL: spv.module @{{.*}}
259+
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
260+
gpu.module @kernels {
261+
gpu.func @builtin_workgroup_size_y() kernel
262+
attributes {spv.entry_point_abi = {}} {
263+
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
264+
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
265+
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
266+
%0 = gpu.block_dim y
267+
gpu.return
268+
}
269+
}
270+
}
271+
272+
// -----
273+
274+
module attributes {gpu.container_module} {
275+
func @builtin() {
276+
%c0 = arith.constant 1 : index
277+
gpu.launch_func @kernels::@builtin_workgroup_size_z
278+
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
279+
return
280+
}
281+
282+
// CHECK-LABEL: spv.module @{{.*}}
283+
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
284+
gpu.module @kernels {
285+
gpu.func @builtin_workgroup_size_z() kernel
286+
attributes {spv.entry_point_abi = {}} {
287+
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
288+
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
289+
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
290+
%0 = gpu.block_dim z
291+
gpu.return
292+
}
293+
}
294+
}
295+
296+
// -----
297+
226298
module attributes {gpu.container_module} {
227299
// CHECK-LABEL: spv.module @{{.*}} Logical GLSL450
228300
// CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")

0 commit comments

Comments
 (0)