Skip to content

Commit 3670e7f

Browse files
authored
[MLIR] Add the convergent attribute to the barrier and shuffle ops (#97807)
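When lowering from the GPU dialect to the LLVM dialect for SPIR-V, the builtin functions declared for the barrier op and the shuffle ops must carry the `convergent` attribute for correctness. Without it, LLVM transformations are free to make these calls control-dependent on additional values, which is invalid for operations that every participating work-item must execute together.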
1 parent 19cc461 commit 3670e7f

File tree

2 files changed: 17 additions (+17) and 14 deletions (-14).

mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ namespace mlir {
4343
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
4444
StringRef name,
4545
ArrayRef<Type> paramTypes,
46-
Type resultType) {
46+
Type resultType,
47+
bool isConvergent = false) {
4748
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
4849
SymbolTable::lookupSymbolIn(symbolTable, name));
4950
if (!func) {
@@ -52,6 +53,7 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
5253
symbolTable->getLoc(), name,
5354
LLVM::LLVMFunctionType::get(resultType, paramTypes));
5455
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
56+
func.setConvergent(isConvergent);
5557
}
5658
return func;
5759
}
@@ -89,8 +91,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
8991
assert(moduleOp && "Expecting module");
9092
Type flagTy = rewriter.getI32Type();
9193
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
92-
LLVM::LLVMFuncOp func =
93-
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy);
94+
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
95+
moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);
9496

9597
// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
9698
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
@@ -266,8 +268,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
266268
Type valueType = adaptor.getValue().getType();
267269
Type offsetType = adaptor.getOffset().getType();
268270
Type resultType = valueType;
269-
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
270-
moduleOp, funcName, {valueType, offsetType}, resultType);
271+
LLVM::LLVMFuncOp func =
272+
lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
273+
resultType, /*isConvergent=*/true);
271274

272275
Location loc = op->getLoc();
273276
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};

mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ gpu.module @builtins {
104104
// -----
105105

106106
gpu.module @barriers {
107-
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32)
107+
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}
108108

109109
// CHECK-LABEL: gpu_barrier
110110
func.func @gpu_barrier() {
@@ -120,10 +120,10 @@ gpu.module @barriers {
120120
// Check `gpu.shuffle` conversion with default subgroup size.
121121

122122
gpu.module @shuffles {
123-
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
124-
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
125-
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
126-
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
123+
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
124+
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
125+
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
126+
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
127127

128128
// CHECK-LABEL: gpu_shuffles
129129
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
@@ -155,10 +155,10 @@ gpu.module @shuffles {
155155
gpu.module @shuffles attributes {
156156
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
157157
} {
158-
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
159-
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
160-
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
161-
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
158+
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
159+
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
160+
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
161+
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
162162

163163
// CHECK-LABEL: gpu_shuffles
164164
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)

0 commit comments

Comments
 (0)