Skip to content

Commit 4c5887e

Browse files
committed
set optimization attrs for gpu-to-llvmspv ops
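Adds optimization attributes to the generated SPIR-V builtin declarations: nounwind and willreturn on every declaration, plus memory(none) on those without memory effects.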
1 parent cf67360 commit 4c5887e

File tree

2 files changed

+108
-31
lines changed

2 files changed

+108
-31
lines changed

mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,10 @@ namespace mlir {
4040
// Helper Functions
4141
//===----------------------------------------------------------------------===//
4242

43-
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
44-
StringRef name,
45-
ArrayRef<Type> paramTypes,
46-
Type resultType,
47-
bool isConvergent = false) {
43+
static LLVM::LLVMFuncOp
44+
lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name,
45+
ArrayRef<Type> paramTypes, Type resultType,
46+
bool hasMemoryEffects = true, bool isConvergent = false) {
4847
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
4948
SymbolTable::lookupSymbolIn(symbolTable, name));
5049
if (!func) {
@@ -53,6 +52,17 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
5352
symbolTable->getLoc(), name,
5453
LLVM::LLVMFunctionType::get(resultType, paramTypes));
5554
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
55+
func.setNoUnwind(true);
56+
func.setWillReturn(true);
57+
if (!hasMemoryEffects) {
58+
// The builtin neither reads nor writes any memory location: mark it memory(none).
59+
constexpr auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
60+
auto memAttr = b.getAttr<LLVM::MemoryEffectsAttr>(
61+
/*other*/ noModRef,
62+
/*argMem*/ noModRef, /*inaccessibleMem*/ noModRef);
63+
func.setMemoryAttr(memAttr);
64+
}
65+
5666
func.setConvergent(isConvergent);
5767
}
5868
return func;
@@ -91,8 +101,9 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
91101
assert(moduleOp && "Expecting module");
92102
Type flagTy = rewriter.getI32Type();
93103
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
94-
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
95-
moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);
104+
LLVM::LLVMFuncOp func =
105+
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
106+
/*hasMemoryEffects*/ true, /*isConvergent=*/true);
96107

97108
// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
98109
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
@@ -134,8 +145,8 @@ struct LaunchConfigConversion : ConvertToLLVMPattern {
134145
assert(moduleOp && "Expecting module");
135146
Type dimTy = rewriter.getI32Type();
136147
Type indexTy = getTypeConverter()->getIndexType();
137-
LLVM::LLVMFuncOp func =
138-
lookupOrCreateSPIRVFn(moduleOp, funcName, dimTy, indexTy);
148+
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
149+
moduleOp, funcName, dimTy, indexTy, /*hasMemoryEffects*/ false);
139150

140151
Location loc = op->getLoc();
141152
gpu::Dimension dim = getDimension(op);
@@ -268,9 +279,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
268279
Type valueType = adaptor.getValue().getType();
269280
Type offsetType = adaptor.getOffset().getType();
270281
Type resultType = valueType;
271-
LLVM::LLVMFuncOp func =
272-
lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
273-
resultType, /*isConvergent=*/true);
282+
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
283+
moduleOp, funcName, {valueType, offsetType}, resultType,
284+
/*hasMemoryEffects*/ true, /*isConvergent=*/true);
274285

275286
Location loc = op->getLoc();
276287
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};

mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,46 @@
44
// RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
55

66
gpu.module @builtins {
7-
// CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64
8-
// CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64
9-
// CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64
10-
// CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64
11-
// CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64
12-
// CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32
13-
// CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32
14-
// CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32
15-
// CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32
16-
// CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32
7+
// CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
8+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
9+
// CHECK-SAME-DAG: no_unwind
10+
// CHECK-SAME-DAG: will_return
11+
// CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64 attributes {
12+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
13+
// CHECK-SAME-DAG: no_unwind
14+
// CHECK-SAME-DAG: will_return
15+
// CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64 attributes {
16+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
17+
// CHECK-SAME-DAG: no_unwind
18+
// CHECK-SAME-DAG: will_return
19+
// CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64 attributes {
20+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
21+
// CHECK-SAME-DAG: no_unwind
22+
// CHECK-SAME-DAG: will_return
23+
// CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64 attributes {
24+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
25+
// CHECK-SAME-DAG: no_unwind
26+
// CHECK-SAME-DAG: will_return
27+
// CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32 attributes {
28+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
29+
// CHECK-SAME-DAG: no_unwind
30+
// CHECK-SAME-DAG: will_return
31+
// CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32 attributes {
32+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
33+
// CHECK-SAME-DAG: no_unwind
34+
// CHECK-SAME-DAG: will_return
35+
// CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32 attributes {
36+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
37+
// CHECK-SAME-DAG: no_unwind
38+
// CHECK-SAME-DAG: will_return
39+
// CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32 attributes {
40+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
41+
// CHECK-SAME-DAG: no_unwind
42+
// CHECK-SAME-DAG: will_return
43+
// CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32 attributes {
44+
// CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
45+
// CHECK-SAME-DAG: no_unwind
46+
// CHECK-SAME-DAG: will_return
1747

1848
// CHECK-LABEL: gpu_block_id
1949
func.func @gpu_block_id() -> (index, index, index) {
@@ -104,7 +134,11 @@ gpu.module @builtins {
104134
// -----
105135

106136
gpu.module @barriers {
107-
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}
137+
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {
138+
// CHECK-SAME-DAG: no_unwind
139+
// CHECK-SAME-DAG: convergent
140+
// CHECK-NOT: memory = #llvm.memory_effects
141+
// CHECK-SAME: }
108142

109143
// CHECK-LABEL: gpu_barrier
110144
func.func @gpu_barrier() {
@@ -120,10 +154,26 @@ gpu.module @barriers {
120154
// Check `gpu.shuffle` conversion with default subgroup size.
121155

122156
gpu.module @shuffles {
123-
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
124-
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
125-
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
126-
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
157+
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
158+
// CHECK-SAME-DAG: no_unwind
159+
// CHECK-SAME-DAG: convergent
160+
// CHECK-NOT: memory = #llvm.memory_effects
161+
// CHECK-SAME: }
162+
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
163+
// CHECK-SAME-DAG: no_unwind
164+
// CHECK-SAME-DAG: convergent
165+
// CHECK-NOT: memory = #llvm.memory_effects
166+
// CHECK-SAME: }
167+
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
168+
// CHECK-SAME-DAG: no_unwind
169+
// CHECK-SAME-DAG: convergent
170+
// CHECK-NOT: memory = #llvm.memory_effects
171+
// CHECK-SAME: }
172+
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
173+
// CHECK-SAME-DAG: no_unwind
174+
// CHECK-SAME-DAG: convergent
175+
// CHECK-NOT: memory = #llvm.memory_effects
176+
// CHECK-SAME: }
127177

128178
// CHECK-LABEL: gpu_shuffles
129179
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
@@ -155,10 +205,26 @@ gpu.module @shuffles {
155205
gpu.module @shuffles attributes {
156206
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
157207
} {
158-
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
159-
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
160-
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
161-
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
208+
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
209+
// CHECK-SAME-DAG: no_unwind
210+
// CHECK-SAME-DAG: convergent
211+
// CHECK-NOT: memory = #llvm.memory_effects
212+
// CHECK-SAME: }
213+
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
214+
// CHECK-SAME-DAG: no_unwind
215+
// CHECK-SAME-DAG: convergent
216+
// CHECK-NOT: memory = #llvm.memory_effects
217+
// CHECK-SAME: }
218+
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
219+
// CHECK-SAME-DAG: no_unwind
220+
// CHECK-SAME-DAG: convergent
221+
// CHECK-NOT: memory = #llvm.memory_effects
222+
// CHECK-SAME: }
223+
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
224+
// CHECK-SAME-DAG: no_unwind
225+
// CHECK-SAME-DAG: convergent
226+
// CHECK-NOT: memory = #llvm.memory_effects
227+
// CHECK-SAME: }
162228

163229
// CHECK-LABEL: gpu_shuffles
164230
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)

0 commit comments

Comments
 (0)