-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[mlir] Add optimization attrs for gpu-to-llvmspv function declarations and calls #99301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Finlay (FMarno) ChangesAdds the attributes nounwind and willreturn to all function declarations. Adds Full diff: https://github.com/llvm/llvm-project/pull/99301.diff 2 Files Affected:
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ebeb8f803d71d..81682a52c8c4b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -40,11 +40,10 @@ namespace mlir {
// Helper Functions
//===----------------------------------------------------------------------===//
-static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
- StringRef name,
- ArrayRef<Type> paramTypes,
- Type resultType,
- bool isConvergent = false) {
+static LLVM::LLVMFuncOp
+lookupOrCreateSPIRVFn(Operation *symbolTable, StringRef name,
+ ArrayRef<Type> paramTypes, Type resultType,
+ bool hasMemoryEffects = true, bool isConvergent = false) {
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
SymbolTable::lookupSymbolIn(symbolTable, name));
if (!func) {
@@ -53,6 +52,17 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
symbolTable->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes));
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
+ func.setNoUnwind(true);
+ func.setWillReturn(true);
+ if (!hasMemoryEffects) {
+ // no externally observable effects
+ constexpr auto noModRef = mlir::LLVM::ModRefInfo::NoModRef;
+ auto memAttr = b.getAttr<LLVM::MemoryEffectsAttr>(
+ /*other*/ noModRef,
+ /*argMem*/ noModRef, /*inaccessibleMem*/ noModRef);
+ func.setMemoryAttr(memAttr);
+ }
+
func.setConvergent(isConvergent);
}
return func;
@@ -91,8 +101,9 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
assert(moduleOp && "Expecting module");
Type flagTy = rewriter.getI32Type();
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
- LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
- moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);
+ LLVM::LLVMFuncOp func =
+ lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
+ /*hasMemoryEffects*/ true, /*isConvergent=*/true);
// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
@@ -134,8 +145,8 @@ struct LaunchConfigConversion : ConvertToLLVMPattern {
assert(moduleOp && "Expecting module");
Type dimTy = rewriter.getI32Type();
Type indexTy = getTypeConverter()->getIndexType();
- LLVM::LLVMFuncOp func =
- lookupOrCreateSPIRVFn(moduleOp, funcName, dimTy, indexTy);
+ LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
+ moduleOp, funcName, dimTy, indexTy, /*hasMemoryEffects*/ false);
Location loc = op->getLoc();
gpu::Dimension dim = getDimension(op);
@@ -268,9 +279,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Type valueType = adaptor.getValue().getType();
Type offsetType = adaptor.getOffset().getType();
Type resultType = valueType;
- LLVM::LLVMFuncOp func =
- lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
- resultType, /*isConvergent=*/true);
+ LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
+ moduleOp, funcName, {valueType, offsetType}, resultType,
+ /*hasMemoryEffects*/ true, /*isConvergent=*/true);
Location loc = op->getLoc();
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 1b0f89a9a573e..d094c786414fc 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -4,16 +4,46 @@
// RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
gpu.module @builtins {
- // CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64
- // CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64
- // CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32
- // CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32
+ // CHECK-64: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-64: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i64 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z12get_local_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z14get_local_sizej(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z13get_global_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
+ // CHECK-32: llvm.func spir_funccc @_Z12get_group_idj(i32) -> i32 attributes {
+ // CHECK-SAME-DAG: memory = #llvm.memory_effects<other = none, argMem = none, inaccessibleMem = none>
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: will_return
// CHECK-LABEL: gpu_block_id
func.func @gpu_block_id() -> (index, index, index) {
@@ -104,7 +134,11 @@ gpu.module @builtins {
// -----
gpu.module @barriers {
- // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_barrier
func.func @gpu_barrier() {
@@ -120,10 +154,26 @@ gpu.module @barriers {
// Check `gpu.shuffle` conversion with default subgroup size.
gpu.module @shuffles {
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
@@ -155,10 +205,26 @@ gpu.module @shuffles {
gpu.module @shuffles attributes {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
} {
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}
+ // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
+ // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
+ // CHECK-SAME-DAG: no_unwind
+ // CHECK-SAME-DAG: convergent
+ // CHECK-NOT: memory = #llvm.memory_effects
+ // CHECK-SAME: }
// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
|
151c7de
to
4c5887e
Compare
c458336
to
54d16fe
Compare
Adds attributes including nounwind, willreturn and memory(none).
1c237ad
to
a097821
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM % small nit comment
…s and calls (#99301) Summary: Adds the attributes nounwind and willreturn to all function declarations. Adds `memory(none)` equivalent to the id/dimension function declarations. The function declaration attributes are copied to the function calls. `nounwind` is legal because there are no exception in SPIR-V. I also do not see any reason why any of these functions would not return when used correctly. I'm confident that the get id/dim functions will have no externally observable memory effects, but think the convergent functions will have effects. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250575
Adds the attributes nounwind and willreturn to all function declarations. Adds
memory(none)
equivalent to the id/dimension function declarations. The function declaration attributes are copied to the function calls.nounwind
is legal because there are no exception in SPIR-V. I also do not see any reason why any of these functions would not return when used correctly.I'm confident that the get id/dim functions will have no externally observable memory effects, but think the convergent functions will have effects.