-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][GPUToLLVMSPV] Use llvm.func
attributes to convert gpu.shuffle
#116967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Use `llvm.func`'s `intel_reqd_sub_group_size` attribute instead of SPIR-V environment attributes in the `gpu.shuffle` conversion pattern. This metadata is needed to check the semantics of the operation are supported, i.e., it has a constant width and its value is equal to the sub-group size. As the pass also converts `gpu.func` to `llvm.func`, adding a discardable attribute of name `intel_reqd_sub_group_size` attribute to the latter is enough for this pattern to work. We no longer have a notion of "default" sub-group size, so this attribute needs to be set in the parent function for `gpu.shuffle` operations to be converted. Drop dependency on the SPIR-V dialect as we no longer require creating attributes from this dialect to lower `gpu.shuffle` instances. Signed-off-by: Victor Perez <[email protected]>
This addresses our concerns discussed in #90972 (comment) |
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Victor Perez (victor-eds) ChangesUse As the pass also converts We no longer have a notion of "default" sub-group size, so this attribute needs to be set in the parent function for Drop dependency on the SPIR-V dialect as we no longer require creating attributes from this dialect to lower Full diff: https://github.com/llvm/llvm-project/pull/116967.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 4d272ba219c6f1..15a84b893387ad 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -543,10 +543,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleOp"> {
let summary =
"Generate LLVM operations to be ingested by a SPIR-V backend for gpu operations";
- let dependentDialects = [
- "LLVM::LLVMDialect",
- "spirv::SPIRVDialect",
- ];
+ let dependentDialects = ["LLVM::LLVMDialect"];
let options = [
Option<"indexBitwidth", "index-bitwidth", "unsigned",
/*default=kDeriveIndexBitwidthFromDataLayout*/"0",
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
index d47c5e679d86e8..f2381c623e2014 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
@@ -10,5 +10,4 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV
MLIRLLVMCommonConversion
MLIRLLVMDialect
MLIRSPIRVAttrToLLVMConversion
- MLIRSPIRVDialect
)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index bb6a38c0e76edf..03745f4537e99e 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -20,9 +20,7 @@
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
-#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
@@ -274,10 +272,11 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}
/// Get the subgroup size from the target or return a default.
- static int getSubgroupSize(Operation *op) {
- return spirv::lookupTargetEnvOrDefault(op)
- .getResourceLimits()
- .getSubgroupSize();
+ static std::optional<int> getSubgroupSize(Operation *op) {
+ auto parentFunc = op->getParentOfType<LLVM::LLVMFuncOp>();
+ if (!parentFunc)
+ return std::nullopt;
+ return parentFunc.getIntelReqdSubGroupSize();
}
static bool hasValidWidth(gpu::ShuffleOp op) {
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index c143d030ed362b..e147d705c5a06c 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -227,84 +227,9 @@ gpu.module @barriers {
// -----
-// Check `gpu.shuffle` conversion with default subgroup size.
-
-gpu.module @shuffles {
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: }
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: }
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: }
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: }
-
- // CHECK-LABEL: gpu_shuffles
- // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
- func.func @gpu_shuffles(%val0: i32, %id: i32,
- %val1: i64, %mask: i32,
- %val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) {
- %width = arith.constant 32 : i32
- // CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: } : (i32, i32) -> i32
- // CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]]) {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects = #llvm.memory_effects
- // CHECK-SAME: } : (i64, i32) -> i64
- // CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]]) {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects= #llvm.memory_effects
- // CHECK-SAME: } : (f32, i32) -> f32
- // CHECK: llvm.mlir.constant(true) : i1
- // CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]]) {
- // CHECK-SAME-DAG: no_unwind
- // CHECK-SAME-DAG: convergent
- // CHECK-SAME-DAG: will_return
- // CHECK-NOT: memory_effects= #llvm.memory_effects
- // CHECK-SAME: } : (f64, i32) -> f64
- // CHECK: llvm.mlir.constant(true) : i1
- %shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32
- %shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64
- %shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32
- %shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64
- return
- }
-}
-
-// -----
-
// Check `gpu.shuffle` conversion with explicit subgroup size.
-gpu.module @shuffles attributes {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
-} {
+gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
@@ -352,15 +277,15 @@ gpu.module @shuffles attributes {
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
- // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
- func.func @gpu_shuffles(%i8_val: i8,
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
+ llvm.func @gpu_shuffles(%i8_val: i8,
%i16_val: i16,
%i32_val: i32,
%i64_val: i64,
%f16_val: f16,
%f32_val: f32,
%f64_val: f64,
- %offset: i32) {
+ %offset: i32) attributes {intel_reqd_sub_group_size = 16 : i32} {
%width = arith.constant 16 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
@@ -383,7 +308,20 @@ gpu.module @shuffles attributes {
%shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16
%shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32
%shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64
- return
+ llvm.return
+ }
+}
+
+// -----
+
+// Check attaching a discardable attribute to a `gpu.func` hosting a `gpu.shuffle` operation works,
+// i.e., the attribute is propagated.
+
+gpu.module @shuffles_ {
+ gpu.func @propagated_size(%val: i32, %id: i32) attributes {intel_reqd_sub_group_size = 16 : i32} {
+ %width = arith.constant 16 : i32
+ %shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
+ llvm.return
}
}
@@ -392,11 +330,11 @@ gpu.module @shuffles attributes {
// Cannot convert due to shuffle width and target subgroup size mismatch
gpu.module @shuffles_mismatch {
- func.func @gpu_shuffles(%val: i32, %id: i32) {
+ llvm.func @gpu_shuffles(%val: i32, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
%width = arith.constant 16 : i32
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
- return
+ llvm.return
}
}
@@ -405,10 +343,10 @@ gpu.module @shuffles_mismatch {
// Cannot convert due to variable shuffle width
gpu.module @shuffles_mismatch {
- func.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) {
+ llvm.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
- return
+ llvm.return
}
}
@@ -417,11 +355,11 @@ gpu.module @shuffles_mismatch {
// Cannot convert due to value type not being supported by the conversion
gpu.module @not_supported_lowering {
- func.func @gpu_shuffles(%val: i1, %id: i32) {
+ llvm.func @gpu_shuffles(%val: i1, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
%width = arith.constant 32 : i32
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1
- return
+ llvm.return
}
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just a wee thing
I'd like to get merged by tomorrow European morning. If no other reviewer has any major concerns, I'll just go with it as this addresses the concerns we had at first. |
Use
llvm.func
'sintel_reqd_sub_group_size
attribute instead of SPIR-V environment attributes in thegpu.shuffle
conversion pattern. This metadata is needed to check the semantics of the operation are supported, i.e., it has a constant width and its value is equal to the sub-group size.As the pass also converts
gpu.func
tollvm.func
, adding a discardable attribute of nameintel_reqd_sub_group_size
attribute to the latter is enough for this pattern to work.We no longer have a notion of "default" sub-group size, so this attribute needs to be set in the parent function for
gpu.shuffle
operations to be converted.Drop dependency on the SPIR-V dialect as we no longer require creating attributes from this dialect to lower
gpu.shuffle
instances.