Skip to content

[MLIR][GPUToLLVMSPV] Use llvm.func attributes to convert gpu.shuffle #116967

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions mlir/include/mlir/Conversion/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -543,10 +543,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleOp"> {
let summary =
"Generate LLVM operations to be ingested by a SPIR-V backend for gpu operations";
let dependentDialects = [
"LLVM::LLVMDialect",
"spirv::SPIRVDialect",
];
let dependentDialects = ["LLVM::LLVMDialect"];
let options = [
Option<"indexBitwidth", "index-bitwidth", "unsigned",
/*default=kDeriveIndexBitwidthFromDataLayout*/"0",
Expand Down
1 change: 0 additions & 1 deletion mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,4 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV
MLIRLLVMCommonConversion
MLIRLLVMDialect
MLIRSPIRVAttrToLLVMConversion
MLIRSPIRVDialect
)
11 changes: 5 additions & 6 deletions mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
Expand Down Expand Up @@ -274,10 +272,11 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}

/// Get the subgroup size from the target or return a default.
static int getSubgroupSize(Operation *op) {
return spirv::lookupTargetEnvOrDefault(op)
.getResourceLimits()
.getSubgroupSize();
static std::optional<int> getSubgroupSize(Operation *op) {
auto parentFunc = op->getParentOfType<LLVM::LLVMFuncOp>();
if (!parentFunc)
return std::nullopt;
return parentFunc.getIntelReqdSubGroupSize();
}

static bool hasValidWidth(gpu::ShuffleOp op) {
Expand Down
97 changes: 11 additions & 86 deletions mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -227,84 +227,9 @@ gpu.module @barriers {

// -----

// Check `gpu.shuffle` conversion with default subgroup size.

gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: }

// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
func.func @gpu_shuffles(%val0: i32, %id: i32,
%val1: i64, %mask: i32,
%val2: f32, %delta_up: i32,
%val3: f64, %delta_down: i32) {
%width = arith.constant 32 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: } : (i32, i32) -> i32
// CHECK: llvm.mlir.constant(true) : i1
// CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: } : (i64, i32) -> i64
// CHECK: llvm.mlir.constant(true) : i1
// CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects= #llvm.memory_effects
// CHECK-SAME: } : (f32, i32) -> f32
// CHECK: llvm.mlir.constant(true) : i1
// CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects= #llvm.memory_effects
// CHECK-SAME: } : (f64, i32) -> f64
// CHECK: llvm.mlir.constant(true) : i1
%shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32
%shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64
%shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32
%shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64
return
}
}

// -----

// Check `gpu.shuffle` conversion with explicit subgroup size.

gpu.module @shuffles attributes {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
} {
gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
Expand Down Expand Up @@ -352,15 +277,15 @@ gpu.module @shuffles attributes {
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
func.func @gpu_shuffles(%i8_val: i8,
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
llvm.func @gpu_shuffles(%i8_val: i8,
%i16_val: i16,
%i32_val: i32,
%i64_val: i64,
%f16_val: f16,
%f32_val: f32,
%f64_val: f64,
%offset: i32) {
%offset: i32) attributes {intel_reqd_sub_group_size = 16 : i32} {
%width = arith.constant 16 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
Expand All @@ -383,7 +308,7 @@ gpu.module @shuffles attributes {
%shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16
%shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32
%shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64
return
llvm.return
}
}

Expand All @@ -392,11 +317,11 @@ gpu.module @shuffles attributes {
// Cannot convert due to shuffle width and target subgroup size mismatch

gpu.module @shuffles_mismatch {
func.func @gpu_shuffles(%val: i32, %id: i32) {
llvm.func @gpu_shuffles(%val: i32, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
%width = arith.constant 16 : i32
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
return
llvm.return
}
}

Expand All @@ -405,10 +330,10 @@ gpu.module @shuffles_mismatch {
// Cannot convert due to variable shuffle width

gpu.module @shuffles_mismatch {
func.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) {
llvm.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
return
llvm.return
}
}

Expand All @@ -417,11 +342,11 @@ gpu.module @shuffles_mismatch {
// Cannot convert due to value type not being supported by the conversion

gpu.module @not_supported_lowering {
func.func @gpu_shuffles(%val: i1, %id: i32) {
llvm.func @gpu_shuffles(%val: i1, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
%width = arith.constant 32 : i32
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
%shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1
return
llvm.return
}
}

Expand Down
Loading