Skip to content

Commit 82d22a1

Browse files
authored
[AMDGPU] Fixed folding of inline imm into dot w/o opsel (#73589)
A splat packed constant can be folded as an inline immediate, but only when the operand uses op_sel. On gfx940 the code path that sets op_sel can be skipped because of a hardware bug workaround, and the constant may then be folded without op_sel, which is a bug. Fixed.
1 parent d01237c commit 82d22a1

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
205205
const uint64_t TSFlags = MI->getDesc().TSFlags;
206206
if (Fold.isImm()) {
207207
if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
208-
(!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
209208
AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
210209
ST->hasInv2PiInlineImm())) {
210+
if (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT))
211+
return false; // Prevent further folding of this operand without opsel.
212+
211213
// Set op_sel/op_sel_hi on this operand or bail out if op_sel is
212214
// already set.
213215
unsigned Opcode = MI->getOpcode();

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX906
2-
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940
3-
; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940
2+
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-SDAG
3+
; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-GISEL
44
; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
55
; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
66

@@ -43,7 +43,10 @@ entry:
4343

4444
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_op_sel:
4545
; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
46-
; GFX940: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}}{{$}}
46+
; GFX940-SDAG: s_mov_b32 [[K:s[0-9]+]], 0x10001
47+
; GFX940-SDAG: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
48+
; GFX940-GISEL: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001
49+
; GFX940-GISEL: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, s{{[0-9]+}}{{$}}
4750
; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
4851
define amdgpu_kernel void @test_llvm_amdgcn_udot2_op_sel(
4952
ptr addrspace(1) %r,

0 commit comments

Comments
 (0)