Skip to content

Commit 0462aef

Browse files
committed
[AMDGPU] Inhibit SDWA if target instruction has a frame index (FI) operand
Differential Revision: https://reviews.llvm.org/D85918
1 parent d25cb5a commit 0462aef

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
992992
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
993993
return false;
994994

995+
if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
996+
if (!Src0->isReg() && !Src0->isImm())
997+
return false;
998+
}
999+
1000+
if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
1001+
if (!Src1->isReg() && !Src1->isImm())
1002+
return false;
1003+
}
1004+
9951005
return true;
9961006
}
9971007

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
2+
3+
# Do not convert an instruction to SDWA when one of its source operands is a stack object.
4+
5+
---
# Case: the stack object (%stack.0) is the first source operand of V_ADD_U32_e64.
# The GCN check expects the V_ADD_U32_e64 to still be present, with %stack.0 as
# its first source, after the si-peephole-sdwa pass has run.
# NOTE(review): the V_AND_B32_e32 with mask 255 presumably supplies the
# byte-select pattern the SDWA peephole would otherwise try to fold - confirm.
6+
# GCN-LABEL: name: sdwa_stack_object_src0
7+
# GCN: V_ADD_U32_e64 %stack.0, killed %1
8+
name: sdwa_stack_object_src0
9+
stack:
10+
- { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
11+
body: |
12+
bb.0:
13+
%0:vgpr_32 = IMPLICIT_DEF
14+
%1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
15+
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, killed %1, 0, implicit $exec
16+
S_ENDPGM 0, implicit %2
17+
18+
...
19+
---
# Case: the stack object (%stack.0) is the second source operand of V_ADD_U32_e64.
# The GCN check expects the V_ADD_U32_e64 to still be present, with %stack.0 as
# its second source, after the si-peephole-sdwa pass has run.
# NOTE(review): the V_AND_B32_e32 with mask 255 presumably supplies the
# byte-select pattern the SDWA peephole would otherwise try to fold - confirm.
20+
name: sdwa_stack_object_src1
21+
# GCN-LABEL: name: sdwa_stack_object_src1
22+
# GCN: V_ADD_U32_e64 killed %1, %stack.0
23+
stack:
24+
- { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
25+
body: |
26+
bb.0:
27+
%0:vgpr_32 = IMPLICIT_DEF
28+
%1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
29+
%2:vgpr_32 = V_ADD_U32_e64 killed %1, %stack.0, 0, implicit $exec
30+
S_ENDPGM 0, implicit %2
31+
32+
...

0 commit comments

Comments
 (0)