Skip to content

Commit 578adb3

Browse files
committed
[AMDGPU] Fix SIFoldOperands when folding clamp into pseudo scalar instructions
Clamp is canonically a v_max* instruction with a VGPR dst. Folding clamp into a pseudo scalar instruction can cause issues due to a change in regbank. We fix this with a copy.
1 parent a27f816 commit 578adb3

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1581,7 +1581,18 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
15811581

15821582
// Clamp is applied after omod, so it is OK if omod is set.
15831583
DefClamp->setImm(1);
1584-
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
1584+
1585+
Register DefReg = Def->getOperand(0).getReg();
1586+
Register MIDstReg = MI.getOperand(0).getReg();
1587+
if (TRI->isSGPRReg(*MRI, DefReg)) {
1588+
// Pseudo scalar instructions have an SGPR for dst and clamp is a v_max*
1589+
// instruction with a VGPR dst.
1590+
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
1591+
MIDstReg)
1592+
.addReg(DefReg);
1593+
} else {
1594+
MRI->replaceRegWith(MIDstReg, DefReg);
1595+
}
15851596
MI.eraseFromParent();
15861597

15871598
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
3+
---
4+
name: test
5+
tracksRegLiveness: true
6+
body: |
7+
bb.0:
8+
liveins: $sgpr0
9+
10+
; CHECK-LABEL: name: test
11+
; CHECK: liveins: $sgpr0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
14+
; CHECK-NEXT: [[V_S_RSQ_F32_e64_:%[0-9]+]]:sgpr_32 = nofpexcept V_S_RSQ_F32_e64 0, [[COPY]], 1, 0, implicit $mode, implicit $exec
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_S_RSQ_F32_e64_]]
16+
; CHECK-NEXT: EXP_DONE 0, killed [[COPY1]], [[COPY1]], [[COPY1]], [[COPY1]], -1, 0, 15, implicit $exec
17+
; CHECK-NEXT: S_ENDPGM 0
18+
%0:sgpr_32 = COPY $sgpr0
19+
%1:sgpr_32 = nofpexcept V_S_RSQ_F32_e64 0, %0, 0, 0, implicit $mode, implicit $exec
20+
%2:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %1, 0, %1, -1, 0, implicit $mode, implicit $exec
21+
EXP_DONE 0, killed %2, %2, %2, %2, -1, 0, 15, implicit $exec
22+
S_ENDPGM 0
23+
24+
...

0 commit comments

Comments
 (0)