Skip to content

Commit 27e1931

Browse files
committed
[AMDGPU] Fix PreRARematerialize scheduler pass sinking subreg defs
When collecting trivially rematerializable defs, skip any subreg defs. We do not want to sink these. Differential Revision: https://reviews.llvm.org/D121874
1 parent bd38234 commit 27e1931

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,8 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions(
741741
continue;
742742

743743
MachineInstr *Def = MRI.getOneDef(Reg)->getParent();
744-
if (!Def || !isTriviallyReMaterializable(*Def, AA))
744+
if (!Def || Def->getOperand(0).getSubReg() != 0 ||
745+
!isTriviallyReMaterializable(*Def, AA))
745746
continue;
746747

747748
MachineInstr *UseI = &*MRI.use_instr_begin(Reg);

llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5731,4 +5731,113 @@ body: |
57315731
S_NOP 0, implicit %20, implicit %21
57325732
S_ENDPGM 0
57335733
...
5734+
---
5735+
name: test_occ_9_no_sink_one_def_of_undef_subreg
5736+
tracksRegLiveness: true
5737+
machineFunctionInfo:
5738+
isEntryFunction: true
5739+
body: |
5740+
; GFX908-LABEL: name: test_occ_9_no_sink_one_def_of_undef_subreg
5741+
; GFX908: bb.0:
5742+
; GFX908-NEXT: successors: %bb.1(0x80000000)
5743+
; GFX908-NEXT: {{ $}}
5744+
; GFX908-NEXT: %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
5745+
; GFX908-NEXT: %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
5746+
; GFX908-NEXT: %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
5747+
; GFX908-NEXT: %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
5748+
; GFX908-NEXT: %4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
5749+
; GFX908-NEXT: %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
5750+
; GFX908-NEXT: %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
5751+
; GFX908-NEXT: %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
5752+
; GFX908-NEXT: %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
5753+
; GFX908-NEXT: %9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
5754+
; GFX908-NEXT: %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
5755+
; GFX908-NEXT: %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
5756+
; GFX908-NEXT: %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
5757+
; GFX908-NEXT: %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
5758+
; GFX908-NEXT: %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
5759+
; GFX908-NEXT: %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
5760+
; GFX908-NEXT: %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
5761+
; GFX908-NEXT: %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
5762+
; GFX908-NEXT: %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
5763+
; GFX908-NEXT: %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
5764+
; GFX908-NEXT: %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
5765+
; GFX908-NEXT: %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
5766+
; GFX908-NEXT: %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
5767+
; GFX908-NEXT: undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
5768+
; GFX908-NEXT: {{ $}}
5769+
; GFX908-NEXT: bb.1:
5770+
; GFX908-NEXT: successors: %bb.2(0x80000000)
5771+
; GFX908-NEXT: {{ $}}
5772+
; GFX908-NEXT: %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
5773+
; GFX908-NEXT: S_NOP 0, implicit %24
5774+
; GFX908-NEXT: {{ $}}
5775+
; GFX908-NEXT: bb.2:
5776+
; GFX908-NEXT: S_NOP 0, implicit %23.sub1
5777+
; GFX908-NEXT: S_NOP 0, implicit %0, implicit %1
5778+
; GFX908-NEXT: S_NOP 0, implicit %2, implicit %3
5779+
; GFX908-NEXT: S_NOP 0, implicit %4, implicit %5
5780+
; GFX908-NEXT: S_NOP 0, implicit %6, implicit %7
5781+
; GFX908-NEXT: S_NOP 0, implicit %8, implicit %9
5782+
; GFX908-NEXT: S_NOP 0, implicit %10, implicit %11
5783+
; GFX908-NEXT: S_NOP 0, implicit %12, implicit %13
5784+
; GFX908-NEXT: S_NOP 0, implicit %14, implicit %15
5785+
; GFX908-NEXT: S_NOP 0, implicit %16, implicit %17
5786+
; GFX908-NEXT: S_NOP 0, implicit %18, implicit %19
5787+
; GFX908-NEXT: S_NOP 0, implicit %20, implicit %21
5788+
; GFX908-NEXT: S_NOP 0, implicit %22
5789+
; GFX908-NEXT: S_ENDPGM 0
5790+
bb.0:
5791+
successors: %bb.1
5792+
5793+
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
5794+
%1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
5795+
%2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
5796+
%3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
5797+
%4:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode, implicit-def $m0
5798+
%5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode, implicit-def $m0
5799+
%6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode, implicit-def $m0
5800+
%7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode, implicit-def $m0
5801+
%8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode, implicit-def $m0
5802+
%9:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode, implicit-def $m0
5803+
%10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode, implicit-def $m0
5804+
%11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode, implicit-def $m0
5805+
%12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode, implicit-def $m0
5806+
%13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode, implicit-def $m0
5807+
%14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode, implicit-def $m0
5808+
%15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode, implicit-def $m0
5809+
%16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode, implicit-def $m0
5810+
%17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode, implicit-def $m0
5811+
%18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
5812+
%19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
5813+
%20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
5814+
%21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
5815+
%22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
5816+
undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
5817+
5818+
bb.1:
5819+
; predecessors: %bb.0
5820+
successors: %bb.2
5821+
5822+
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
5823+
S_NOP 0, implicit %24
5824+
5825+
bb.2:
5826+
; predecessors: %bb.1
5827+
5828+
S_NOP 0, implicit %23.sub1
5829+
S_NOP 0, implicit %0, implicit %1
5830+
S_NOP 0, implicit %2, implicit %3
5831+
S_NOP 0, implicit %4, implicit %5
5832+
S_NOP 0, implicit %6, implicit %7
5833+
S_NOP 0, implicit %8, implicit %9
5834+
S_NOP 0, implicit %10, implicit %11
5835+
S_NOP 0, implicit %12, implicit %13
5836+
S_NOP 0, implicit %14, implicit %15
5837+
S_NOP 0, implicit %16, implicit %17
5838+
S_NOP 0, implicit %18, implicit %19
5839+
S_NOP 0, implicit %20, implicit %21
5840+
S_NOP 0, implicit %22
5841+
S_ENDPGM 0
5842+
...
57345843

0 commit comments

Comments
 (0)