Skip to content

Commit b3e1f07

Browse files
committed
MachineLICM: Allow hoisting REG_SEQUENCE
This is just a fancy copy. Extend the copy handling to cover reg_sequence with only virtual register uses. This avoids some test regressions in a future commit.
1 parent a34f1fa commit b3e1f07

File tree

5 files changed

+35
-27
lines changed

5 files changed

+35
-27
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 26 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1264,25 +1264,32 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
12641264

12651265
// If we have a COPY with other uses in the loop, hoist to allow the users to
12661266
// also be hoisted.
1267-
Register DefReg;
1268-
if (MI.isCopy() && (DefReg = MI.getOperand(0).getReg()).isVirtual() &&
1269-
MI.getOperand(1).getReg().isVirtual() &&
1270-
IsLoopInvariantInst(MI, CurLoop) &&
1271-
any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
1272-
[&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
1273-
if (!CurLoop->contains(&UseMI))
1274-
return false;
1275-
1276-
// COPY is a cheap instruction, but if moving it won't cause high
1277-
// RP we're fine to hoist it even if the user can't be hoisted
1278-
// later Otherwise we want to check the user if it's hoistable
1279-
if (CanCauseHighRegPressure(Cost, false) &&
1280-
!CurLoop->isLoopInvariant(UseMI, DefReg))
1281-
return false;
1282-
1283-
return true;
1284-
}))
1285-
return true;
1267+
// TODO: Handle all isCopyLike?
1268+
if (MI.isCopy() || MI.isRegSequence()) {
1269+
Register DefReg = MI.getOperand(0).getReg();
1270+
if (DefReg.isVirtual() &&
1271+
all_of(MI.uses(),
1272+
[](const MachineOperand &UseOp) {
1273+
return !UseOp.isReg() || UseOp.getReg().isVirtual();
1274+
}) &&
1275+
IsLoopInvariantInst(MI, CurLoop) &&
1276+
any_of(MRI->use_nodbg_instructions(DefReg),
1277+
[&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
1278+
if (!CurLoop->contains(&UseMI))
1279+
return false;
1280+
1281+
// COPY is a cheap instruction, but if moving it won't cause
1282+
// high RP we're fine to hoist it even if the user can't be
1283+
// hoisted later. Otherwise we want to check the user if it's
1284+
// hoistable
1285+
if (CanCauseHighRegPressure(Cost, false) &&
1286+
!CurLoop->isLoopInvariant(UseMI, DefReg))
1287+
return false;
1288+
1289+
return true;
1290+
}))
1291+
return true;
1292+
}
12861293

12871294
// High register pressure situation, only hoist if the instruction is going
12881295
// to be remat'ed.

llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8907,17 +8907,17 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
89078907
; SI: ; %bb.0: ; %entry
89088908
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
89098909
; SI-NEXT: s_waitcnt lgkmcnt(0)
8910-
; SI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x0
8910+
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
89118911
; SI-NEXT: s_mov_b64 s[8:9], 0
89128912
; SI-NEXT: s_mov_b32 s7, 0xf000
89138913
; SI-NEXT: v_mov_b32_e32 v4, s3
89148914
; SI-NEXT: v_mov_b32_e32 v5, s2
8915-
; SI-NEXT: s_mov_b32 s5, s1
8916-
; SI-NEXT: s_mov_b32 s4, s0
89178915
; SI-NEXT: s_waitcnt lgkmcnt(0)
8918-
; SI-NEXT: v_mov_b32_e32 v2, s10
8919-
; SI-NEXT: v_mov_b32_e32 v3, s11
8916+
; SI-NEXT: v_mov_b32_e32 v2, s4
8917+
; SI-NEXT: v_mov_b32_e32 v3, s5
89208918
; SI-NEXT: s_mov_b32 s6, -1
8919+
; SI-NEXT: s_mov_b32 s4, s0
8920+
; SI-NEXT: s_mov_b32 s5, s1
89218921
; SI-NEXT: .LBB127_1: ; %atomicrmw.start
89228922
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
89238923
; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]

llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,9 @@ body: |
1313
; CHECK-NEXT: {{ $}}
1414
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
16+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
1617
; CHECK-NEXT: {{ $}}
1718
; CHECK-NEXT: bb.1:
18-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
1919
; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]]
2020
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
2121
; CHECK-NEXT: S_BRANCH %bb.2

llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,10 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
88
; GCN-NEXT: s_mov_b32 s7, 0xf000
99
; GCN-NEXT: s_mov_b32 s10, -1
1010
; GCN-NEXT: s_mov_b32 s6, 0
11+
; GCN-NEXT: s_mov_b32 s11, s7
1112
; GCN-NEXT: s_waitcnt lgkmcnt(0)
12-
; GCN-NEXT: s_mov_b32 s9, s5
1313
; GCN-NEXT: s_mov_b32 s8, s4
14+
; GCN-NEXT: s_mov_b32 s9, s5
1415
; GCN-NEXT: v_mov_b32_e32 v0, 0
1516
; GCN-NEXT: s_branch .LBB0_2
1617
; GCN-NEXT: .LBB0_1: ; %loop.exit.guard
@@ -20,7 +21,6 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
2021
; GCN-NEXT: .LBB0_2: ; %bb1
2122
; GCN-NEXT: ; =>This Loop Header: Depth=1
2223
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
23-
; GCN-NEXT: s_mov_b32 s11, s7
2424
; GCN-NEXT: buffer_load_dword v1, off, s[8:11], 0
2525
; GCN-NEXT: s_waitcnt vmcnt(0)
2626
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, v1

llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ b18: ; preds = %b16, %b7
6969
br label %b22
7070

7171
b21: ; preds = %b22
72+
store volatile <64 x i32> %v20, ptr null
7273
tail call void @sammy() #3
7374
br label %b7
7475

0 commit comments

Comments (0)