Skip to content

Commit 92c1fd1

Browse files
committed
Allow rematerialization of virtual reg uses
Currently the isReallyTriviallyReMaterializableGeneric() implementation prevents rematerialization on any virtual register use on the grounds that it is not a trivial rematerialization and that we do not want to extend liveranges. It appears that LRE logic does not attempt to extend a liverange of a source register for rematerialization so that is not an issue. That is checked in the LiveRangeEdit::allUsesAvailableAt(). The only non-trivial aspect of it is accounting for tied-defs which normally represent a read-modify-write operation and are not rematerializable. The test for a tied-def situation already exists in the /CodeGen/AMDGPU/remat-vop.mir, test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve. The change has affected ARM/Thumb, Mips, RISCV, and x86. For the targets where I more or less understand the asm it seems to reduce spilling (as expected) or be neutral. However, it needs a review by all targets' specialists. Differential Revision: https://reviews.llvm.org/D106408
1 parent 1d02a8b commit 92c1fd1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4217
-4202
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,11 @@ class TargetInstrInfo : public MCInstrInfo {
117117
const MachineFunction &MF) const;
118118

119119
/// Return true if the instruction is trivially rematerializable, meaning it
120-
/// has no side effects and requires no operands that aren't always available.
121-
/// This means the only allowed uses are constants and unallocatable physical
122-
/// registers so that the instructions result is independent of the place
123-
/// in the function.
120+
/// has no side effects. Uses of constants and unallocatable physical
121+
/// registers are always trivial to rematerialize so that the instructions
122+
/// result is independent of the place in the function. Uses of virtual
123+
/// registers are allowed but it is the caller's responsibility to ensure these
124+
/// operands are valid at the point the instruction is being moved.
124125
bool isTriviallyReMaterializable(const MachineInstr &MI,
125126
AAResults *AA = nullptr) const {
126127
return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
@@ -140,8 +141,7 @@ class TargetInstrInfo : public MCInstrInfo {
140141
/// set, this hook lets the target specify whether the instruction is actually
141142
/// trivially rematerializable, taking into consideration its operands. This
142143
/// predicate must return false if the instruction has any side effects other
143-
/// than producing a value, or if it requres any address registers that are
144-
/// not always available.
144+
/// than producing a value.
145145
/// Requirements must be checked as stated in isTriviallyReMaterializable().
146146
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
147147
AAResults *AA) const {

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
921921
const MachineRegisterInfo &MRI = MF.getRegInfo();
922922

923923
// Remat clients assume operand 0 is the defined register.
924-
if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
924+
if (!MI.getNumOperands() || !MI.getOperand(0).isReg() ||
925+
MI.getOperand(0).isTied())
925926
return false;
926927
Register DefReg = MI.getOperand(0).getReg();
927928

@@ -983,12 +984,6 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
983984
// same virtual register, though.
984985
if (MO.isDef() && Reg != DefReg)
985986
return false;
986-
987-
// Don't allow any virtual-register uses. Rematting an instruction with
988-
// virtual register uses would length the live ranges of the uses, which
989-
// is not necessarily a good idea, certainly not "trivial".
990-
if (MO.isUse())
991-
return false;
992987
}
993988

994989
// Everything checked out.

llvm/test/CodeGen/AMDGPU/remat-sop.mir

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,66 @@ body: |
5151
S_NOP 0, implicit %2
5252
S_ENDPGM 0
5353
...
54+
# The liverange of %0 covers a point of rematerialization, source value is
55+
# available.
56+
---
57+
name: test_remat_s_mov_b32_vreg_src_long_lr
58+
tracksRegLiveness: true
59+
machineFunctionInfo:
60+
stackPtrOffsetReg: $sgpr32
61+
body: |
62+
bb.0:
63+
; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
64+
; GCN: renamable $sgpr0 = IMPLICIT_DEF
65+
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
66+
; GCN: S_NOP 0, implicit killed renamable $sgpr1
67+
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
68+
; GCN: S_NOP 0, implicit killed renamable $sgpr1
69+
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
70+
; GCN: S_NOP 0, implicit killed renamable $sgpr1
71+
; GCN: S_NOP 0, implicit killed renamable $sgpr0
72+
; GCN: S_ENDPGM 0
73+
%0:sreg_32 = IMPLICIT_DEF
74+
%1:sreg_32 = S_MOV_B32 %0:sreg_32
75+
%2:sreg_32 = S_MOV_B32 %0:sreg_32
76+
%3:sreg_32 = S_MOV_B32 %0:sreg_32
77+
S_NOP 0, implicit %1
78+
S_NOP 0, implicit %2
79+
S_NOP 0, implicit %3
80+
S_NOP 0, implicit %0
81+
S_ENDPGM 0
82+
...
83+
# The liverange of %0 does not cover a point of rematerialization, source value is
84+
# unavailable and we do not want to artificially extend the liverange.
85+
---
86+
name: test_no_remat_s_mov_b32_vreg_src_short_lr
87+
tracksRegLiveness: true
88+
machineFunctionInfo:
89+
stackPtrOffsetReg: $sgpr32
90+
body: |
91+
bb.0:
92+
; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
93+
; GCN: renamable $sgpr0 = IMPLICIT_DEF
94+
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
95+
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
96+
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
97+
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
98+
; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
99+
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
100+
; GCN: S_NOP 0, implicit killed renamable $sgpr1
101+
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
102+
; GCN: S_NOP 0, implicit killed renamable $sgpr1
103+
; GCN: S_NOP 0, implicit killed renamable $sgpr0
104+
; GCN: S_ENDPGM 0
105+
%0:sreg_32 = IMPLICIT_DEF
106+
%1:sreg_32 = S_MOV_B32 %0:sreg_32
107+
%2:sreg_32 = S_MOV_B32 %0:sreg_32
108+
%3:sreg_32 = S_MOV_B32 %0:sreg_32
109+
S_NOP 0, implicit %1
110+
S_NOP 0, implicit %2
111+
S_NOP 0, implicit %3
112+
S_ENDPGM 0
113+
...
54114
---
55115
name: test_remat_s_mov_b64
56116
tracksRegLiveness: true

llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
2929
; ENABLE-NEXT: pophs {r11, pc}
3030
; ENABLE-NEXT: .LBB0_3: @ %while.body.preheader
3131
; ENABLE-NEXT: movw r12, :lower16:skip
32-
; ENABLE-NEXT: sub r1, r1, #1
32+
; ENABLE-NEXT: sub r3, r1, #1
3333
; ENABLE-NEXT: movt r12, :upper16:skip
3434
; ENABLE-NEXT: .LBB0_4: @ %while.body
3535
; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
36-
; ENABLE-NEXT: ldrb r3, [r0]
37-
; ENABLE-NEXT: ldrb r3, [r12, r3]
38-
; ENABLE-NEXT: add r0, r0, r3
39-
; ENABLE-NEXT: sub r3, r1, #1
40-
; ENABLE-NEXT: cmp r3, r1
36+
; ENABLE-NEXT: ldrb r1, [r0]
37+
; ENABLE-NEXT: ldrb r1, [r12, r1]
38+
; ENABLE-NEXT: add r0, r0, r1
39+
; ENABLE-NEXT: sub r1, r3, #1
40+
; ENABLE-NEXT: cmp r1, r3
4141
; ENABLE-NEXT: bhs .LBB0_6
4242
; ENABLE-NEXT: @ %bb.5: @ %while.body
4343
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
4444
; ENABLE-NEXT: cmp r0, r2
45-
; ENABLE-NEXT: mov r1, r3
45+
; ENABLE-NEXT: mov r3, r1
4646
; ENABLE-NEXT: blo .LBB0_4
4747
; ENABLE-NEXT: .LBB0_6: @ %if.end29
4848
; ENABLE-NEXT: pop {r11, pc}
@@ -119,20 +119,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
119119
; DISABLE-NEXT: pophs {r11, pc}
120120
; DISABLE-NEXT: .LBB0_3: @ %while.body.preheader
121121
; DISABLE-NEXT: movw r12, :lower16:skip
122-
; DISABLE-NEXT: sub r1, r1, #1
122+
; DISABLE-NEXT: sub r3, r1, #1
123123
; DISABLE-NEXT: movt r12, :upper16:skip
124124
; DISABLE-NEXT: .LBB0_4: @ %while.body
125125
; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
126-
; DISABLE-NEXT: ldrb r3, [r0]
127-
; DISABLE-NEXT: ldrb r3, [r12, r3]
128-
; DISABLE-NEXT: add r0, r0, r3
129-
; DISABLE-NEXT: sub r3, r1, #1
130-
; DISABLE-NEXT: cmp r3, r1
126+
; DISABLE-NEXT: ldrb r1, [r0]
127+
; DISABLE-NEXT: ldrb r1, [r12, r1]
128+
; DISABLE-NEXT: add r0, r0, r1
129+
; DISABLE-NEXT: sub r1, r3, #1
130+
; DISABLE-NEXT: cmp r1, r3
131131
; DISABLE-NEXT: bhs .LBB0_6
132132
; DISABLE-NEXT: @ %bb.5: @ %while.body
133133
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
134134
; DISABLE-NEXT: cmp r0, r2
135-
; DISABLE-NEXT: mov r1, r3
135+
; DISABLE-NEXT: mov r3, r1
136136
; DISABLE-NEXT: blo .LBB0_4
137137
; DISABLE-NEXT: .LBB0_6: @ %if.end29
138138
; DISABLE-NEXT: pop {r11, pc}

llvm/test/CodeGen/ARM/funnel-shift-rot.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,13 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
7373
; SCALAR-NEXT: push {r4, r5, r11, lr}
7474
; SCALAR-NEXT: rsb r3, r2, #0
7575
; SCALAR-NEXT: and r4, r2, #63
76-
; SCALAR-NEXT: and lr, r3, #63
77-
; SCALAR-NEXT: rsb r3, lr, #32
76+
; SCALAR-NEXT: and r12, r3, #63
77+
; SCALAR-NEXT: rsb r3, r12, #32
7878
; SCALAR-NEXT: lsl r2, r0, r4
79-
; SCALAR-NEXT: lsr r12, r0, lr
80-
; SCALAR-NEXT: orr r3, r12, r1, lsl r3
81-
; SCALAR-NEXT: subs r12, lr, #32
82-
; SCALAR-NEXT: lsrpl r3, r1, r12
79+
; SCALAR-NEXT: lsr lr, r0, r12
80+
; SCALAR-NEXT: orr r3, lr, r1, lsl r3
81+
; SCALAR-NEXT: subs lr, r12, #32
82+
; SCALAR-NEXT: lsrpl r3, r1, lr
8383
; SCALAR-NEXT: subs r5, r4, #32
8484
; SCALAR-NEXT: movwpl r2, #0
8585
; SCALAR-NEXT: cmp r5, #0
@@ -88,8 +88,8 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
8888
; SCALAR-NEXT: lsr r3, r0, r3
8989
; SCALAR-NEXT: orr r3, r3, r1, lsl r4
9090
; SCALAR-NEXT: lslpl r3, r0, r5
91-
; SCALAR-NEXT: lsr r0, r1, lr
92-
; SCALAR-NEXT: cmp r12, #0
91+
; SCALAR-NEXT: lsr r0, r1, r12
92+
; SCALAR-NEXT: cmp lr, #0
9393
; SCALAR-NEXT: movwpl r0, #0
9494
; SCALAR-NEXT: orr r1, r3, r0
9595
; SCALAR-NEXT: mov r0, r2
@@ -245,15 +245,15 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
245245
; CHECK: @ %bb.0:
246246
; CHECK-NEXT: .save {r4, r5, r11, lr}
247247
; CHECK-NEXT: push {r4, r5, r11, lr}
248-
; CHECK-NEXT: and lr, r2, #63
248+
; CHECK-NEXT: and r12, r2, #63
249249
; CHECK-NEXT: rsb r2, r2, #0
250-
; CHECK-NEXT: rsb r3, lr, #32
250+
; CHECK-NEXT: rsb r3, r12, #32
251251
; CHECK-NEXT: and r4, r2, #63
252-
; CHECK-NEXT: lsr r12, r0, lr
253-
; CHECK-NEXT: orr r3, r12, r1, lsl r3
254-
; CHECK-NEXT: subs r12, lr, #32
252+
; CHECK-NEXT: lsr lr, r0, r12
253+
; CHECK-NEXT: orr r3, lr, r1, lsl r3
254+
; CHECK-NEXT: subs lr, r12, #32
255255
; CHECK-NEXT: lsl r2, r0, r4
256-
; CHECK-NEXT: lsrpl r3, r1, r12
256+
; CHECK-NEXT: lsrpl r3, r1, lr
257257
; CHECK-NEXT: subs r5, r4, #32
258258
; CHECK-NEXT: movwpl r2, #0
259259
; CHECK-NEXT: cmp r5, #0
@@ -262,8 +262,8 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
262262
; CHECK-NEXT: lsr r3, r0, r3
263263
; CHECK-NEXT: orr r3, r3, r1, lsl r4
264264
; CHECK-NEXT: lslpl r3, r0, r5
265-
; CHECK-NEXT: lsr r0, r1, lr
266-
; CHECK-NEXT: cmp r12, #0
265+
; CHECK-NEXT: lsr r0, r1, r12
266+
; CHECK-NEXT: cmp lr, #0
267267
; CHECK-NEXT: movwpl r0, #0
268268
; CHECK-NEXT: orr r1, r0, r3
269269
; CHECK-NEXT: mov r0, r2

llvm/test/CodeGen/ARM/funnel-shift.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -224,31 +224,31 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
224224
; CHECK-NEXT: mov r3, #0
225225
; CHECK-NEXT: bl __aeabi_uldivmod
226226
; CHECK-NEXT: add r0, r2, #27
227-
; CHECK-NEXT: lsl r6, r6, #27
228-
; CHECK-NEXT: and r1, r0, #63
229227
; CHECK-NEXT: lsl r2, r7, #27
228+
; CHECK-NEXT: and r12, r0, #63
229+
; CHECK-NEXT: lsl r6, r6, #27
230230
; CHECK-NEXT: orr r7, r6, r7, lsr #5
231+
; CHECK-NEXT: rsb r3, r12, #32
232+
; CHECK-NEXT: lsr r2, r2, r12
231233
; CHECK-NEXT: mov r6, #63
232-
; CHECK-NEXT: rsb r3, r1, #32
233-
; CHECK-NEXT: lsr r2, r2, r1
234-
; CHECK-NEXT: subs r12, r1, #32
235-
; CHECK-NEXT: bic r6, r6, r0
236234
; CHECK-NEXT: orr r2, r2, r7, lsl r3
235+
; CHECK-NEXT: subs r3, r12, #32
236+
; CHECK-NEXT: bic r6, r6, r0
237237
; CHECK-NEXT: lsl r5, r9, #1
238-
; CHECK-NEXT: lsrpl r2, r7, r12
238+
; CHECK-NEXT: lsrpl r2, r7, r3
239+
; CHECK-NEXT: subs r1, r6, #32
239240
; CHECK-NEXT: lsl r0, r5, r6
240-
; CHECK-NEXT: subs r4, r6, #32
241-
; CHECK-NEXT: lsl r3, r8, #1
241+
; CHECK-NEXT: lsl r4, r8, #1
242242
; CHECK-NEXT: movwpl r0, #0
243-
; CHECK-NEXT: orr r3, r3, r9, lsr #31
243+
; CHECK-NEXT: orr r4, r4, r9, lsr #31
244244
; CHECK-NEXT: orr r0, r0, r2
245245
; CHECK-NEXT: rsb r2, r6, #32
246-
; CHECK-NEXT: cmp r4, #0
247-
; CHECK-NEXT: lsr r1, r7, r1
246+
; CHECK-NEXT: cmp r1, #0
248247
; CHECK-NEXT: lsr r2, r5, r2
249-
; CHECK-NEXT: orr r2, r2, r3, lsl r6
250-
; CHECK-NEXT: lslpl r2, r5, r4
251-
; CHECK-NEXT: cmp r12, #0
248+
; CHECK-NEXT: orr r2, r2, r4, lsl r6
249+
; CHECK-NEXT: lslpl r2, r5, r1
250+
; CHECK-NEXT: lsr r1, r7, r12
251+
; CHECK-NEXT: cmp r3, #0
252252
; CHECK-NEXT: movwpl r1, #0
253253
; CHECK-NEXT: orr r1, r2, r1
254254
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}

llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,17 @@ define void @i56_or(i56* %a) {
9191
; BE-LABEL: i56_or:
9292
; BE: @ %bb.0:
9393
; BE-NEXT: mov r1, r0
94-
; BE-NEXT: ldr r12, [r0]
9594
; BE-NEXT: ldrh r2, [r1, #4]!
9695
; BE-NEXT: ldrb r3, [r1, #2]
9796
; BE-NEXT: orr r2, r3, r2, lsl #8
98-
; BE-NEXT: orr r2, r2, r12, lsl #24
99-
; BE-NEXT: orr r2, r2, #384
100-
; BE-NEXT: strb r2, [r1, #2]
101-
; BE-NEXT: lsr r3, r2, #8
102-
; BE-NEXT: strh r3, [r1]
103-
; BE-NEXT: bic r1, r12, #255
104-
; BE-NEXT: orr r1, r1, r2, lsr #24
97+
; BE-NEXT: ldr r3, [r0]
98+
; BE-NEXT: orr r2, r2, r3, lsl #24
99+
; BE-NEXT: orr r12, r2, #384
100+
; BE-NEXT: strb r12, [r1, #2]
101+
; BE-NEXT: lsr r2, r12, #8
102+
; BE-NEXT: strh r2, [r1]
103+
; BE-NEXT: bic r1, r3, #255
104+
; BE-NEXT: orr r1, r1, r12, lsr #24
105105
; BE-NEXT: str r1, [r0]
106106
; BE-NEXT: mov pc, lr
107107
%aa = load i56, i56* %a
@@ -127,13 +127,13 @@ define void @i56_and_or(i56* %a) {
127127
; BE-NEXT: ldrb r3, [r1, #2]
128128
; BE-NEXT: strb r2, [r1, #2]
129129
; BE-NEXT: orr r2, r3, r12, lsl #8
130-
; BE-NEXT: ldr r12, [r0]
131-
; BE-NEXT: orr r2, r2, r12, lsl #24
132-
; BE-NEXT: orr r2, r2, #384
133-
; BE-NEXT: lsr r3, r2, #8
134-
; BE-NEXT: strh r3, [r1]
135-
; BE-NEXT: bic r1, r12, #255
136-
; BE-NEXT: orr r1, r1, r2, lsr #24
130+
; BE-NEXT: ldr r3, [r0]
131+
; BE-NEXT: orr r2, r2, r3, lsl #24
132+
; BE-NEXT: orr r12, r2, #384
133+
; BE-NEXT: lsr r2, r12, #8
134+
; BE-NEXT: strh r2, [r1]
135+
; BE-NEXT: bic r1, r3, #255
136+
; BE-NEXT: orr r1, r1, r12, lsr #24
137137
; BE-NEXT: str r1, [r0]
138138
; BE-NEXT: mov pc, lr
139139

llvm/test/CodeGen/ARM/neon-copy.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,16 +1340,16 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
13401340
; CHECK-NEXT: .pad #8
13411341
; CHECK-NEXT: sub sp, sp, #8
13421342
; CHECK-NEXT: vmov.u16 r1, d0[1]
1343-
; CHECK-NEXT: and r0, r0, #3
1343+
; CHECK-NEXT: and r12, r0, #3
13441344
; CHECK-NEXT: vmov.u16 r2, d0[2]
1345-
; CHECK-NEXT: mov r3, sp
1346-
; CHECK-NEXT: vmov.u16 r12, d0[3]
1347-
; CHECK-NEXT: orr r0, r3, r0, lsl #1
1345+
; CHECK-NEXT: mov r0, sp
1346+
; CHECK-NEXT: vmov.u16 r3, d0[3]
1347+
; CHECK-NEXT: orr r0, r0, r12, lsl #1
13481348
; CHECK-NEXT: vst1.16 {d0[0]}, [r0:16]
13491349
; CHECK-NEXT: vldr d0, [sp]
13501350
; CHECK-NEXT: vmov.16 d0[1], r1
13511351
; CHECK-NEXT: vmov.16 d0[2], r2
1352-
; CHECK-NEXT: vmov.16 d0[3], r12
1352+
; CHECK-NEXT: vmov.16 d0[3], r3
13531353
; CHECK-NEXT: add sp, sp, #8
13541354
; CHECK-NEXT: bx lr
13551355
%tmp = extractelement <8 x i16> %x, i32 0

0 commit comments

Comments
 (0)