Skip to content

Commit 300a550

Browse files
authored
RegisterCoalescer: Fix implicit operand handling during rematerialize (#75271)
If the rematerialization was placing a subregister into a super register, and implicit operands referenced the original register, we need to add undef flags to the now-subregister-indexed implicit operands. Depends on #75152.
1 parent 2c0abdf commit 300a550

File tree

3 files changed

+255
-2
lines changed

3 files changed

+255
-2
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
16111611
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
16121612
}
16131613

1614-
if (NewMI.getOperand(0).getSubReg())
1615-
NewMI.getOperand(0).setIsUndef();
1614+
NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
16161615

16171616
// Transfer over implicit operands to the rematerialized instruction.
16181617
for (MachineOperand &MO : ImplicitOps)
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing < %s | FileCheck %s
3+
4+
%"class.llvm::APInt." = type <{ %union.anon., i32, [4 x i8] }>
5+
%union.anon. = type { i64 }
6+
7+
define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0_(ptr %r) {
8+
; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0_:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: xorl %eax, %eax
11+
; CHECK-NEXT: xorl %edx, %edx
12+
; CHECK-NEXT: xorl %ecx, %ecx
13+
; CHECK-NEXT: jmp .LBB0_1
14+
; CHECK-NEXT: .p2align 4, 0x90
15+
; CHECK-NEXT: .LBB0_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
16+
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
17+
; CHECK-NEXT: movl %edx, %edx
18+
; CHECK-NEXT: shlq $4, %rdx
19+
; CHECK-NEXT: movl $0, (%rdi,%rdx)
20+
; CHECK-NEXT: movl %ecx, %edx
21+
; CHECK-NEXT: .LBB0_1: # %bb
22+
; CHECK-NEXT: # =>This Loop Header: Depth=1
23+
; CHECK-NEXT: # Child Loop BB0_3 Depth 2
24+
; CHECK-NEXT: xorl $1, %ecx
25+
; CHECK-NEXT: xorl %esi, %esi
26+
; CHECK-NEXT: movq %rcx, %r8
27+
; CHECK-NEXT: testb %al, %al
28+
; CHECK-NEXT: jne .LBB0_4
29+
; CHECK-NEXT: .p2align 4, 0x90
30+
; CHECK-NEXT: .LBB0_3: # %for.body.i.i.i.i.i.3
31+
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
32+
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
33+
; CHECK-NEXT: orq $1, %r8
34+
; CHECK-NEXT: orq $1, %rsi
35+
; CHECK-NEXT: testb %al, %al
36+
; CHECK-NEXT: je .LBB0_3
37+
; CHECK-NEXT: jmp .LBB0_4
38+
entry:
39+
br label %bb
40+
41+
bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
42+
%i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
43+
%xor = xor i32 %i.0, 1
44+
%idxprom = zext nneg i32 %xor to i64
45+
br label %for.body.i.i.i.i.i
46+
47+
for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
48+
%lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
49+
%lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %idxprom, %bb ]
50+
%exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
51+
br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
52+
53+
for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
54+
%sunkaddr55 = mul i64 %lsr.iv37, 0
55+
%i = xor i64 %lsr.iv, 0
56+
%lsr.iv.next = or i64 %lsr.iv, 1
57+
%lsr.iv.next38 = or i64 %lsr.iv37, 1
58+
br label %for.body.i.i.i.i.i
59+
60+
_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
61+
%idxprom12 = zext nneg i32 %i.0 to i64
62+
%arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
63+
store i32 0, ptr %arrayidx13, align 4
64+
br label %bb
65+
}
66+
67+
; This variant hit an assertion failure and never reached the verifier error.
68+
define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0__assert(ptr %r) {
69+
; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0__assert:
70+
; CHECK: # %bb.0: # %entry
71+
; CHECK-NEXT: xorl %eax, %eax
72+
; CHECK-NEXT: xorl %edx, %edx
73+
; CHECK-NEXT: xorl %ecx, %ecx
74+
; CHECK-NEXT: jmp .LBB1_1
75+
; CHECK-NEXT: .p2align 4, 0x90
76+
; CHECK-NEXT: .LBB1_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
77+
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
78+
; CHECK-NEXT: movl %edx, %edx
79+
; CHECK-NEXT: shlq $4, %rdx
80+
; CHECK-NEXT: movl $0, (%rdi,%rdx)
81+
; CHECK-NEXT: movl %ecx, %edx
82+
; CHECK-NEXT: .LBB1_1: # %bb
83+
; CHECK-NEXT: # =>This Loop Header: Depth=1
84+
; CHECK-NEXT: # Child Loop BB1_3 Depth 2
85+
; CHECK-NEXT: xorl $1, %ecx
86+
; CHECK-NEXT: movq %rcx, %rsi
87+
; CHECK-NEXT: shlq $4, %rsi
88+
; CHECK-NEXT: movq (%rsi), %rsi
89+
; CHECK-NEXT: xorl %r8d, %r8d
90+
; CHECK-NEXT: testb %al, %al
91+
; CHECK-NEXT: jne .LBB1_4
92+
; CHECK-NEXT: .p2align 4, 0x90
93+
; CHECK-NEXT: .LBB1_3: # %for.body.i.i.i.i.i.3
94+
; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
95+
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
96+
; CHECK-NEXT: orq $1, %rsi
97+
; CHECK-NEXT: orq $1, %r8
98+
; CHECK-NEXT: testb %al, %al
99+
; CHECK-NEXT: je .LBB1_3
100+
; CHECK-NEXT: jmp .LBB1_4
101+
entry:
102+
br label %bb
103+
104+
bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
105+
%i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
106+
%xor = xor i32 %i.0, 1
107+
%idxprom = zext nneg i32 %xor to i64
108+
%arrayidx = getelementptr [2 x %"class.llvm::APInt."], ptr null, i64 0, i64 %idxprom
109+
%i.i.i.i.i.i = load ptr, ptr %arrayidx, align 16
110+
%i.i.i.i.i.i36 = ptrtoint ptr %i.i.i.i.i.i to i64
111+
br label %for.body.i.i.i.i.i
112+
113+
for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
114+
%lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
115+
%lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %i.i.i.i.i.i36, %bb ]
116+
%exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
117+
br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
118+
119+
for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
120+
%sunkaddr55 = mul i64 %lsr.iv37, 0
121+
%i = xor i64 %lsr.iv, 0
122+
%lsr.iv.next = or i64 %lsr.iv, 1
123+
%lsr.iv.next38 = or i64 %lsr.iv37, 1
124+
br label %for.body.i.i.i.i.i
125+
126+
_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
127+
%idxprom12 = zext nneg i32 %i.0 to i64
128+
%arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
129+
store i32 0, ptr %arrayidx13, align 4
130+
br label %bb
131+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing -run-pass=register-coalescer -o - %s | FileCheck %s
3+
4+
# The %1 = MOV32r0 is rematerialized as a subregister of %2. The
5+
# implicit-def %1 operand needs to have an undef added, just like the
6+
# main result operand.
7+
8+
---
9+
name: remat_into_subregister_set_undef_implicit_operand_subregisters
10+
tracksRegLiveness: true
11+
body: |
12+
; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters
13+
; CHECK: bb.0:
14+
; CHECK-NEXT: successors: %bb.1(0x80000000)
15+
; CHECK-NEXT: liveins: $rdi
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
18+
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
19+
; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_32bit, implicit-def [[MOV32r0_2]]
20+
; CHECK-NEXT: {{ $}}
21+
; CHECK-NEXT: bb.1:
22+
; CHECK-NEXT: successors: %bb.2(0x80000000)
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: bb.2:
27+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: bb.3:
32+
; CHECK-NEXT: successors: %bb.4(0x80000000)
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: bb.4:
36+
; CHECK-NEXT: successors: %bb.1(0x80000000)
37+
; CHECK-NEXT: {{ $}}
38+
; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
39+
; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
40+
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
41+
; CHECK-NEXT: JMP_1 %bb.1
42+
bb.0:
43+
liveins: $rdi
44+
45+
undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
46+
%1:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def %1
47+
undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
48+
49+
bb.1:
50+
%2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
51+
52+
bb.2:
53+
JCC_1 %bb.4, 5, implicit killed undef $eflags
54+
55+
bb.3:
56+
57+
bb.4:
58+
dead %3:gr32 = MOV32rr %1
59+
dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
60+
%1:gr32 = COPY %2.sub_32bit
61+
JMP_1 %bb.1
62+
63+
...
64+
65+
# Same, except the implicit-def on the original instruction already
66+
# has a subregister index.
67+
68+
---
69+
name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
70+
tracksRegLiveness: true
71+
body: |
72+
; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
73+
; CHECK: bb.0:
74+
; CHECK-NEXT: successors: %bb.1(0x80000000)
75+
; CHECK-NEXT: liveins: $rdi
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
78+
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_1]].sub_8bit
79+
; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_8bit, implicit-def [[MOV32r0_2]]
80+
; CHECK-NEXT: {{ $}}
81+
; CHECK-NEXT: bb.1:
82+
; CHECK-NEXT: successors: %bb.2(0x80000000)
83+
; CHECK-NEXT: {{ $}}
84+
; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
85+
; CHECK-NEXT: {{ $}}
86+
; CHECK-NEXT: bb.2:
87+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
88+
; CHECK-NEXT: {{ $}}
89+
; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: bb.3:
92+
; CHECK-NEXT: successors: %bb.4(0x80000000)
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: {{ $}}
95+
; CHECK-NEXT: bb.4:
96+
; CHECK-NEXT: successors: %bb.1(0x80000000)
97+
; CHECK-NEXT: {{ $}}
98+
; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
99+
; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
100+
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
101+
; CHECK-NEXT: JMP_1 %bb.1
102+
bb.0:
103+
liveins: $rdi
104+
105+
undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
106+
%1:gr32 = MOV32r0 implicit-def dead $eflags, undef implicit-def %1.sub_8bit
107+
undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
108+
109+
bb.1:
110+
%2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
111+
112+
bb.2:
113+
JCC_1 %bb.4, 5, implicit killed undef $eflags
114+
115+
bb.3:
116+
117+
bb.4:
118+
dead %3:gr32 = MOV32rr %1
119+
dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
120+
%1:gr32 = COPY %2.sub_32bit
121+
JMP_1 %bb.1
122+
123+
...

0 commit comments

Comments
 (0)