Skip to content

Commit c26dfc8

Browse files
committed
[HACK] X86: Disable isCopyInstrImpl for undef subregister defs
This is a workaround for a coalescer bug where coalescing SUBREG_TO_REG ends up losing the liveness of the high bits of the source register. The result is an incorrect undef subregister def instead of preserving the high values. Work around the observed failure after the resulting mov is eliminated during allocation until a proper fix is ready. I believe the proper fix is to make SUBREG_TO_REG use a tied operand. The test should catch a regression originally observed after b7836d8 and should not show a difference after a496c8b is reverted. https://reviews.llvm.org/D156164
1 parent 5d976ed commit c26dfc8

File tree

2 files changed

+110
-1
lines changed

2 files changed

+110
-1
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3648,8 +3648,15 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
36483648

36493649
std::optional<DestSourcePair>
36503650
X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
3651-
if (MI.isMoveReg())
3651+
if (MI.isMoveReg()) {
3652+
// FIXME: Dirty hack for an apparent invariant that doesn't hold when
3653+
// subreg_to_reg is coalesced with ordinary copies, such that the bits that
3654+
// were asserted as 0 are now undef.
3655+
if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
3656+
return std::nullopt;
3657+
36523658
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
3659+
}
36533660
return std::nullopt;
36543661
}
36553662

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
3+
4+
; Test a bad interaction between register allocation and the register
5+
; coalescer. The coalescer lost the high subregister def when
6+
; SUBREG_TO_REG was used to implement i32->i64 zext. The allocator
7+
; then recognized the MOV defining an undef subregister as a copy
8+
; instruction, resulting in the users seeing different undef values.
9+
10+
11+
%struct.wibble = type { %struct.wombat }
12+
%struct.wombat = type { %struct.ham, [3 x i8] }
13+
%struct.ham = type { %struct.zot }
14+
%struct.zot = type { %struct.blam }
15+
%struct.blam = type { %struct.ham.0 }
16+
%struct.ham.0 = type { %struct.bar }
17+
%struct.bar = type { %struct.bar.1 }
18+
%struct.bar.1 = type { %struct.baz, i8 }
19+
%struct.baz = type { %struct.snork }
20+
%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
21+
%struct.spam = type { %struct.snork.2, %struct.snork.2 }
22+
%struct.snork.2 = type { i32 }
23+
24+
define void @foo(ptr %arg3, i1 %icmp16) #0 {
25+
; CHECK-LABEL: foo:
26+
; CHECK: # %bb.0: # %bb
27+
; CHECK-NEXT: pushq %rbp
28+
; CHECK-NEXT: .cfi_def_cfa_offset 16
29+
; CHECK-NEXT: .cfi_offset %rbp, -16
30+
; CHECK-NEXT: movq %rsp, %rbp
31+
; CHECK-NEXT: .cfi_def_cfa_register %rbp
32+
; CHECK-NEXT: pushq %r15
33+
; CHECK-NEXT: pushq %r14
34+
; CHECK-NEXT: pushq %r13
35+
; CHECK-NEXT: pushq %r12
36+
; CHECK-NEXT: pushq %rbx
37+
; CHECK-NEXT: pushq %rax
38+
; CHECK-NEXT: .cfi_offset %rbx, -56
39+
; CHECK-NEXT: .cfi_offset %r12, -48
40+
; CHECK-NEXT: .cfi_offset %r13, -40
41+
; CHECK-NEXT: .cfi_offset %r14, -32
42+
; CHECK-NEXT: .cfi_offset %r15, -24
43+
; CHECK-NEXT: movl %esi, %ebx
44+
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
45+
; CHECK-NEXT: xorl %r15d, %r15d
46+
; CHECK-NEXT: xorl %r12d, %r12d
47+
; CHECK-NEXT: # implicit-def: $r13
48+
; CHECK-NEXT: jmp .LBB0_2
49+
; CHECK-NEXT: .p2align 4, 0x90
50+
; CHECK-NEXT: .LBB0_1: # %bb5
51+
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
52+
; CHECK-NEXT: orl $1, %r12d
53+
; CHECK-NEXT: movq %r14, %r15
54+
; CHECK-NEXT: .LBB0_2: # %bb7
55+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
56+
; CHECK-NEXT: xorl %eax, %eax
57+
; CHECK-NEXT: callq *%rax
58+
; CHECK-NEXT: movl %r13d, %r13d
59+
; CHECK-NEXT: testb $1, %bl
60+
; CHECK-NEXT: movl $0, %r14d
61+
; CHECK-NEXT: jne .LBB0_1
62+
; CHECK-NEXT: # %bb.3: # %bb17
63+
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
64+
; CHECK-NEXT: xorl %r14d, %r14d
65+
; CHECK-NEXT: testq %r15, %r15
66+
; CHECK-NEXT: sete %r14b
67+
; CHECK-NEXT: xorl %edi, %edi
68+
; CHECK-NEXT: xorl %eax, %eax
69+
; CHECK-NEXT: callq *%rax
70+
; CHECK-NEXT: shlq $4, %r14
71+
; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
72+
; CHECK-NEXT: movl %r13d, 0
73+
; CHECK-NEXT: movb $0, 4
74+
; CHECK-NEXT: jmp .LBB0_1
75+
bb:
76+
br label %bb7
77+
78+
bb5: ; preds = %bb17, %bb7
79+
%phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb7 ]
80+
%add = or i32 %phi9, 1
81+
%icmp = icmp eq i32 %phi9, 0
82+
br label %bb7
83+
84+
bb7: ; preds = %bb5, %bb
85+
%phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
86+
%phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
87+
%phi10 = phi i40 [ undef, %bb ], [ %and, %bb5 ]
88+
%call = call ptr null()
89+
%and = and i40 %phi10, 4294967295
90+
%icmp161 = icmp ugt ptr %phi8, null
91+
br i1 %icmp16, label %bb5, label %bb17
92+
93+
bb17: ; preds = %bb7
94+
%icmp18 = icmp eq ptr %phi8, null
95+
%zext = zext i1 %icmp18 to i64
96+
%call19 = call ptr null(i64 0)
97+
%getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
98+
store i40 %and, ptr null, align 4
99+
br label %bb5
100+
}
101+
102+
attributes #0 = { "frame-pointer"="all" }

0 commit comments

Comments
 (0)