[HACK] X86: Disable isCopyInstrImpl for undef subregister defs

arsenm · arsenm · commit c26dfc81e254 · 2023-07-28T13:33:28.000-04:00
This is a workaround for a coalescer bug where coalescing SUBREG_TO_REG ends up losing the liveness of the high bits of the source register. The result is an incorrect undef subregister def instead of preserving the high values. Work around the observed failure after the resulting mov is eliminated during allocation until a proper fix is ready. I believe the proper fix is to make SUBREG_TO_REG use a tied operand. The test should catch a regression originally observed after b7836d8 and should not show a difference after a496c8b is reverted. https://reviews.llvm.org/D156164
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3648,8 +3648,15 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
 std::optional<DestSourcePair>
 X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
-  if (MI.isMoveReg())
+  if (MI.isMoveReg()) {
+    // FIXME: Dirty hack for apparent invariant that doesn't hold when
+    // subreg_to_reg is coalesced with ordinary copies, such that the bits that
+    // were asserted as 0 are now undef.
+    if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
+      return std::nullopt;
+
     return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+  }
   return std::nullopt;
 }
 
diff --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
+
+; Test a bad interaction between register allocation and the register
+; coalescer. The coalescer lost the high subregister def when
+; SUBREG_TO_REG was used to implement i32->i64 zext. The allocator
+; then recognized the undef subregister defing MOV as a copy
+; instruction, resulting in the users seeing different undef values.
+
+
+%struct.wibble = type { %struct.wombat }
+%struct.wombat = type { %struct.ham, [3 x i8] }
+%struct.ham = type { %struct.zot }
+%struct.zot = type { %struct.blam }
+%struct.blam = type { %struct.ham.0 }
+%struct.ham.0 = type { %struct.bar }
+%struct.bar = type { %struct.bar.1 }
+%struct.bar.1 = type { %struct.baz, i8 }
+%struct.baz = type { %struct.snork }
+%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
+%struct.spam = type { %struct.snork.2, %struct.snork.2 }
+%struct.snork.2 = type { i32 }
+
+define void @foo(ptr %arg3, i1 %icmp16) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    # implicit-def: $r13
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %bb5
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    orl $1, %r12d
+; CHECK-NEXT:    movq %r14, %r15
+; CHECK-NEXT:  .LBB0_2: # %bb7
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    movl %r13d, %r13d
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    movl $0, %r14d
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.3: # %bb17
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    testq %r15, %r15
+; CHECK-NEXT:    sete %r14b
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    shlq $4, %r14
+; CHECK-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; CHECK-NEXT:    movl %r13d, 0
+; CHECK-NEXT:    movb $0, 4
+; CHECK-NEXT:    jmp .LBB0_1
+bb:
+  br label %bb7
+
+bb5:                                              ; preds = %bb17, %bb7
+  %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb7 ]
+  %add = or i32 %phi9, 1
+  %icmp = icmp eq i32 %phi9, 0
+  br label %bb7
+
+bb7:                                              ; preds = %bb5, %bb
+  %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
+  %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %phi10 = phi i40 [ undef, %bb ], [ %and, %bb5 ]
+  %call = call ptr null()
+  %and = and i40 %phi10, 4294967295
+  %icmp161 = icmp ugt ptr %phi8, null
+  br i1 %icmp16, label %bb5, label %bb17
+
+bb17:                                             ; preds = %bb7
+  %icmp18 = icmp eq ptr %phi8, null
+  %zext = zext i1 %icmp18 to i64
+  %call19 = call ptr null(i64 0)
+  %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
+  store i40 %and, ptr null, align 4
+  br label %bb5
+}
+
+attributes #0 = { "frame-pointer"="all" }