[TwoAddressInstruction] Recompute live intervals for partial defs (#74431)

perlfu · web-flow · commit 6752f1517dcf · 2024-01-12T13:26:01.000+09:00
Force live interval recomputation for a register if its definition is
narrowed to become partial. The live interval repair process cannot
otherwise detect these changes.
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1936,13 +1936,16 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   }
 
   bool DefEmitted = false;
+  bool DefIsPartial = false;
   for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
     MachineOperand &UseMO = MI.getOperand(i);
     Register SrcReg = UseMO.getReg();
     unsigned SubIdx = MI.getOperand(i+1).getImm();
     // Nothing needs to be inserted for undef operands.
-    if (UseMO.isUndef())
+    if (UseMO.isUndef()) {
+      DefIsPartial = true;
       continue;
+    }
 
     // Defer any kill flag to the last operand using SrcReg. Otherwise, we
     // might insert a COPY that uses SrcReg after is was killed.
@@ -1987,8 +1990,14 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
     for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
       MI.removeOperand(j);
   } else {
-    if (LIS)
+    if (LIS) {
+      // Force interval recomputation if we moved from full definition
+      // of register to partial.
+      if (DefIsPartial && LIS->hasInterval(DstReg) &&
+          MRI->shouldTrackSubRegLiveness(DstReg))
+        LIS->removeInterval(DstReg);
       LIS->RemoveMachineInstrFromMaps(MI);
+    }
 
     LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
     MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -early-live-intervals -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
 
 ; GCN-LABEL: {{^}}gws_init_odd_reg:
 ; GFX908-DAG: ds_gws_init v1 gds
diff --git a/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir b/llvm/test/CodeGen/AMDGPU/early-lis-two-address-partial-def.mir
@@ -0,0 +1,52 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefix=GFX90A %s
+
+---
+name:            aligned_partial_vgpr_64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
+
+    ; GFX90A-LABEL: name: aligned_partial_vgpr_64
+    ; GFX90A: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr7
+    ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr6
+    ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr5
+    ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+    ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+    ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+    ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+    ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+    ; GFX90A-NEXT: undef [[COPY10:%[0-9]+]].sub0:sgpr_256 = COPY [[COPY9]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub1:sgpr_256 = COPY [[COPY8]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub2:sgpr_256 = COPY [[COPY7]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub3:sgpr_256 = COPY [[COPY6]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub4:sgpr_256 = COPY [[COPY5]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub5:sgpr_256 = COPY [[COPY4]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub6:sgpr_256 = COPY [[COPY3]]
+    ; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub7:sgpr_256 = COPY [[COPY2]]
+    ; GFX90A-NEXT: undef [[COPY11:%[0-9]+]].sub0:vreg_64_align2 = COPY [[COPY]]
+    ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+    ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a [[COPY12]], [[COPY11]].sub0, [[COPY10]], 1, -1, 1, 0, 0, 0, implicit $exec, implicit [[COPY11]] :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX90A-NEXT: $vgpr0 = COPY [[COPY12]]
+    ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+    %9:vgpr_32 = COPY $vgpr1
+    %8:vgpr_32 = COPY $vgpr0
+    %7:sgpr_32 = COPY $sgpr7
+    %6:sgpr_32 = COPY $sgpr6
+    %5:sgpr_32 = COPY $sgpr5
+    %4:sgpr_32 = COPY $sgpr4
+    %3:sgpr_32 = COPY $sgpr3
+    %2:sgpr_32 = COPY $sgpr2
+    %1:sgpr_32 = COPY $sgpr1
+    %0:sgpr_32 = COPY $sgpr0
+    %11:sgpr_256 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3, %4:sgpr_32, %subreg.sub4, %5:sgpr_32, %subreg.sub5, %6:sgpr_32, %subreg.sub6, %7:sgpr_32, %subreg.sub7
+    %14:vreg_64_align2 = REG_SEQUENCE %9:vgpr_32, %subreg.sub0, undef %13:vgpr_32, %subreg.sub1
+    %12:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %8:vgpr_32, %14.sub0:vreg_64_align2, %11:sgpr_256, 1, -1, 1, 0, 0, 0, implicit $exec, implicit %14:vreg_64_align2 :: (volatile dereferenceable load store (s32), addrspace 8)
+    $vgpr0 = COPY %12:vgpr_32
+    SI_RETURN_TO_EPILOG $vgpr0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN,GFX90A %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX12 %s
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 
 ; GCN-LABEL: {{^}}load_1d:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s
 
 ; GFX90A-LABEL: {{^}}sample_1d:

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s`
	`2`	`+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s \| FileCheck -check-prefixes=GCN %s`
`2`	`3`	`; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s \| FileCheck -check-prefixes=GCN %s`
`3`	`4`
`4`	`5`	`; GCN-LABEL: {{^}}load_1d:`