Skip to content

Commit 26fd693

Browse files
authored
RegisterCoalescer: Fix creating full / empty subrange on undef subreg use (llvm#117936)
1 parent 12cefcc commit 26fd693

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1888,7 +1888,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
18881888

18891889
// A subreg use of a partially undef (super) register may be a complete
18901890
// undef use now and then has to be marked that way.
1891-
if (MO.isUse() && !DstIsPhys) {
1891+
if (MO.isUse() && !MO.isUndef() && !DstIsPhys) {
18921892
unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
18931893
if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
18941894
if (!DstInt->hasSubRanges()) {
Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
3+
4+
# Test that an invalid subreg range is not introduced due to the undef
5+
# %1.sub0 use. An undef use with a subregister index would end up
6+
# introducing subranges for the empty and full lanemasks.
7+
8+
---
9+
name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid
10+
tracksRegLiveness: true
11+
machineFunctionInfo:
12+
isEntryFunction: true
13+
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
14+
stackPtrOffsetReg: '$sgpr32'
15+
sgprForEXECCopy: '$sgpr100_sgpr101'
16+
body: |
17+
; CHECK-LABEL: name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid
18+
; CHECK: bb.0:
19+
; CHECK-NEXT: successors: %bb.1(0x80000000)
20+
; CHECK-NEXT: liveins: $sgpr8_sgpr9
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
23+
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), addrspace 4)
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.1:
26+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
29+
; CHECK-NEXT: S_BRANCH %bb.2
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: bb.2:
32+
; CHECK-NEXT: successors: %bb.3(0x80000000)
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: undef [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: bb.3:
37+
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub1:sgpr_128 = COPY undef [[S_LOAD_DWORDX4_IMM]].sub0
38+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_LOAD_DWORDX4_IMM]]
39+
bb.0:
40+
liveins: $sgpr8_sgpr9
41+
42+
%0:sgpr_64 = COPY $sgpr8_sgpr9
43+
%1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), addrspace 4)
44+
45+
bb.1:
46+
%2:sgpr_128 = COPY %1
47+
S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
48+
S_BRANCH %bb.2
49+
50+
bb.2:
51+
undef %3.sub0:sgpr_128 = S_MOV_B32 0
52+
%2:sgpr_128 = COPY killed %3
53+
54+
bb.3:
55+
%4:sgpr_128 = COPY killed %2
56+
%4.sub1:sgpr_128 = COPY undef %1.sub0
57+
S_ENDPGM 0, implicit %4
58+
59+
...

0 commit comments

Comments
 (0)