Skip to content

Commit 26fa399

Browse files
[RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839)
The issue with the handling of SUBREG_TO_REG is that we don't join the subranges correctly when we join live ranges across the SUBREG_TO_REG. For example, when joining across this:
```
32B %2:gr64_nosp = SUBREG_TO_REG 0, %0:gr32, %subreg.sub_32bit
```
we want to join these live ranges:
```
%0 [16r,32r:0) 0@16r weight:0.000000e+00
%2 [32r,112r:0) 0@32r weight:0.000000e+00
```
Before the fix, the range for the resulting merged `%2` is:
```
%2 [16r,112r:0) 0@16r weight:0.000000e+00
```
After the fix, it is now this:
```
%2 [16r,112r:0) 0@16r L000000000000000F [16r,112r:0) 0@16r weight:0.000000e+00
```
Two tests are added with this fix. The X86 test fails without the patch. The PowerPC test passes with and without the patch, but is added as a way to track possible future failures when register classes are changed in a future patch.
1 parent 4f79ef4 commit 26fa399

File tree

3 files changed

+78
-0
lines changed

3 files changed

+78
-0
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
36733673

36743674
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
36753675
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3676+
} else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
3677+
LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
3678+
CP.getNewRC()->getLaneMask(), LHS);
3679+
mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
3680+
CP.getDstIdx());
3681+
LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3682+
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
36763683
}
36773684

36783685
// The merging algorithm in LiveInterval::join() can't handle conflicting
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s \
3+
# RUN: -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
4+
5+
# Check that the register coalescer correctly handles merging live ranges over
6+
# SUBREG_TO_REG on PowerPC. The -verify-coalescing option will give an error if
7+
# this is incorrect.
8+
9+
---
10+
name: check_subregs
11+
alignment: 16
12+
tracksRegLiveness: true
13+
body: |
14+
bb.0:
15+
liveins: $x3
16+
17+
; CHECK-LABEL: name: check_subregs
18+
; CHECK: liveins: $x3
19+
; CHECK-NEXT: {{ $}}
20+
; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
21+
; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead [[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
22+
; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], implicit $rm
23+
; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
24+
; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
25+
; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
26+
%0:g8rc_and_g8rc_nox0 = COPY $x3
27+
%1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
28+
%3:f4rc = FRSP killed %1, implicit $rm
29+
%4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
30+
%5:vrrc = XVCVDPSP killed %4, implicit $rm
31+
$v2 = COPY %5
32+
BLR8 implicit $lr8, implicit $rm, implicit $v2
33+
...
34+

llvm/test/CodeGen/X86/subreg-fail.mir

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple x86_64-unknown-unknown %s \
3+
# RUN: -verify-coalescing -enable-subreg-liveness \
4+
# RUN: --run-pass=register-coalescer -o - | FileCheck %s
5+
6+
# Check that the register coalescer correctly handles merging live ranges over
7+
# SUBREG_TO_REG on X86. The -verify-coalescing option will give an error if
8+
# this is incorrect.
9+
10+
---
11+
name: test1
12+
alignment: 16
13+
tracksRegLiveness: true
14+
body: |
15+
bb.0:
16+
; CHECK-LABEL: name: test1
17+
; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
18+
; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
19+
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
20+
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
21+
; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
22+
; CHECK-NEXT: MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, [[LEA64r]].sub_32bit :: (volatile store (s32) into `ptr undef`)
23+
; CHECK-NEXT: RET 0, undef $eax
24+
%0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
25+
%2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
26+
%3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
27+
%5:gr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32bit
28+
%6:gr64 = COPY killed %5
29+
%6:gr64 = SHL64ri %6, 32, implicit-def dead $eflags
30+
%7:gr64 = LEA64r killed %6, 1, killed %2, 256, $noreg
31+
%8:gr64 = COPY killed %7
32+
%8:gr64 = SHR64ri %8, 8, implicit-def dead $eflags
33+
%9:gr32 = COPY killed %8.sub_32bit
34+
MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, killed %9 :: (volatile store (s32) into `ptr undef`)
35+
RET 0, undef $eax
36+
37+
...

0 commit comments

Comments
 (0)