Skip to content

Commit 48d0e84

Browse files
[RegisterCoalescer] Fix issue in the RegisterCoalescer.
Two tests are added with this fix. The X86 test fails without the patch. The PowerPC test passes both with and without the patch, but is added as a way to track possible future failures when register classes are changed in a future patch.
1 parent 892c58c commit 48d0e84

File tree

3 files changed

+158
-0
lines changed

3 files changed

+158
-0
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3671,6 +3671,14 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
36713671
// having stale segments.
36723672
LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
36733673

3674+
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3675+
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3676+
} else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
3677+
LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
3678+
CP.getNewRC()->getLaneMask(), LHS);
3679+
mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
3680+
CP.getDstIdx());
3681+
LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
36743682
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
36753683
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
36763684
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \
2+
# RUN: -verify-machineinstrs --run-pass=register-coalescer -o - | FileCheck %s
3+
4+
---
5+
name: check_subregs
6+
alignment: 16
7+
tracksRegLiveness: true
8+
body: |
9+
bb.0.entry:
10+
liveins: $x3
11+
12+
%0:g8rc_and_g8rc_nox0 = COPY $x3
13+
%3:f8rc, %4:g8rc_and_g8rc_nox0 = LFSUX %0, %0
14+
%5:f4rc = FRSP killed %3, implicit $rm
15+
%22:vslrc = SUBREG_TO_REG 1, %5, %subreg.sub_64
16+
%11:vrrc = XVCVDPSP killed %22, implicit $rm
17+
$v2 = COPY %11
18+
BLR8 implicit $lr8, implicit $rm, implicit $v2
19+
...
20+
21+
# CHECK: %0:g8rc_and_g8rc_nox0 = COPY $x3
22+
# CHECK-NEXT: %1:f8rc, dead %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
23+
# CHECK-NEXT: undef %4.sub_64:vslrc = FRSP %1, implicit $rm
24+
# CHECK-NEXT: %5:vrrc = XVCVDPSP %4, implicit $rm
25+
# CHECK-NEXT: $v2 = COPY %5
26+
# CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2

llvm/test/CodeGen/X86/subreg-fail.mir

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# RUN: llc -mtriple x86_64-unknown-unknown -x mir < %s \
2+
# RUN: -verify-machineinstrs -enable-subreg-liveness=true \
3+
# RUN: --run-pass=register-coalescer -o - | FileCheck %s
4+
5+
--- |
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
7+
target triple = "x86_64-unknown-unknown"
8+
9+
%pair = type { i64, double }
10+
%t21 = type { ptr }
11+
%t13 = type { ptr, %t15, %t15 }
12+
%t15 = type { i8, i32, i32 }
13+
14+
@__force_order = external hidden global i32, align 4
15+
@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
16+
@a = external global i32, align 4
17+
@fn1.g = private unnamed_addr constant [9 x ptr] [ptr null, ptr @a, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null], align 16
18+
@e = external global i32, align 4
19+
@__stack_chk_guard = external dso_local global ptr
20+
21+
; Function Attrs: nounwind ssp
22+
define i32 @test1() #0 {
23+
entry:
24+
%tmp5.i = load volatile i32, ptr undef, align 4
25+
%conv.i = zext i32 %tmp5.i to i64
26+
%tmp12.i = load volatile i32, ptr undef, align 4
27+
%conv13.i = zext i32 %tmp12.i to i64
28+
%shl.i = shl i64 %conv13.i, 32
29+
%or.i = or i64 %shl.i, %conv.i
30+
%add16.i = add i64 %or.i, 256
31+
%shr.i = lshr i64 %add16.i, 8
32+
%conv19.i = trunc i64 %shr.i to i32
33+
store volatile i32 %conv19.i, ptr undef, align 4
34+
ret i32 undef
35+
}
36+
...
37+
---
38+
name: test1
39+
alignment: 16
40+
exposesReturnsTwice: false
41+
legalized: false
42+
regBankSelected: false
43+
selected: false
44+
failedISel: false
45+
tracksRegLiveness: true
46+
hasWinCFI: false
47+
callsEHReturn: false
48+
callsUnwindInit: false
49+
hasEHCatchret: false
50+
hasEHScopes: false
51+
hasEHFunclets: false
52+
isOutlined: false
53+
debugInstrRef: true
54+
failsVerification: false
55+
tracksDebugUserValues: false
56+
registers:
57+
- { id: 0, class: gr32, preferred-register: '' }
58+
- { id: 1, class: gr64, preferred-register: '' }
59+
- { id: 2, class: gr64_nosp, preferred-register: '' }
60+
- { id: 3, class: gr32, preferred-register: '' }
61+
- { id: 4, class: gr64, preferred-register: '' }
62+
- { id: 5, class: gr64, preferred-register: '' }
63+
- { id: 6, class: gr64, preferred-register: '' }
64+
- { id: 7, class: gr64, preferred-register: '' }
65+
- { id: 8, class: gr64, preferred-register: '' }
66+
- { id: 9, class: gr32, preferred-register: '' }
67+
- { id: 10, class: gr64, preferred-register: '' }
68+
- { id: 11, class: gr32, preferred-register: '' }
69+
liveins: []
70+
frameInfo:
71+
isFrameAddressTaken: false
72+
isReturnAddressTaken: false
73+
hasStackMap: false
74+
hasPatchPoint: false
75+
stackSize: 0
76+
offsetAdjustment: 0
77+
maxAlignment: 1
78+
adjustsStack: false
79+
hasCalls: false
80+
stackProtector: ''
81+
functionContext: ''
82+
maxCallFrameSize: 4294967295
83+
cvBytesOfCalleeSavedRegisters: 0
84+
hasOpaqueSPAdjustment: false
85+
hasVAStart: false
86+
hasMustTailInVarArgFunc: false
87+
hasTailCall: false
88+
isCalleeSavedInfoValid: false
89+
localFrameSize: 0
90+
savePoint: ''
91+
restorePoint: ''
92+
fixedStack: []
93+
stack: []
94+
entry_values: []
95+
callSites: []
96+
debugValueSubstitutions: []
97+
constants: []
98+
machineFunctionInfo:
99+
amxProgModel: None
100+
body: |
101+
bb.0.entry:
102+
%0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
103+
%2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
104+
%3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
105+
%5:gr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32bit
106+
%6:gr64 = COPY killed %5
107+
%6:gr64 = SHL64ri %6, 32, implicit-def dead $eflags
108+
%7:gr64 = LEA64r killed %6, 1, killed %2, 256, $noreg
109+
%8:gr64 = COPY killed %7
110+
%8:gr64 = SHR64ri %8, 8, implicit-def dead $eflags
111+
%9:gr32 = COPY killed %8.sub_32bit
112+
MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, killed %9 :: (volatile store (s32) into `ptr undef`)
113+
RET 0, undef $eax
114+
115+
...
116+
117+
# CHECK: undef %2.sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
118+
# CHECK-NEXT: undef %6.sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
119+
# CHECK-NEXT: %6:gr64_with_sub_8bit = SHL64ri %6, 32, implicit-def dead $eflags
120+
# CHECK-NEXT: %8:gr64_with_sub_8bit = LEA64r %6, 1, %2, 256, $noreg
121+
# CHECK-NEXT: %8:gr64_with_sub_8bit = SHR64ri %8, 8, implicit-def dead $eflags
122+
# CHECK-NEXT: MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, %8.sub_32bit :: (volatile store (s32) into `ptr undef`)
123+
# CHECK-NEXT: RET 0, undef $eax
124+

0 commit comments

Comments
 (0)