Skip to content

Commit 3c81356

Browse files
author
git apple-llvm automerger
committed
Merge commit '04ee0e0753b1' from apple/stable/20200714 into swift/master-rebranch
2 parents 1eac44b + 04ee0e0 commit 3c81356

File tree

7 files changed

+246
-151
lines changed

7 files changed

+246
-151
lines changed

llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,11 +1466,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
14661466
.addImm(0)
14671467
.addImm(32 - Log2(MaxAlign))
14681468
.addImm(31);
1469-
BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
1469+
BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
14701470
SPReg)
1471-
.addReg(FPReg)
1472-
.addReg(SPReg)
1473-
.addReg(ScratchReg);
1471+
.addReg(ScratchReg)
1472+
.addReg(SPReg);
14741473
}
14751474
// Probe residual part.
14761475
if (NegResidualSize) {

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11950,18 +11950,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
1195011950
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1195111951
Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
1195211952
Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11953-
11954-
// Get the canonical FinalStackPtr like what
11955-
// PPCRegisterInfo::lowerDynamicAlloc does.
11956-
BuildMI(*MBB, {MI}, DL,
11957-
TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
11958-
: PPC::PREPARE_PROBED_ALLOCA_32),
11959-
FramePointer)
11960-
.addDef(FinalStackPtr)
11953+
Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11954+
11955+
// Since the value of NegSizeReg might be realigned in prologepilog, insert a
11956+
// PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11957+
// NegSize.
11958+
unsigned ProbeOpc;
11959+
if (!MRI.hasOneNonDBGUse(NegSizeReg))
11960+
ProbeOpc =
11961+
isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11962+
else
11963+
// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
11964+
// and NegSizeReg will be allocated in the same physical register to avoid
11965+
// a redundant copy when NegSizeReg has only one use, which is the current MI and
11966+
// will be replaced by PREPARE_PROBED_ALLOCA then.
11967+
ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11968+
: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11969+
BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11970+
.addDef(ActualNegSizeReg)
1196111971
.addReg(NegSizeReg)
1196211972
.add(MI.getOperand(2))
1196311973
.add(MI.getOperand(3));
1196411974

11975+
// Calculate the final stack pointer, which equals SP + ActualNegSize.
11976+
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11977+
FinalStackPtr)
11978+
.addReg(SPReg)
11979+
.addReg(ActualNegSizeReg);
11980+
1196511981
// Materialize a scratch register for update.
1196611982
int64_t NegProbeSize = -(int64_t)ProbeSize;
1196711983
assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
@@ -11982,7 +11998,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
1198211998
// Probing leading residual part.
1198311999
Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
1198412000
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11985-
.addReg(NegSizeReg)
12001+
.addReg(ActualNegSizeReg)
1198612002
.addReg(ScratchReg);
1198712003
Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
1198812004
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
@@ -11991,7 +12007,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
1199112007
Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
1199212008
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
1199312009
.addReg(Mul)
11994-
.addReg(NegSizeReg);
12010+
.addReg(ActualNegSizeReg);
1199512011
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1199612012
.addReg(FramePointer)
1199712013
.addReg(SPReg)

llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
431431
(ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
432432
[(set i64:$result,
433433
(PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
434-
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
435-
g8rc:$sp),
434+
def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs
435+
g8rc:$fp, g8rc:$actual_negsize),
436436
(ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
437+
def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs
438+
g8rc:$fp, g8rc:$actual_negsize),
439+
(ins g8rc:$negsize, memri:$fpsi),
440+
"#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>,
441+
RegConstraint<"$actual_negsize = $negsize">;
437442
def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
438443
(ins i64imm:$stacksize),
439444
"#PROBED_STACKALLOC_64", []>;

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,9 +1406,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
14061406
(ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
14071407
[(set i32:$result,
14081408
(PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
1409-
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
1410-
gprc:$sp),
1409+
def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs
1410+
gprc:$fp, gprc:$actual_negsize),
14111411
(ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
1412+
def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs
1413+
gprc:$fp, gprc:$actual_negsize),
1414+
(ins gprc:$negsize, memri:$fpsi),
1415+
"#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>,
1416+
RegConstraint<"$actual_negsize = $negsize">;
14121417
def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
14131418
(ins i64imm:$stacksize),
14141419
"#PROBED_STACKALLOC_32", []>;

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -624,21 +624,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca(
624624
bool LP64 = TM.isPPC64();
625625
DebugLoc dl = MI.getDebugLoc();
626626
Register FramePointer = MI.getOperand(0).getReg();
627-
Register FinalStackPtr = MI.getOperand(1).getReg();
627+
const Register ActualNegSizeReg = MI.getOperand(1).getReg();
628628
bool KillNegSizeReg = MI.getOperand(2).isKill();
629629
Register NegSizeReg = MI.getOperand(2).getReg();
630-
prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
631-
if (LP64) {
632-
BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
633-
.addReg(PPC::X1)
634-
.addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
635-
636-
} else {
637-
BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
638-
.addReg(PPC::R1)
639-
.addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
630+
const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR);
631+
// The register allocator might assign FramePointer and NegSizeReg to the same physical register.
632+
if (FramePointer == NegSizeReg) {
633+
assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, "
634+
"NegSizeReg should be killed");
635+
// FramePointer is clobbered earlier than the use of NegSizeReg in
636+
// prepareDynamicAlloca, so save NegSizeReg in ActualNegSizeReg to avoid
637+
// misuse.
638+
BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
639+
.addReg(NegSizeReg)
640+
.addReg(NegSizeReg);
641+
NegSizeReg = ActualNegSizeReg;
642+
KillNegSizeReg = false;
640643
}
641-
644+
prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
645+
// NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign >
646+
// TargetAlign.
647+
if (NegSizeReg != ActualNegSizeReg)
648+
BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
649+
.addReg(NegSizeReg)
650+
.addReg(NegSizeReg);
642651
MBB.erase(II);
643652
}
644653

@@ -1084,7 +1093,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
10841093

10851094
if (FPSI && FrameIndex == FPSI &&
10861095
(OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
1087-
OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
1096+
OpC == PPC::PREPARE_PROBED_ALLOCA_32 ||
1097+
OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 ||
1098+
OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) {
10881099
lowerPrepareProbedAlloca(II);
10891100
return;
10901101
}

llvm/test/CodeGen/PowerPC/pr46759.ll

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
3+
; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck \
4+
; RUN: -check-prefix=CHECK-LE %s
5+
6+
define void @foo(i32 %vla_size) #0 {
7+
; CHECK-LE-LABEL: foo:
8+
; CHECK-LE: # %bb.0: # %entry
9+
; CHECK-LE-NEXT: std r31, -8(r1)
10+
; CHECK-LE-NEXT: std r30, -16(r1)
11+
; CHECK-LE-NEXT: mr r30, r1
12+
; CHECK-LE-NEXT: mr r12, r1
13+
; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
14+
; CHECK-LE-NEXT: clrldi r0, r12, 53
15+
; CHECK-LE-NEXT: subc r1, r1, r0
16+
; CHECK-LE-NEXT: stdu r12, -2048(r1)
17+
; CHECK-LE-NEXT: stdu r12, -4096(r1)
18+
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
19+
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
20+
; CHECK-LE-NEXT: .cfi_offset r31, -8
21+
; CHECK-LE-NEXT: .cfi_offset r30, -16
22+
; CHECK-LE-NEXT: clrldi r3, r3, 32
23+
; CHECK-LE-NEXT: li r5, -2048
24+
; CHECK-LE-NEXT: mr r31, r1
25+
; CHECK-LE-NEXT: addi r3, r3, 15
26+
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
27+
; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
28+
; CHECK-LE-NEXT: neg r4, r3
29+
; CHECK-LE-NEXT: ld r3, 0(r1)
30+
; CHECK-LE-NEXT: and r5, r4, r5
31+
; CHECK-LE-NEXT: mr r4, r5
32+
; CHECK-LE-NEXT: li r5, -4096
33+
; CHECK-LE-NEXT: divd r6, r4, r5
34+
; CHECK-LE-NEXT: mulld r5, r6, r5
35+
; CHECK-LE-NEXT: sub r5, r4, r5
36+
; CHECK-LE-NEXT: add r4, r1, r4
37+
; CHECK-LE-NEXT: stdux r3, r1, r5
38+
; CHECK-LE-NEXT: cmpd r1, r4
39+
; CHECK-LE-NEXT: beq cr0, .LBB0_2
40+
; CHECK-LE-NEXT: .LBB0_1: # %entry
41+
; CHECK-LE-NEXT: #
42+
; CHECK-LE-NEXT: stdu r3, -4096(r1)
43+
; CHECK-LE-NEXT: cmpd r1, r4
44+
; CHECK-LE-NEXT: bne cr0, .LBB0_1
45+
; CHECK-LE-NEXT: .LBB0_2: # %entry
46+
; CHECK-LE-NEXT: addi r3, r1, 2048
47+
; CHECK-LE-NEXT: lbz r3, 0(r3)
48+
; CHECK-LE-NEXT: ld r1, 0(r1)
49+
; CHECK-LE-NEXT: ld r31, -8(r1)
50+
; CHECK-LE-NEXT: ld r30, -16(r1)
51+
; CHECK-LE-NEXT: blr
52+
entry:
53+
%0 = zext i32 %vla_size to i64
54+
%vla = alloca i8, i64 %0, align 2048
55+
%1 = load volatile i8, i8* %vla, align 2048
56+
ret void
57+
}
58+
59+
attributes #0 = { "probe-stack"="inline-asm" }

0 commit comments

Comments
 (0)