Skip to content

Commit 30aabec

Browse files
committed
[ARM] Optimise loads of fp16 arguments from i32 stack slots
1 parent e590e2e commit 30aabec

File tree

2 files changed

+17
-12
lines changed

2 files changed

+17
-12
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15275,10 +15275,19 @@ static SDValue PerformVMOVhrCombine(SDNode *N,
1527515275
// fold (VMOVhr (load x)) -> (load (f16*)x)
1527615276
if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
1527715277
if (LN0->hasOneUse() && LN0->isUnindexed() &&
15278-
LN0->getMemoryVT() == MVT::i16) {
15279-
SDValue Load =
15280-
DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15281-
LN0->getBasePtr(), LN0->getMemOperand());
15278+
(LN0->getMemoryVT() == MVT::i16 || LN0->getMemoryVT() == MVT::i32)) {
15279+
SDValue Addr = LN0->getBasePtr();
15280+
unsigned PtrOffset = 0;
15281+
if (DCI.DAG.getDataLayout().isBigEndian() &&
15282+
LN0->getMemoryVT() == MVT::i32) {
15283+
PtrOffset = 2;
15284+
Addr = DCI.DAG.getObjectPtrOffset(SDLoc(N), Addr,
15285+
TypeSize::getFixed(PtrOffset));
15286+
}
15287+
SDValue Load = DCI.DAG.getLoad(
15288+
N->getValueType(0), SDLoc(N), LN0->getChain(), Addr,
15289+
LN0->getPointerInfo().getWithOffset(PtrOffset), LN0->getAlign(),
15290+
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
1528215291
DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
1528315292
DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
1528415293
return Load;

llvm/test/CodeGen/Thumb2/fp16-pcs.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -78,15 +78,13 @@ define arm_aapcscc half @callee_soft_half_on_stack(float %r0, float %r1, float %
7878
;
7979
; LE-FP16-LABEL: callee_soft_half_on_stack:
8080
; LE-FP16: @ %bb.0: @ %entry
81-
; LE-FP16-NEXT: ldr r0, [sp]
82-
; LE-FP16-NEXT: vmov.f16 s0, r0
81+
; LE-FP16-NEXT: vldr.16 s0, [sp]
8382
; LE-FP16-NEXT: vmov r0, s0
8483
; LE-FP16-NEXT: bx lr
8584
;
8685
; BE-FP16-LABEL: callee_soft_half_on_stack:
8786
; BE-FP16: @ %bb.0: @ %entry
88-
; BE-FP16-NEXT: ldr r0, [sp]
89-
; BE-FP16-NEXT: vmov.f16 s0, r0
87+
; BE-FP16-NEXT: vldr.16 s0, [sp, #2]
9088
; BE-FP16-NEXT: vmov r0, s0
9189
; BE-FP16-NEXT: bx lr
9290
entry:
@@ -224,14 +222,12 @@ define arm_aapcs_vfpcc half @callee_hard_half_on_stack(float %s0, float %s1, flo
224222
;
225223
; LE-FP16-LABEL: callee_hard_half_on_stack:
226224
; LE-FP16: @ %bb.0: @ %entry
227-
; LE-FP16-NEXT: ldr r0, [sp]
228-
; LE-FP16-NEXT: vmov.f16 s0, r0
225+
; LE-FP16-NEXT: vldr.16 s0, [sp]
229226
; LE-FP16-NEXT: bx lr
230227
;
231228
; BE-FP16-LABEL: callee_hard_half_on_stack:
232229
; BE-FP16: @ %bb.0: @ %entry
233-
; BE-FP16-NEXT: ldr r0, [sp]
234-
; BE-FP16-NEXT: vmov.f16 s0, r0
230+
; BE-FP16-NEXT: vldr.16 s0, [sp, #2]
235231
; BE-FP16-NEXT: bx lr
236232
entry:
237233
ret half %f

0 commit comments

Comments
 (0)