Skip to content

Commit beabfe7

Browse files
committed
[AArch64] Sink splat to fmlal intrinsics
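As with other NEON indexed (by-element) instructions, it is beneficial to sink a splat operand into the same block as the fmlal/fmlsl intrinsic that uses it, so that instruction selection can fold the splat into the indexed form of the instruction.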
1 parent 5d6d649 commit beabfe7

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14273,6 +14273,16 @@ bool AArch64TargetLowering::shouldSinkOperands(
1427314273
if (isSplatShuffle(II->getOperand(1)))
1427414274
Ops.push_back(&II->getOperandUse(1));
1427514275
return !Ops.empty();
14276+
case Intrinsic::aarch64_neon_fmlal:
14277+
case Intrinsic::aarch64_neon_fmlal2:
14278+
case Intrinsic::aarch64_neon_fmlsl:
14279+
case Intrinsic::aarch64_neon_fmlsl2:
14280+
// Sink splats for index lane variants
14281+
if (isSplatShuffle(II->getOperand(1)))
14282+
Ops.push_back(&II->getOperandUse(1));
14283+
if (isSplatShuffle(II->getOperand(2)))
14284+
Ops.push_back(&II->getOperandUse(2));
14285+
return !Ops.empty();
1427614286
case Intrinsic::aarch64_sve_ptest_first:
1427714287
case Intrinsic::aarch64_sve_ptest_last:
1427814288
if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))

llvm/test/CodeGen/AArch64/fmlal-loreg.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,12 @@ define void @sink(ptr %out_tile, ptr %lhs_panel, ptr %rhs_panel, i32 noundef %K,
8686
; CHECK-NEXT: movi v1.2d, #0000000000000000
8787
; CHECK-NEXT: mov w8, w3
8888
; CHECK-NEXT: movi v2.2d, #0000000000000000
89-
; CHECK-NEXT: dup v0.8h, v0.h[0]
9089
; CHECK-NEXT: .LBB2_1: // %for.body
9190
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
9291
; CHECK-NEXT: ldr q3, [x2], #2
9392
; CHECK-NEXT: subs x8, x8, #1
94-
; CHECK-NEXT: fmlal v1.4s, v3.4h, v0.4h
95-
; CHECK-NEXT: fmlal2 v2.4s, v3.4h, v0.4h
93+
; CHECK-NEXT: fmlal v1.4s, v3.4h, v0.h[0]
94+
; CHECK-NEXT: fmlal2 v2.4s, v3.4h, v0.h[0]
9695
; CHECK-NEXT: b.ne .LBB2_1
9796
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
9897
; CHECK-NEXT: stp q1, q2, [x0]

0 commit comments

Comments
 (0)