Skip to content

Commit 9f63940

Browse files
authored
[AArch64] Handle ANY_EXTEND in BuildShuffleExtendCombine (#118308)
Handle ANY_EXTEND when combining a BUILD_VECTOR/VECTOR_SHUFFLE of extended operands: ANY_EXTEND operands can safely be ignored when checking that the signedness of all the other extends matches.
1 parent 39c9dda commit 9f63940

File tree

3 files changed

+65
-20
lines changed

3 files changed

+65
-20
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18595,6 +18595,7 @@ static EVT calculatePreExtendType(SDValue Extend) {
1859518595
switch (Extend.getOpcode()) {
1859618596
case ISD::SIGN_EXTEND:
1859718597
case ISD::ZERO_EXTEND:
18598+
case ISD::ANY_EXTEND:
1859818599
return Extend.getOperand(0).getValueType();
1859918600
case ISD::AssertSext:
1860018601
case ISD::AssertZext:
@@ -18639,14 +18640,15 @@ static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
1863918640
// extend, and make sure it looks valid.
1864018641
SDValue Extend = BV->getOperand(0);
1864118642
unsigned ExtendOpcode = Extend.getOpcode();
18643+
bool IsAnyExt = ExtendOpcode == ISD::ANY_EXTEND;
1864218644
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
1864318645
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
1864418646
ExtendOpcode == ISD::AssertSext;
18645-
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
18647+
if (!IsAnyExt && !IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
1864618648
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
1864718649
return SDValue();
18648-
// Shuffle inputs are vector, limit to SIGN_EXTEND and ZERO_EXTEND to ensure
18649-
// calculatePreExtendType will work without issue.
18650+
// Shuffle inputs are vector, limit to SIGN_EXTEND/ZERO_EXTEND/ANY_EXTEND to
18651+
// ensure calculatePreExtendType will work without issue.
1865018652
if (BV.getOpcode() == ISD::VECTOR_SHUFFLE &&
1865118653
ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND)
1865218654
return SDValue();
@@ -18657,15 +18659,27 @@ static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
1865718659
PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2)
1865818660
return SDValue();
1865918661

18660-
// Make sure all other operands are equally extended
18662+
// Make sure all other operands are equally extended.
18663+
bool SeenZExtOrSExt = !IsAnyExt;
1866118664
for (SDValue Op : drop_begin(BV->ops())) {
1866218665
if (Op.isUndef())
1866318666
continue;
18667+
18668+
if (calculatePreExtendType(Op) != PreExtendType)
18669+
return SDValue();
18670+
1866418671
unsigned Opc = Op.getOpcode();
18672+
if (Opc == ISD::ANY_EXTEND)
18673+
continue;
18674+
1866518675
bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
1866618676
Opc == ISD::AssertSext;
18667-
if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
18677+
18678+
if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
1866818679
return SDValue();
18680+
18681+
IsSExt = OpcIsSExt;
18682+
SeenZExtOrSExt = true;
1866918683
}
1867018684

1867118685
SDValue NBV;
@@ -18688,7 +18702,10 @@ static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
1868818702
: BV.getOperand(1).getOperand(0),
1868918703
cast<ShuffleVectorSDNode>(BV)->getMask());
1869018704
}
18691-
return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
18705+
unsigned ExtOpc = !SeenZExtOrSExt
18706+
? ISD::ANY_EXTEND
18707+
: (IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND);
18708+
return DAG.getNode(ExtOpc, DL, VT, NBV);
1869218709
}
1869318710

1869418711
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))

llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,13 @@ target triple = "aarch64-unknown-linux-gnu"
1010
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
1111
; CHECK-LABEL: dupext_crashtest:
1212
; CHECK: // %bb.0: // %for.body.lr.ph
13+
; CHECK-NEXT: dup v0.2s, w0
1314
; CHECK-NEXT: .LBB0_1: // %vector.body
1415
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
15-
; CHECK-NEXT: ldr d0, [x8]
16-
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
17-
; CHECK-NEXT: fmov x9, d0
18-
; CHECK-NEXT: mov x8, v0.d[1]
19-
; CHECK-NEXT: mul w9, w0, w9
20-
; CHECK-NEXT: mul w8, w0, w8
21-
; CHECK-NEXT: fmov d0, x9
22-
; CHECK-NEXT: mov v0.d[1], x8
23-
; CHECK-NEXT: xtn v0.2s, v0.2d
24-
; CHECK-NEXT: str d0, [x8]
16+
; CHECK-NEXT: ldr d1, [x8]
17+
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
18+
; CHECK-NEXT: xtn v1.2s, v1.2d
19+
; CHECK-NEXT: str d1, [x8]
2520
; CHECK-NEXT: b .LBB0_1
2621
for.body.lr.ph:
2722
%conv314 = zext i32 %e to i64

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,39 @@ entry:
158158
ret <2 x i64> %out
159159
}
160160

161+
define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
162+
; CHECK-SD-LABEL: dupzext_v2i32_v2i64_trunc:
163+
; CHECK-SD: // %bb.0: // %entry
164+
; CHECK-SD-NEXT: dup v1.2s, w0
165+
; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
166+
; CHECK-SD-NEXT: xtn v0.2s, v0.2d
167+
; CHECK-SD-NEXT: ret
168+
;
169+
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
170+
; CHECK-GI: // %bb.0: // %entry
171+
; CHECK-GI-NEXT: mov w8, w0
172+
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
173+
; CHECK-GI-NEXT: dup v1.2d, x8
174+
; CHECK-GI-NEXT: fmov x9, d0
175+
; CHECK-GI-NEXT: mov x11, v0.d[1]
176+
; CHECK-GI-NEXT: fmov x8, d1
177+
; CHECK-GI-NEXT: mov x10, v1.d[1]
178+
; CHECK-GI-NEXT: mul x8, x8, x9
179+
; CHECK-GI-NEXT: mul x9, x10, x11
180+
; CHECK-GI-NEXT: mov v0.d[0], x8
181+
; CHECK-GI-NEXT: mov v0.d[1], x9
182+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
183+
; CHECK-GI-NEXT: ret
184+
entry:
185+
%in = zext i32 %src to i64
186+
%ext.b = zext <2 x i32> %b to <2 x i64>
187+
%broadcast.splatinsert = insertelement <2 x i64> poison, i64 %in, i64 0
188+
%broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
189+
%prod = mul nuw <2 x i64> %broadcast.splat, %ext.b
190+
%out = trunc <2 x i64> %prod to <2 x i32>
191+
ret <2 x i32> %out
192+
}
193+
161194
; Unsupported combines
162195

163196
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
@@ -407,10 +440,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
407440
;
408441
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
409442
; CHECK-GI: // %bb.0: // %entry
410-
; CHECK-GI-NEXT: adrp x8, .LCPI13_0
443+
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
411444
; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
412445
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
413-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
446+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
414447
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
415448
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
416449
; CHECK-GI-NEXT: ret
@@ -460,10 +493,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
460493
;
461494
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
462495
; CHECK-GI: // %bb.0: // %entry
463-
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
496+
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
464497
; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
465498
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
466-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
499+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
467500
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
468501
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
469502
; CHECK-GI-NEXT: ret

0 commit comments

Comments
 (0)