Skip to content

Commit 06e310f

Browse files
authored
Revert "[AArch64] Improve lowering of truncating uzp1" (#85115)
Reverts #82457. The bot is broken, likely because of a mid-air collision.
1 parent 27e5312 commit 06e310f

17 files changed

+284
-209
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21423,8 +21423,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
2142321423
}
2142421424
}
2142521425

21426-
// These optimizations only work on little endian.
21427-
if (!DAG.getDataLayout().isLittleEndian())
21426+
// uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
21427+
// Only implemented on little-endian subtargets.
21428+
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
21429+
21430+
// This optimization only works on little endian.
21431+
if (!IsLittleEndian)
2142821432
return SDValue();
2142921433

2143021434
// uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
@@ -21443,28 +21447,21 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
2144321447
if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
2144421448
return SDValue();
2144521449

21446-
SDValue SourceOp0 = peekThroughBitcasts(Op0);
21447-
SDValue SourceOp1 = peekThroughBitcasts(Op1);
21450+
auto getSourceOp = [](SDValue Operand) -> SDValue {
21451+
const unsigned Opcode = Operand.getOpcode();
21452+
if (Opcode == ISD::TRUNCATE)
21453+
return Operand->getOperand(0);
21454+
if (Opcode == ISD::BITCAST &&
21455+
Operand->getOperand(0).getOpcode() == ISD::TRUNCATE)
21456+
return Operand->getOperand(0)->getOperand(0);
21457+
return SDValue();
21458+
};
2144821459

21449-
// truncating uzp1(x, y) -> xtn(concat (x, y))
21450-
if (SourceOp0.getValueType() == SourceOp1.getValueType()) {
21451-
EVT Op0Ty = SourceOp0.getValueType();
21452-
if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
21453-
(ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
21454-
SDValue Concat =
21455-
DAG.getNode(ISD::CONCAT_VECTORS, DL,
21456-
Op0Ty.getDoubleNumVectorElementsVT(*DAG.getContext()),
21457-
SourceOp0, SourceOp1);
21458-
return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Concat);
21459-
}
21460-
}
21460+
SDValue SourceOp0 = getSourceOp(Op0);
21461+
SDValue SourceOp1 = getSourceOp(Op1);
2146121462

21462-
// uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
21463-
if (SourceOp0.getOpcode() != ISD::TRUNCATE ||
21464-
SourceOp1.getOpcode() != ISD::TRUNCATE)
21463+
if (!SourceOp0 || !SourceOp1)
2146521464
return SDValue();
21466-
SourceOp0 = SourceOp0.getOperand(0);
21467-
SourceOp1 = SourceOp1.getOperand(0);
2146821465

2146921466
if (SourceOp0.getValueType() != SourceOp1.getValueType() ||
2147021467
!SourceOp0.getValueType().isSimple())

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6153,39 +6153,26 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
61536153
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
61546154
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
61556155

6156-
def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
6157-
[(trunc node:$op0),
6158-
(assertzext (trunc node:$op0)),
6159-
(assertsext (trunc node:$op0))]>;
6160-
6161-
// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
6162-
// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
6163-
// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
6164-
class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
6165-
: Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
6166-
(TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
6167-
(!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
6168-
def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
6169-
def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
6170-
def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
6171-
6172-
// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
6173-
// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
6174-
// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
6175-
class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
6176-
ValueType Ty>
6177-
: Pat<(Ty (trunc_optional_assert_ext
6178-
(ConcatTy (concat_vectors
6179-
(TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
6180-
(TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
6181-
(!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
6182-
def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
6183-
def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;
6184-
6185-
def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
6186-
(UZP1v8i8 V64:$Vn, V64:$Vm)>;
6187-
def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
6188-
(UZP1v4i16 V64:$Vn, V64:$Vm)>;
6156+
def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
6157+
(v8i8 (trunc (v8i16 V128:$Vm))))),
6158+
(UZP1v16i8 V128:$Vn, V128:$Vm)>;
6159+
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
6160+
(v4i16 (trunc (v4i32 V128:$Vm))))),
6161+
(UZP1v8i16 V128:$Vn, V128:$Vm)>;
6162+
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
6163+
(v2i32 (trunc (v2i64 V128:$Vm))))),
6164+
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
6165+
// These are the same as above, with an optional assertzext node that can be
6166+
// generated from fptoi lowering.
6167+
def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
6168+
(v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
6169+
(UZP1v16i8 V128:$Vn, V128:$Vm)>;
6170+
def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
6171+
(v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
6172+
(UZP1v8i16 V128:$Vn, V128:$Vm)>;
6173+
def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
6174+
(v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
6175+
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
61896176

61906177
def : Pat<(v16i8 (concat_vectors
61916178
(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),

llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(ptr %ptr) {
88
; CHECK-NEXT: ldp q0, q1, [x0]
99
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
1010
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
11-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
12-
; CHECK-NEXT: xtn v0.4h, v0.4s
11+
; CHECK-NEXT: xtn v1.2s, v1.2d
12+
; CHECK-NEXT: xtn v0.2s, v0.2d
13+
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
1314
; CHECK-NEXT: ret
1415
%tmp1 = load <4 x double>, ptr %ptr
1516
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
@@ -25,10 +26,13 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(ptr %ptr) {
2526
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
2627
; CHECK-NEXT: fcvtzs v3.2d, v3.2d
2728
; CHECK-NEXT: fcvtzs v2.2d, v2.2d
28-
; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
29-
; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s
30-
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
31-
; CHECK-NEXT: xtn v0.8b, v0.8h
29+
; CHECK-NEXT: xtn v0.2s, v0.2d
30+
; CHECK-NEXT: xtn v1.2s, v1.2d
31+
; CHECK-NEXT: xtn v3.2s, v3.2d
32+
; CHECK-NEXT: xtn v2.2s, v2.2d
33+
; CHECK-NEXT: uzp1 v0.4h, v1.4h, v0.4h
34+
; CHECK-NEXT: uzp1 v1.4h, v2.4h, v3.4h
35+
; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b
3236
; CHECK-NEXT: ret
3337
%tmp1 = load <8 x double>, ptr %ptr
3438
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
@@ -92,8 +96,9 @@ define <4 x i16> @fptoui_v4f64_to_v4i16(ptr %ptr) {
9296
; CHECK-NEXT: ldp q0, q1, [x0]
9397
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
9498
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
95-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
96-
; CHECK-NEXT: xtn v0.4h, v0.4s
99+
; CHECK-NEXT: xtn v1.2s, v1.2d
100+
; CHECK-NEXT: xtn v0.2s, v0.2d
101+
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
97102
; CHECK-NEXT: ret
98103
%tmp1 = load <4 x double>, ptr %ptr
99104
%tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>

llvm/test/CodeGen/AArch64/extbinopload.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
650650
; CHECK-NEXT: add x11, x3, #12
651651
; CHECK-NEXT: str s1, [x4]
652652
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
653-
; CHECK-NEXT: ldp s0, s4, [x2]
653+
; CHECK-NEXT: ldp s0, s5, [x2]
654654
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
655655
; CHECK-NEXT: umov w9, v2.h[0]
656656
; CHECK-NEXT: umov w10, v2.h[1]
@@ -662,25 +662,24 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
662662
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
663663
; CHECK-NEXT: mov v0.b[10], w9
664664
; CHECK-NEXT: add x9, x1, #4
665-
; CHECK-NEXT: mov v1.d[1], v2.d[0]
665+
; CHECK-NEXT: uzp1 v1.8b, v1.8b, v2.8b
666666
; CHECK-NEXT: mov v0.b[11], w10
667667
; CHECK-NEXT: add x10, x1, #12
668-
; CHECK-NEXT: bic v1.8h, #255, lsl #8
669668
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
670-
; CHECK-NEXT: ldr s3, [x0, #12]
671-
; CHECK-NEXT: ldp s2, s7, [x0, #4]
672-
; CHECK-NEXT: ld1 { v4.s }[1], [x3]
673-
; CHECK-NEXT: ldp s5, s6, [x2, #8]
674-
; CHECK-NEXT: ld1 { v3.s }[1], [x10]
675-
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
676-
; CHECK-NEXT: ld1 { v5.s }[1], [x8]
677-
; CHECK-NEXT: ld1 { v6.s }[1], [x11]
669+
; CHECK-NEXT: ldr s4, [x0, #12]
670+
; CHECK-NEXT: ldp s3, s16, [x0, #4]
671+
; CHECK-NEXT: ld1 { v5.s }[1], [x3]
672+
; CHECK-NEXT: ldp s6, s7, [x2, #8]
673+
; CHECK-NEXT: ld1 { v4.s }[1], [x10]
674+
; CHECK-NEXT: ld1 { v3.s }[1], [x9]
675+
; CHECK-NEXT: ld1 { v6.s }[1], [x8]
676+
; CHECK-NEXT: ld1 { v7.s }[1], [x11]
678677
; CHECK-NEXT: add x8, x1, #8
679-
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
680-
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
681-
; CHECK-NEXT: ushll v3.8h, v5.8b, #0
682-
; CHECK-NEXT: uaddl v4.8h, v4.8b, v6.8b
683-
; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
678+
; CHECK-NEXT: ld1 { v16.s }[1], [x8]
679+
; CHECK-NEXT: uaddl v2.8h, v3.8b, v4.8b
680+
; CHECK-NEXT: ushll v3.8h, v6.8b, #0
681+
; CHECK-NEXT: uaddl v4.8h, v5.8b, v7.8b
682+
; CHECK-NEXT: uaddl v1.8h, v1.8b, v16.8b
684683
; CHECK-NEXT: uaddw2 v5.8h, v3.8h, v0.16b
685684
; CHECK-NEXT: ushll v0.4s, v2.4h, #3
686685
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3

llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) {
7373
; CHECK-NEXT: ldp q0, q1, [x0]
7474
; CHECK-NEXT: fcvtzs.4s v1, v1
7575
; CHECK-NEXT: fcvtzs.4s v0, v0
76-
; CHECK-NEXT: uzp1.8h v0, v0, v1
77-
; CHECK-NEXT: xtn.8b v0, v0
76+
; CHECK-NEXT: xtn.4h v1, v1
77+
; CHECK-NEXT: xtn.4h v0, v0
78+
; CHECK-NEXT: uzp1.8b v0, v0, v1
7879
; CHECK-NEXT: str d0, [x1]
7980
; CHECK-NEXT: ret
8081
entry:

0 commit comments

Comments
 (0)