Skip to content

Commit 62cae9c

Browse files
authored
[AArch64] Don't use LowerToPredicatedOp for shufflevector -> SVE lowerings (#140713)
The use of `LowerToPredicatedOp` here seems like a mistake, as `LowerToPredicatedOp` turns the SDValue passed to it into the desired predicated node by copying over its operands (and adding a predicate). This results in two odd things here. First, the BITCASTs created and passed to `LowerToPredicatedOp` are not used; only the operands of those bitcasts are taken. Second, when a shufflevector node is passed directly to `LowerToPredicatedOp` to create a `REVD_MERGE_PASSTHRU` node, an invalid REV node is created: REV only takes one vector operand, but both operands from the shufflevector are copied to the new REV node. This is not an issue in practice, as the extra operand is ignored. These issues were found by the verification added in #140472. Part of #140472. Note: Test changes only result in the vxf64 lowering matching the vxi64 lowering.
1 parent f4e14bf commit 62cae9c

File tree

3 files changed

+18
-21
lines changed

3 files changed

+18
-21
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29783,35 +29783,33 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
2978329783
}
2978429784

2978529785
unsigned EltSize = VT.getScalarSizeInBits();
29786-
for (unsigned LaneSize : {64U, 32U, 16U}) {
29787-
if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), LaneSize)) {
29788-
EVT NewVT =
29789-
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
29786+
for (unsigned BlockSize : {64U, 32U, 16U}) {
29787+
if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), BlockSize)) {
2979029788
unsigned RevOp;
2979129789
if (EltSize == 8)
2979229790
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
2979329791
else if (EltSize == 16)
2979429792
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
2979529793
else
2979629794
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
29797-
29798-
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
29799-
Op = LowerToPredicatedOp(Op, DAG, RevOp);
29800-
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
29801-
return convertFromScalableVector(DAG, VT, Op);
29795+
EVT BlockedVT =
29796+
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), BlockSize));
29797+
SDValue Pg = getPredicateForVector(DAG, DL, BlockedVT);
29798+
SDValue BlockedOp1 = DAG.getNode(ISD::BITCAST, DL, BlockedVT, Op1);
29799+
SDValue BlockedRev = DAG.getNode(RevOp, DL, BlockedVT, Pg, BlockedOp1,
29800+
DAG.getUNDEF(BlockedVT));
29801+
SDValue Container =
29802+
DAG.getNode(ISD::BITCAST, DL, ContainerVT, BlockedRev);
29803+
return convertFromScalableVector(DAG, VT, Container);
2980229804
}
2980329805
}
2980429806

2980529807
if (Subtarget->hasSVE2p1() && EltSize == 64 &&
2980629808
isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), 128)) {
29807-
if (!VT.isFloatingPoint())
29808-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
29809-
29810-
EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64));
29811-
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
29812-
Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
29813-
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
29814-
return convertFromScalableVector(DAG, VT, Op);
29809+
SDValue Pg = getPredicateForVector(DAG, DL, VT);
29810+
SDValue Revd = DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, ContainerVT,
29811+
Pg, Op1, DAG.getUNDEF(ContainerVT));
29812+
return convertFromScalableVector(DAG, VT, Revd);
2981529813
}
2981629814

2981729815
unsigned WhichResult;

llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,8 @@ define void @test_revdv4f64_sve2p1(ptr %a) #2 {
227227
; CHECK-LABEL: test_revdv4f64_sve2p1:
228228
; CHECK: // %bb.0:
229229
; CHECK-NEXT: ptrue p0.d, vl4
230-
; CHECK-NEXT: ptrue p1.d
231230
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
232-
; CHECK-NEXT: revd z0.q, p1/m, z0.q
231+
; CHECK-NEXT: revd z0.q, p0/m, z0.q
233232
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
234233
; CHECK-NEXT: ret
235234
%tmp1 = load <4 x double>, ptr %a

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,7 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 {
701701
; CHECK-LABEL: test_revdv4f64_sve2p1:
702702
; CHECK: // %bb.0:
703703
; CHECK-NEXT: ldp q0, q1, [x0]
704-
; CHECK-NEXT: ptrue p0.d
704+
; CHECK-NEXT: ptrue p0.d, vl2
705705
; CHECK-NEXT: revd z0.q, p0/m, z0.q
706706
; CHECK-NEXT: revd z1.q, p0/m, z1.q
707707
; CHECK-NEXT: stp q0, q1, [x0]
@@ -710,7 +710,7 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 {
710710
; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1:
711711
; NONEON-NOSVE: // %bb.0:
712712
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
713-
; NONEON-NOSVE-NEXT: ptrue p0.d
713+
; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
714714
; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q
715715
; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q
716716
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]

0 commit comments

Comments
 (0)