Skip to content

Commit fec92e4

Browse files
committed
[AArch64] Don't use LowerToPredicatedOp for shufflevector -> SVE lowerings
The use of `LowerToPredicatedOp` here seems like a mistake, as `LowerToPredicatedOp` turns the SDValue passed to it into the desired predicated node by copying over its operands (and adding a predicate). This results in two odd things here. First, the BITCASTs created and passed to `LowerToPredicatedOp` are not used; only the operands of those bitcasts are taken. Secondly, when a shuffle vector node is passed directly to `LowerToPredicatedOp` to create a `REVD_MERGE_PASSTHRU` node, an invalid REV node is created: REV only takes one vector operand, but both operands from the shuffle vector are copied to the new REV node. This is not an issue in practice, as the extra operand is ignored. These issues were found by the verification added in #140472. Part of #140472. Note: Test changes only result in the vxi64 lowering matching the vxf64 lowering.
1 parent 35a9631 commit fec92e4

File tree

3 files changed

+18
-18
lines changed

3 files changed

+18
-18
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29773,36 +29773,35 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
2977329773
return convertFromScalableVector(DAG, VT, Op);
2977429774
}
2977529775

29776+
auto lowerToRevMergePassthru = [&](unsigned Opcode, SDValue Vec, EVT NewVT) {
29777+
auto Pg = getPredicateForVector(DAG, DL, NewVT);
29778+
SDValue RevOp = DAG.getNode(ISD::BITCAST, DL, NewVT, Vec);
29779+
auto Rev =
29780+
DAG.getNode(Opcode, DL, NewVT, Pg, RevOp, DAG.getUNDEF(ContainerVT));
29781+
auto Cast = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Rev);
29782+
return convertFromScalableVector(DAG, VT, Cast);
29783+
};
29784+
2977629785
unsigned EltSize = VT.getScalarSizeInBits();
2977729786
for (unsigned LaneSize : {64U, 32U, 16U}) {
2977829787
if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), LaneSize)) {
29779-
EVT NewVT =
29780-
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
2978129788
unsigned RevOp;
2978229789
if (EltSize == 8)
2978329790
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
2978429791
else if (EltSize == 16)
2978529792
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
2978629793
else
2978729794
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
29788-
29789-
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
29790-
Op = LowerToPredicatedOp(Op, DAG, RevOp);
29791-
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
29792-
return convertFromScalableVector(DAG, VT, Op);
29795+
EVT NewVT =
29796+
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
29797+
return lowerToRevMergePassthru(RevOp, Op1, NewVT);
2979329798
}
2979429799
}
2979529800

2979629801
if (Subtarget->hasSVE2p1() && EltSize == 64 &&
2979729802
isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), 128)) {
29798-
if (!VT.isFloatingPoint())
29799-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
29800-
29801-
EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64));
29802-
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
29803-
Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);
29804-
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
29805-
return convertFromScalableVector(DAG, VT, Op);
29803+
return lowerToRevMergePassthru(AArch64ISD::REVD_MERGE_PASSTHRU, Op1,
29804+
ContainerVT);
2980629805
}
2980729806

2980829807
unsigned WhichResult;

llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,9 @@ define void @test_revdv4i64_sve2p1(ptr %a) #2 {
213213
; CHECK-LABEL: test_revdv4i64_sve2p1:
214214
; CHECK: // %bb.0:
215215
; CHECK-NEXT: ptrue p0.d, vl4
216+
; CHECK-NEXT: ptrue p1.d
216217
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
217-
; CHECK-NEXT: revd z0.q, p0/m, z0.q
218+
; CHECK-NEXT: revd z0.q, p1/m, z0.q
218219
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
219220
; CHECK-NEXT: ret
220221
%tmp1 = load <4 x i64>, ptr %a

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 {
677677
; CHECK-LABEL: test_revdv4i64_sve2p1:
678678
; CHECK: // %bb.0:
679679
; CHECK-NEXT: ldp q0, q1, [x0]
680-
; CHECK-NEXT: ptrue p0.d, vl2
680+
; CHECK-NEXT: ptrue p0.d
681681
; CHECK-NEXT: revd z0.q, p0/m, z0.q
682682
; CHECK-NEXT: revd z1.q, p0/m, z1.q
683683
; CHECK-NEXT: stp q0, q1, [x0]
@@ -686,7 +686,7 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 {
686686
; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1:
687687
; NONEON-NOSVE: // %bb.0:
688688
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
689-
; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
689+
; NONEON-NOSVE-NEXT: ptrue p0.d
690690
; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q
691691
; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q
692692
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]

0 commit comments

Comments
 (0)