Skip to content

Commit 9c2648d

Browse files
committed
[AArch64] Utilize XAR for certain vector rotates
Resolves #137162. For cases where the rotated value is not produced by an `XOR`, a zeroed register is used as the second `XAR` operand instead.
1 parent ba3fa39 commit 9c2648d

File tree

2 files changed

+44
-19
lines changed

2 files changed

+44
-19
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4532,7 +4532,9 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
45324532

45334533
SDValue N0 = N->getOperand(0);
45344534
SDValue N1 = N->getOperand(1);
4535+
45354536
EVT VT = N->getValueType(0);
4537+
SDLoc DL(N);
45364538

45374539
// Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
45384540
// Rotate by a constant is a funnel shift in IR which is expanded to
@@ -4558,10 +4560,26 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
45584560
!TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
45594561
return false;
45604562

4561-
SDValue XOR = N0.getOperand(1);
4562-
if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4563+
if (N0.getOperand(1) != N1.getOperand(1))
45634564
return false;
45644565

4566+
SDValue R1, R2;
4567+
if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4568+
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4569+
SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4570+
SDValue MOVIV = SDValue(MOV, 0);
4571+
4572+
SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4573+
SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4574+
VT, Zero, MOVIV, ZSub);
4575+
4576+
R1 = N1->getOperand(1);
4577+
R2 = SDValue(SubRegToReg, 0);
4578+
} else {
4579+
R1 = N0.getOperand(1).getOperand(0);
4580+
R2 = N1.getOperand(1).getOperand(1);
4581+
}
4582+
45654583
APInt ShlAmt, ShrAmt;
45664584
if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
45674585
!ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
@@ -4570,11 +4588,11 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
45704588
if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
45714589
return false;
45724590

4573-
SDLoc DL(N);
4591+
SDLoc DLImm(N);
45744592
SDValue Imm =
4575-
CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4593+
CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DLImm, MVT::i32);
45764594

4577-
SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4595+
SDValue Ops[] = {R1, R2, Imm};
45784596
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
45794597
VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
45804598
AArch64::XAR_ZZZI_D})) {
@@ -4591,20 +4609,28 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
45914609
N1->getOpcode() != AArch64ISD::VLSHR)
45924610
return false;
45934611

4594-
if (N0->getOperand(0) != N1->getOperand(0) ||
4595-
N1->getOperand(0)->getOpcode() != ISD::XOR)
4612+
if (N0->getOperand(0) != N1->getOperand(0))
45964613
return false;
45974614

4598-
SDValue XOR = N0.getOperand(0);
4599-
SDValue R1 = XOR.getOperand(0);
4600-
SDValue R2 = XOR.getOperand(1);
4615+
SDValue R1, R2;
4616+
if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4617+
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4618+
SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4619+
SDValue MOVIV = SDValue(MOV, 0);
4620+
R1 = N1->getOperand(0);
4621+
R2 = MOVIV;
4622+
} else {
4623+
SDValue XOR = N0.getOperand(0);
4624+
R1 = XOR.getOperand(0);
4625+
R2 = XOR.getOperand(1);
4626+
}
46014627

46024628
unsigned HsAmt = N0.getConstantOperandVal(1);
46034629
unsigned ShAmt = N1.getConstantOperandVal(1);
46044630

4605-
SDLoc DL = SDLoc(N0.getOperand(1));
4631+
SDLoc DLImm = SDLoc(N0.getOperand(1));
46064632
SDValue Imm = CurDAG->getTargetConstant(
4607-
ShAmt, DL, N0.getOperand(1).getValueType(), false);
4633+
ShAmt, DLImm, N0.getOperand(1).getValueType(), false);
46084634

46094635
if (ShAmt + HsAmt != 64)
46104636
return false;

llvm/test/CodeGen/AArch64/sve2-xar.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,12 @@ define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2
170170
; OR instead of an XOR.
171171
; TODO: We could use usra instruction here for SVE2.
172172
define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
173-
; CHECK-LABEL: xar_nxv2i64_l_neg2:
174-
; CHECK: // %bb.0:
175-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
176-
; CHECK-NEXT: lsr z1.d, z0.d, #4
177-
; CHECK-NEXT: lsl z0.d, z0.d, #60
178-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
179-
; CHECK-NEXT: ret
173+
; SVE2-LABEL: xar_nxv2i64_l_neg2:
174+
; SVE2: // %bb.0:
175+
; SVE2-NEXT: movi v2.2d, #0000000000000000
176+
; SVE2-NEXT: orr z0.d, z0.d, z1.d
177+
; SVE2-NEXT: xar z0.d, z0.d, z2.d, #4
178+
; SVE2-NEXT: ret
180179
%a = or <vscale x 2 x i64> %x, %y
181180
%b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
182181
ret <vscale x 2 x i64> %b

0 commit comments

Comments
 (0)