[AArch64][SVE2] Generate XAR #77160

Conversation
Bitwise exclusive OR and rotate right by immediate

Add a new ISD node for XAR and lower the following rotate pattern to XAR for the appropriate types:

    rotr (xor(x, y), imm) -> xar (x, y, imm)

Change-Id: If1f649b1bf5365b575dc9fa3e6618e97dc19a066
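For context, a minimal C++ sketch (not part of the patch; the loop shape and the fixed rotate amount are illustrative assumptions) of the kind of scalar code whose SVE2-vectorized form contains this rotr-of-xor pattern:

```cpp
#include <cstddef>
#include <cstdint>

// Rotate right by a constant amount (n must be in 1..63 here).
static inline uint64_t rotr64(uint64_t v, unsigned n) {
  return (v >> n) | (v << (64 - n));
}

// xor-then-rotate loops like this are common in hashing/crypto kernels; when
// vectorized for SVE2, the loop body can become a single XAR per vector.
void xor_rotr(uint64_t *dst, const uint64_t *a, const uint64_t *b, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = rotr64(a[i] ^ b[i], 4); // rotr(xor(x, y), 4) -> xar z.d, ..., #4
}
```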
@llvm/pr-subscribers-backend-aarch64

Author: Usman Nadeem (UsmanNadeem)

Changes

Bitwise exclusive OR and rotate right by immediate. Add a new ISD node for XAR and lower the following rotate pattern to XAR for the appropriate types: rotr (xor(x, y), imm) -> xar (x, y, imm)

Change-Id: If1f649b1bf5365b575dc9fa3e6618e97dc19a066

Full diff: https://github.com/llvm/llvm-project/pull/77160.diff

5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..cd51ce01caee9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1648,6 +1648,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
}
+ if (Subtarget->hasSVE2orSME()) {
+ for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64})
+ setOperationAction(ISD::ROTL, VT, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
@@ -2645,6 +2650,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::MSRR)
MAKE_CASE(AArch64ISD::RSHRNB_I)
MAKE_CASE(AArch64ISD::CTTZ_ELTS)
+ MAKE_CASE(AArch64ISD::XAR_I)
}
#undef MAKE_CASE
return nullptr;
@@ -3741,6 +3747,30 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
return std::make_pair(Value, Overflow);
}
+SDValue AArch64TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() && "Expected a scalable vector.");
+ assert(Subtarget->hasSVE2orSME() && "Custom lowering only for SVE2.");
+
+ // rotr (xor(x, y), imm) -> xar1 (x, y, imm)
+ SDValue Xor = Op.getOperand(0);
+ SDValue RotlValue = Op.getOperand(1);
+
+ if (Xor.getOpcode() != ISD::XOR || RotlValue.getOpcode() != ISD::SPLAT_VECTOR)
+ return SDValue();
+ if (!isa<ConstantSDNode>(RotlValue.getOperand(0).getNode()))
+ return SDValue();
+
+ uint64_t RotrAmt =
+ (VT.getScalarSizeInBits() - RotlValue->getConstantOperandVal(0)) %
+ VT.getScalarSizeInBits();
+
+ SDLoc DL(Op);
+ SDValue Ops[] = {Xor.getOperand(0), Xor.getOperand(1),
+ DAG.getTargetConstant(RotrAmt, DL, MVT::i32)};
+ return DAG.getNode(AArch64ISD::XAR_I, DL, VT, Ops);
+}
+
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
!Subtarget->isNeonAvailable()))
@@ -6414,6 +6444,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFunnelShift(Op, DAG);
case ISD::FLDEXP:
return LowerFLDEXP(Op, DAG);
+ case ISD::ROTL:
+ return LowerROTL(Op, DAG);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6ddbcd41dcb769..0d9ebad4ada905 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -215,6 +215,9 @@ enum NodeType : unsigned {
// Vector narrowing shift by immediate (bottom)
RSHRNB_I,
+ // Vector bitwise xor and rotate right by immediate
+ XAR_I,
+
// Vector shift by constant and insert
VSLI,
VSRI,
@@ -1143,6 +1146,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 344a153890631e..6e018afe18bd40 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -454,6 +454,15 @@ def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
(xor node:$op1, (xor node:$op2, node:$op3))]>;
+def SDT_AArch64xar_Imm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>,
+ SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>;
+def AArch64xar_node : SDNode<"AArch64ISD::XAR_I", SDT_AArch64xar_Imm>;
+def AArch64xar : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_xar node:$op1, node:$op2, node:$op3),
+ (AArch64xar_node node:$op1, node:$op2, node:$op3)]>;
+
+
def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
[(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm),
(vselect node:$pg, (AArch64fadd_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za),
@@ -3721,7 +3730,7 @@ let Predicates = [HasSVE2orSME] in {
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
// SVE2 bitwise xor and rotate right by immediate
- defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
+ defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", AArch64xar>;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b17e215e200dea..a131cf8a6f5402 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -394,6 +394,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
void mirFileLoaded(MachineFunction &MF) const override;
bool hasSVEorSME() const { return hasSVE() || hasSME(); }
+ bool hasSVE2orSME() const { return hasSVE2() || hasSME(); }
// Return the known range for the bit length of SVE data registers. A value
// of 0 means nothing is known about that particular limit beyong what's
diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll
new file mode 100644
index 00000000000000..4b032f74b3a244
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefix=SVE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefix=SVE2 %s
+
+define <vscale x 2 x i64> @xar_nxv2i64_l(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.d, z0.d, #4
+; SVE-NEXT: lsl z0.d, z0.d, #60
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 60, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x i64> @xar_nxv2i64_r(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.d, z0.d, #60
+; SVE-NEXT: lsr z0.d, z0.d, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+
+define <vscale x 4 x i32> @xar_nxv4i32_l(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; SVE-LABEL: xar_nxv4i32_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.s, z0.s, #4
+; SVE-NEXT: lsl z0.s, z0.s, #28
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv4i32_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 4 x i32> %x, %y
+ %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 28, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @xar_nxv4i32_r(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; SVE-LABEL: xar_nxv4i32_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.s, z0.s, #28
+; SVE-NEXT: lsr z0.s, z0.s, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv4i32_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 4 x i32> %x, %y
+ %b = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 4, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 8 x i16> @xar_nxv8i16_l(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; SVE-LABEL: xar_nxv8i16_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.h, z0.h, #4
+; SVE-NEXT: lsl z0.h, z0.h, #12
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv8i16_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 8 x i16> %x, %y
+ %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 12, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 8 x i16> @xar_nxv8i16_r(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; SVE-LABEL: xar_nxv8i16_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.h, z0.h, #12
+; SVE-NEXT: lsr z0.h, z0.h, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv8i16_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 8 x i16> %x, %y
+ %b = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 4, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @xar_nxv16i8_l(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; SVE-LABEL: xar_nxv16i8_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.b, z0.b, #4
+; SVE-NEXT: lsl z0.b, z0.b, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv16i8_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 16 x i8> %x, %y
+ %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 4, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
+ ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; SVE-LABEL: xar_nxv16i8_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.b, z0.b, #4
+; SVE-NEXT: lsr z0.b, z0.b, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv16i8_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 16 x i8> %x, %y
+ %b = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 4, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
+ ret <vscale x 16 x i8> %b
+}
+
+
+define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
+; SVE-LABEL: xar_nxv2i64_l_neg1:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z3.d, z2.d
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: subr z2.d, z2.d, #0 // =0x0
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: and z2.d, z2.d, #0x3f
+; SVE-NEXT: and z3.d, z3.d, #0x3f
+; SVE-NEXT: movprfx z1, z0
+; SVE-NEXT: lsl z1.d, p0/m, z1.d, z3.d
+; SVE-NEXT: lsr z0.d, p0/m, z0.d, z2.d
+; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l_neg1:
+; SVE2: // %bb.0:
+; SVE2-NEXT: mov z3.d, z2.d
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: subr z2.d, z2.d, #0 // =0x0
+; SVE2-NEXT: eor z0.d, z0.d, z1.d
+; SVE2-NEXT: and z2.d, z2.d, #0x3f
+; SVE2-NEXT: and z3.d, z3.d, #0x3f
+; SVE2-NEXT: movprfx z1, z0
+; SVE2-NEXT: lsl z1.d, p0/m, z1.d, z3.d
+; SVE2-NEXT: lsr z0.d, p0/m, z0.d, z2.d
+; SVE2-NEXT: orr z0.d, z1.d, z0.d
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)
+ ret <vscale x 2 x i64> %b
+}
+
+; TODO: We could use usra instruction here.
+define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_l_neg2:
+; SVE: // %bb.0:
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.d, z0.d, #4
+; SVE-NEXT: lsl z0.d, z0.d, #60
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l_neg2:
+; SVE2: // %bb.0:
+; SVE2-NEXT: orr z0.d, z0.d, z1.d
+; SVE2-NEXT: lsr z1.d, z0.d, #4
+; SVE2-NEXT: lsl z0.d, z0.d, #60
+; SVE2-NEXT: orr z0.d, z0.d, z1.d
+; SVE2-NEXT: ret
+ %a = or <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 60, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
def AArch64xar : PatFrags<(ops node:$op1, node:$op2, node:$op3),
                          [(int_aarch64_sve_xar node:$op1, node:$op2, node:$op3),
                           (AArch64xar_node node:$op1, node:$op2, node:$op3)]>;
Since you're creating XAR_I to match the exact behaviour of the instruction, we'd then typically extend performIntrinsicCombine to lower int_aarch64_sve_xar to XAR_I so that future combines catch the most cases, and then you'll not need a PatFrags.

It is better to avoid new DAG nodes if we can, as they can have some downsides; if we can match from the existing nodes, that is generally beneficial. Can this be performed like the Neon instruction, using a method similar to trySelectXAR?
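For illustration, a rough sketch of the direction suggested here. This is not the code that landed; the SHL_PRED/SRL_PRED node shapes, the XAR_ZZZI_* opcode names, and the skipped all-active-predicate check are assumptions. By the time ISel runs, a constant rotate of the XOR has been expanded into an OR of two shifts, so the match happens on the ISD::OR node, much like the existing NEON path in trySelectXAR:

```cpp
// Illustrative sketch only: extend trySelectXAR (which already handles the
// NEON v2i64 case) to match the SVE2 form
//   or (AArch64ISD::SHL_PRED pg, (xor x, y), splat(bits-imm)),
//      (AArch64ISD::SRL_PRED pg, (xor x, y), splat(imm))
// and select it directly to XAR_ZZZI_{B,H,S,D}.
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isScalableVector() && Subtarget->hasSVE2()) {
    // Canonicalize so N0 is the left shift and N1 the right shift.
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    // Both shifts must act on the same XOR.  (A complete implementation must
    // also verify that the governing predicates are all-active.)
    SDValue Xor = N0.getOperand(1);
    if (Xor.getOpcode() != ISD::XOR || Xor != N1.getOperand(1))
      return false;

    // The two splat shift amounts must add up to the element size.
    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt) ||
        ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    unsigned Opc;
    switch (VT.getScalarSizeInBits()) {
    case 8:  Opc = AArch64::XAR_ZZZI_B; break;
    case 16: Opc = AArch64::XAR_ZZZI_H; break;
    case 32: Opc = AArch64::XAR_ZZZI_S; break;
    case 64: Opc = AArch64::XAR_ZZZI_D; break;
    default: return false;
    }

    SDValue Ops[] = {Xor.getOperand(0), Xor.getOperand(1),
                     CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL,
                                               MVT::i32)};
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // ... existing NEON v2i64 handling ...
  return false;
}
```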
This reverts commit b3e26b3.
Bitwise exclusive OR and rotate right by immediate

Select xar (x, y, imm) for the following pattern:
    or (shl (xor x, y), nBits-imm), (shr (xor x, y), imm)
This is essentially:
    rotr (xor(x, y), imm)

Change-Id: I55eac358745085e4f37c159ad3008113ac80a78b
Updated the patch to use code similar to how it is done in NEON.
Other than the comments below, this LGTM. Thanks for moving it.
Change-Id: I5852bd8ced920d9d3a4ab4b407ad309134f0f96d
Thanks. LGTM
Bitwise exclusive OR and rotate right by immediate
Select xar (x, y, imm) for the following pattern:
or (shl (xor x, y), nBits-imm), (shr (xor x, y), imm)
This is essentially:
rotr (xor(x, y), imm)
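As a quick sanity check of that equivalence (illustrative only, not part of the patch), take 64-bit lanes with imm = 4: OR-ing a left shift by nBits-imm = 60 with a logical right shift by 4 rotates the value right by 4, wrapping the low nibble around to the top.

```cpp
// Not from the patch: compile-time check that
// or(shl(v, 64 - imm), lshr(v, imm)) equals rotr(v, imm) for imm = 4.
static_assert(((0x0123456789abcdefULL << 60) | (0x0123456789abcdefULL >> 4)) ==
                  0xf0123456789abcdeULL,
              "shl by nBits-imm OR'd with lshr by imm is a rotate right by imm");
```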