@@ -4606,7 +4606,33 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4606
4606
return false ;
4607
4607
}
4608
4608
4609
- if (!Subtarget->hasSHA3 ())
4609
+ // We have Neon SHA3 XAR operation for v2i64 but for types
4610
+ // v4i32, v8i16, v16i8 we can use the SVE XAR operations when SVE2
4611
+ // is available.
4612
+ EVT SVT;
4613
+ switch (VT.getSimpleVT ().SimpleTy ) {
4614
+ case MVT::v4i32:
4615
+ case MVT::v2i32:
4616
+ SVT = MVT::nxv4i32;
4617
+ break ;
4618
+ case MVT::v8i16:
4619
+ case MVT::v4i16:
4620
+ SVT = MVT::nxv8i16;
4621
+ break ;
4622
+ case MVT::v16i8:
4623
+ case MVT::v8i8:
4624
+ SVT = MVT::nxv16i8;
4625
+ break ;
4626
+ case MVT::v2i64:
4627
+ case MVT::v1i64:
4628
+ SVT = Subtarget->hasSHA3 () ? MVT::v2i64 : MVT::nxv2i64;
4629
+ break ;
4630
+ default :
4631
+ return false ;
4632
+ }
4633
+
4634
+ if ((!SVT.isScalableVector () && !Subtarget->hasSHA3 ()) ||
4635
+ (SVT.isScalableVector () && !Subtarget->hasSVE2 ()))
4610
4636
return false ;
4611
4637
4612
4638
if (N0->getOpcode () != AArch64ISD::VSHL ||
@@ -4632,41 +4658,85 @@ bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4632
4658
SDValue Imm = CurDAG->getTargetConstant (
4633
4659
ShAmt, DL, N0.getOperand (1 ).getValueType (), false );
4634
4660
4635
- if (ShAmt + HsAmt != 64 )
4661
+ unsigned VTSizeInBits = VT.getScalarSizeInBits ();
4662
+ if (ShAmt + HsAmt != VTSizeInBits)
4636
4663
return false ;
4637
4664
4638
4665
if (!IsXOROperand) {
4639
4666
SDValue Zero = CurDAG->getTargetConstant (0 , DL, MVT::i64 );
4640
4667
SDNode *MOV =
4641
4668
CurDAG->getMachineNode (AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4642
4669
SDValue MOVIV = SDValue (MOV, 0 );
4670
+
4643
4671
R1 = N1->getOperand (0 );
4644
4672
R2 = MOVIV;
4645
4673
}
4646
4674
4647
- // If the input is a v1i64, widen to a v2i64 to use XAR.
4648
- assert ((VT == MVT::v1i64 || VT == MVT::v2i64) && " Unexpected XAR type!" );
4649
- if (VT == MVT::v1i64) {
4650
- EVT SVT = MVT::v2i64;
4675
+ if (SVT != VT) {
4651
4676
SDValue Undef =
4652
- SDValue (CurDAG->getMachineNode (AArch64::IMPLICIT_DEF, DL, SVT), 0 );
4653
- SDValue DSub = CurDAG->getTargetConstant (AArch64::dsub, DL, MVT::i32 );
4677
+ SDValue (CurDAG->getMachineNode (TargetOpcode::IMPLICIT_DEF, DL, SVT), 0 );
4678
+
4679
+ if (SVT.isScalableVector () && VT.is64BitVector ()) {
4680
+ EVT QVT = VT.getDoubleNumVectorElementsVT (*CurDAG->getContext ());
4681
+
4682
+ SDValue UndefQ = SDValue (
4683
+ CurDAG->getMachineNode (TargetOpcode::IMPLICIT_DEF, DL, QVT), 0 );
4684
+ SDValue DSub = CurDAG->getTargetConstant (AArch64::dsub, DL, MVT::i32 );
4685
+
4686
+ R1 = SDValue (CurDAG->getMachineNode (AArch64::INSERT_SUBREG, DL, QVT,
4687
+ UndefQ, R1, DSub),
4688
+ 0 );
4689
+ if (R2.getValueType () == VT)
4690
+ R2 = SDValue (CurDAG->getMachineNode (AArch64::INSERT_SUBREG, DL, QVT,
4691
+ UndefQ, R2, DSub),
4692
+ 0 );
4693
+ }
4694
+
4695
+ SDValue SubReg = CurDAG->getTargetConstant (
4696
+ (SVT.isScalableVector () ? AArch64::zsub : AArch64::dsub), DL, MVT::i32 );
4697
+
4654
4698
R1 = SDValue (CurDAG->getMachineNode (AArch64::INSERT_SUBREG, DL, SVT, Undef,
4655
- R1, DSub ),
4699
+ R1, SubReg ),
4656
4700
0 );
4657
- if (R2.getValueType () == MVT::v1i64)
4701
+
4702
+ if (SVT.isScalableVector () || R2.getValueType () != SVT)
4658
4703
R2 = SDValue (CurDAG->getMachineNode (AArch64::INSERT_SUBREG, DL, SVT,
4659
- Undef, R2, DSub ),
4704
+ Undef, R2, SubReg ),
4660
4705
0 );
4661
4706
}
4662
4707
4663
4708
SDValue Ops[] = {R1, R2, Imm};
4664
- SDNode *XAR = CurDAG->getMachineNode (AArch64::XAR, DL, MVT::v2i64, Ops);
4709
+ SDNode *XAR = nullptr ;
4710
+
4711
+ if (SVT.isScalableVector ()) {
4712
+ if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4713
+ SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4714
+ AArch64::XAR_ZZZI_D}))
4715
+ XAR = CurDAG->getMachineNode (Opc, DL, SVT, Ops);
4716
+ } else {
4717
+ XAR = CurDAG->getMachineNode (AArch64::XAR, DL, SVT, Ops);
4718
+ }
4665
4719
4666
- if (VT == MVT::v1i64) {
4667
- SDValue DSub = CurDAG->getTargetConstant (AArch64::dsub, DL, MVT::i32 );
4668
- XAR = CurDAG->getMachineNode (AArch64::EXTRACT_SUBREG, DL, VT,
4669
- SDValue (XAR, 0 ), DSub);
4720
+ assert (XAR && " Unexpected NULL value for XAR instruction in DAG" );
4721
+
4722
+ if (SVT != VT) {
4723
+ if (VT.is64BitVector () && SVT.isScalableVector ()) {
4724
+ EVT QVT = VT.getDoubleNumVectorElementsVT (*CurDAG->getContext ());
4725
+
4726
+ SDValue ZSub = CurDAG->getTargetConstant (AArch64::zsub, DL, MVT::i32 );
4727
+ SDNode *Q = CurDAG->getMachineNode (AArch64::EXTRACT_SUBREG, DL, QVT,
4728
+ SDValue (XAR, 0 ), ZSub);
4729
+
4730
+ SDValue DSub = CurDAG->getTargetConstant (AArch64::dsub, DL, MVT::i32 );
4731
+ XAR = CurDAG->getMachineNode (AArch64::EXTRACT_SUBREG, DL, VT,
4732
+ SDValue (Q, 0 ), DSub);
4733
+ } else {
4734
+ SDValue SubReg = CurDAG->getTargetConstant (
4735
+ (SVT.isScalableVector () ? AArch64::zsub : AArch64::dsub), DL,
4736
+ MVT::i32 );
4737
+ XAR = CurDAG->getMachineNode (AArch64::EXTRACT_SUBREG, DL, VT,
4738
+ SDValue (XAR, 0 ), SubReg);
4739
+ }
4670
4740
}
4671
4741
ReplaceNode (N, XAR);
4672
4742
return true ;
0 commit comments