@@ -1677,6 +1677,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1677
1677
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1678
1678
1679
1679
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1680
+
1681
+ for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
1682
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, VT, Custom);
1680
1683
}
1681
1684
1682
1685
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
@@ -5748,8 +5751,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
5748
5751
case Intrinsic::get_active_lane_mask: {
5749
5752
SDValue ID =
5750
5753
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
5751
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
5752
- Op.getOperand(1), Op.getOperand(2));
5754
+
5755
+ EVT VT = Op.getValueType();
5756
+ if (VT.isScalableVector())
5757
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Op.getOperand(1),
5758
+ Op.getOperand(2));
5759
+
5760
+ // We can use the SVE whilelo instruction to lower this intrinsic by
5761
+ // creating the appropriate sequence of scalable vector operations and
5762
+ // then extracting a fixed-width subvector from the scalable vector.
5763
+
5764
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
5765
+ EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
5766
+
5767
+ SDValue Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WhileVT, ID,
5768
+ Op.getOperand(1), Op.getOperand(2));
5769
+ SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, dl, ContainerVT, Mask);
5770
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MaskAsInt,
5771
+ DAG.getVectorIdxConstant(0, dl));
5753
5772
}
5754
5773
case Intrinsic::aarch64_neon_uaddlv: {
5755
5774
EVT OpVT = Op.getOperand(1).getValueType();
@@ -20530,39 +20549,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
20530
20549
switch (IID) {
20531
20550
default:
20532
20551
break;
20533
- case Intrinsic::get_active_lane_mask: {
20534
- SDValue Res = SDValue();
20535
- EVT VT = N->getValueType(0);
20536
- if (VT.isFixedLengthVector()) {
20537
- // We can use the SVE whilelo instruction to lower this intrinsic by
20538
- // creating the appropriate sequence of scalable vector operations and
20539
- // then extracting a fixed-width subvector from the scalable vector.
20540
-
20541
- SDLoc DL(N);
20542
- SDValue ID =
20543
- DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
20544
-
20545
- EVT WhileVT = EVT::getVectorVT(
20546
- *DAG.getContext(), MVT::i1,
20547
- ElementCount::getScalable(VT.getVectorNumElements()));
20548
-
20549
- // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
20550
- EVT PromVT = getPromotedVTForPredicate(WhileVT);
20551
-
20552
- // Get the fixed-width equivalent of PromVT for extraction.
20553
- EVT ExtVT =
20554
- EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
20555
- VT.getVectorElementCount());
20556
-
20557
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
20558
- N->getOperand(1), N->getOperand(2));
20559
- Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
20560
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
20561
- DAG.getConstant(0, DL, MVT::i64));
20562
- Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
20563
- }
20564
- return Res;
20565
- }
20566
20552
case Intrinsic::aarch64_neon_vcvtfxs2fp:
20567
20553
case Intrinsic::aarch64_neon_vcvtfxu2fp:
20568
20554
return tryCombineFixedPointConvert(N, DCI, DAG);
@@ -25636,15 +25622,15 @@ void AArch64TargetLowering::ReplaceNodeResults(
25636
25622
return;
25637
25623
case ISD::INTRINSIC_WO_CHAIN: {
25638
25624
EVT VT = N->getValueType(0);
25639
- assert((VT == MVT::i8 || VT == MVT::i16) &&
25640
- "custom lowering for unexpected type");
25641
25625
25642
25626
Intrinsic::ID IntID =
25643
25627
static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
25644
25628
switch (IntID) {
25645
25629
default:
25646
25630
return;
25647
25631
case Intrinsic::aarch64_sve_clasta_n: {
25632
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25633
+ "custom lowering for unexpected type");
25648
25634
SDLoc DL(N);
25649
25635
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
25650
25636
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
@@ -25653,6 +25639,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
25653
25639
return;
25654
25640
}
25655
25641
case Intrinsic::aarch64_sve_clastb_n: {
25642
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25643
+ "custom lowering for unexpected type");
25656
25644
SDLoc DL(N);
25657
25645
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
25658
25646
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
@@ -25661,19 +25649,37 @@ void AArch64TargetLowering::ReplaceNodeResults(
25661
25649
return;
25662
25650
}
25663
25651
case Intrinsic::aarch64_sve_lasta: {
25652
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25653
+ "custom lowering for unexpected type");
25664
25654
SDLoc DL(N);
25665
25655
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
25666
25656
N->getOperand(1), N->getOperand(2));
25667
25657
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25668
25658
return;
25669
25659
}
25670
25660
case Intrinsic::aarch64_sve_lastb: {
25661
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25662
+ "custom lowering for unexpected type");
25671
25663
SDLoc DL(N);
25672
25664
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
25673
25665
N->getOperand(1), N->getOperand(2));
25674
25666
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25675
25667
return;
25676
25668
}
25669
+ case Intrinsic::get_active_lane_mask: {
25670
+ if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1)
25671
+ return;
25672
+
25673
+ // NOTE: Only trivial type promotion is supported.
25674
+ EVT NewVT = getTypeToTransformTo(*DAG.getContext(), VT);
25675
+ if (NewVT.getVectorNumElements() != VT.getVectorNumElements())
25676
+ return;
25677
+
25678
+ SDLoc DL(N);
25679
+ auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
25680
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25681
+ return;
25682
+ }
25677
25683
}
25678
25684
}
25679
25685
case ISD::READ_REGISTER: {
0 commit comments