@@ -1674,6 +1674,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1674
1674
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1675
1675
1676
1676
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1677
+
1678
+ for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})
1679
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, VT, Custom);
1677
1680
}
1678
1681
1679
1682
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
@@ -5686,8 +5689,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
5686
5689
case Intrinsic::get_active_lane_mask: {
5687
5690
SDValue ID =
5688
5691
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
5689
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
5690
- Op.getOperand(1), Op.getOperand(2));
5692
+
5693
+ EVT VT = Op.getValueType();
5694
+ if (VT.isScalableVector())
5695
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Op.getOperand(1),
5696
+ Op.getOperand(2));
5697
+
5698
+ // We can use the SVE whilelo instruction to lower this intrinsic by
5699
+ // creating the appropriate sequence of scalable vector operations and
5700
+ // then extracting a fixed-width subvector from the scalable vector.
5701
+
5702
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
5703
+ EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
5704
+
5705
+ SDValue Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WhileVT, ID,
5706
+ Op.getOperand(1), Op.getOperand(2));
5707
+ SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, dl, ContainerVT, Mask);
5708
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MaskAsInt,
5709
+ DAG.getVectorIdxConstant(0, dl));
5691
5710
}
5692
5711
case Intrinsic::aarch64_neon_uaddlv: {
5693
5712
EVT OpVT = Op.getOperand(1).getValueType();
@@ -20462,39 +20481,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
20462
20481
switch (IID) {
20463
20482
default:
20464
20483
break;
20465
- case Intrinsic::get_active_lane_mask: {
20466
- SDValue Res = SDValue();
20467
- EVT VT = N->getValueType(0);
20468
- if (VT.isFixedLengthVector()) {
20469
- // We can use the SVE whilelo instruction to lower this intrinsic by
20470
- // creating the appropriate sequence of scalable vector operations and
20471
- // then extracting a fixed-width subvector from the scalable vector.
20472
-
20473
- SDLoc DL(N);
20474
- SDValue ID =
20475
- DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
20476
-
20477
- EVT WhileVT = EVT::getVectorVT(
20478
- *DAG.getContext(), MVT::i1,
20479
- ElementCount::getScalable(VT.getVectorNumElements()));
20480
-
20481
- // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
20482
- EVT PromVT = getPromotedVTForPredicate(WhileVT);
20483
-
20484
- // Get the fixed-width equivalent of PromVT for extraction.
20485
- EVT ExtVT =
20486
- EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
20487
- VT.getVectorElementCount());
20488
-
20489
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
20490
- N->getOperand(1), N->getOperand(2));
20491
- Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
20492
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
20493
- DAG.getConstant(0, DL, MVT::i64));
20494
- Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
20495
- }
20496
- return Res;
20497
- }
20498
20484
case Intrinsic::aarch64_neon_vcvtfxs2fp:
20499
20485
case Intrinsic::aarch64_neon_vcvtfxu2fp:
20500
20486
return tryCombineFixedPointConvert(N, DCI, DAG);
@@ -25568,15 +25554,15 @@ void AArch64TargetLowering::ReplaceNodeResults(
25568
25554
return;
25569
25555
case ISD::INTRINSIC_WO_CHAIN: {
25570
25556
EVT VT = N->getValueType(0);
25571
- assert((VT == MVT::i8 || VT == MVT::i16) &&
25572
- "custom lowering for unexpected type");
25573
25557
25574
25558
Intrinsic::ID IntID =
25575
25559
static_cast<Intrinsic::ID>(N->getConstantOperandVal(0));
25576
25560
switch (IntID) {
25577
25561
default:
25578
25562
return;
25579
25563
case Intrinsic::aarch64_sve_clasta_n: {
25564
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25565
+ "custom lowering for unexpected type");
25580
25566
SDLoc DL(N);
25581
25567
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
25582
25568
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
@@ -25585,6 +25571,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
25585
25571
return;
25586
25572
}
25587
25573
case Intrinsic::aarch64_sve_clastb_n: {
25574
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25575
+ "custom lowering for unexpected type");
25588
25576
SDLoc DL(N);
25589
25577
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
25590
25578
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
@@ -25593,19 +25581,39 @@ void AArch64TargetLowering::ReplaceNodeResults(
25593
25581
return;
25594
25582
}
25595
25583
case Intrinsic::aarch64_sve_lasta: {
25584
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25585
+ "custom lowering for unexpected type");
25596
25586
SDLoc DL(N);
25597
25587
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
25598
25588
N->getOperand(1), N->getOperand(2));
25599
25589
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25600
25590
return;
25601
25591
}
25602
25592
case Intrinsic::aarch64_sve_lastb: {
25593
+ assert((VT == MVT::i8 || VT == MVT::i16) &&
25594
+ "custom lowering for unexpected type");
25603
25595
SDLoc DL(N);
25604
25596
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
25605
25597
N->getOperand(1), N->getOperand(2));
25606
25598
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25607
25599
return;
25608
25600
}
25601
+ case Intrinsic::get_active_lane_mask: {
25602
+ if (!VT.isFixedLengthVector())
25603
+ return;
25604
+ if (VT.getVectorElementType() != MVT::i1)
25605
+ return;
25606
+
25607
+ // NOTE: Only trivial type promotion is supported.
25608
+ EVT NewVT = getTypeToTransformTo(*DAG.getContext(), VT);
25609
+ if (NewVT.getVectorNumElements() != VT.getVectorNumElements())
25610
+ return;
25611
+
25612
+ SDLoc DL(N);
25613
+ auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops());
25614
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
25615
+ return;
25616
+ }
25609
25617
}
25610
25618
}
25611
25619
case ISD::READ_REGISTER: {
0 commit comments