Commit f8b4460
[LLVM][AArch64] Improve big endian code generation for SVE BITCASTs. (#104769)
For the most part I've tried to maintain the use of ISD::BITCAST wherever possible so as to keep access to more DAG combines.
1 parent d43a809 commit f8b4460

3 files changed (+347 additions, -895 deletions)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 42 additions & 19 deletions
@@ -1496,7 +1496,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::AVGCEILU, VT, Custom);
 
       if (!Subtarget->isLittleEndian())
-        setOperationAction(ISD::BITCAST, VT, Expand);
+        setOperationAction(ISD::BITCAST, VT, Custom);
 
       if (Subtarget->hasSVE2() ||
           (Subtarget->hasSME() && Subtarget->isStreaming()))
@@ -1510,9 +1510,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
     }
 
-    // Legalize unpacked bitcasts to REINTERPRET_CAST.
-    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
-                    MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
+    // Type legalize unpacked bitcasts.
+    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})
       setOperationAction(ISD::BITCAST, VT, Custom);
 
     for (auto VT :
@@ -1587,6 +1586,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                     MVT::nxv4f32, MVT::nxv2f64}) {
+      setOperationAction(ISD::BITCAST, VT, Custom);
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
@@ -1658,20 +1658,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setCondCodeAction(ISD::SETUGT, VT, Expand);
       setCondCodeAction(ISD::SETUEQ, VT, Expand);
       setCondCodeAction(ISD::SETONE, VT, Expand);
-
-      if (!Subtarget->isLittleEndian())
-        setOperationAction(ISD::BITCAST, VT, Expand);
     }
 
     for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
+      setOperationAction(ISD::BITCAST, VT, Custom);
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
-
-      if (!Subtarget->isLittleEndian())
-        setOperationAction(ISD::BITCAST, VT, Expand);
     }
 
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -4962,22 +4957,35 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
     return LowerFixedLengthBitcastToSVE(Op, DAG);
 
   if (OpVT.isScalableVector()) {
-    // Bitcasting between unpacked vector types of different element counts is
-    // not a NOP because the live elements are laid out differently.
-    //   01234567
-    // e.g. nxv2i32 = XX??XX??
-    //      nxv4f16 = X?X?X?X?
-    if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
-      return SDValue();
+    assert(isTypeLegal(OpVT) && "Unexpected result type!");
 
-    if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
+    // Handle type legalisation first.
+    if (!isTypeLegal(ArgVT)) {
       assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
              "Expected int->fp bitcast!");
+
+      // Bitcasting between unpacked vector types of different element counts is
+      // not a NOP because the live elements are laid out differently.
+      //   01234567
+      // e.g. nxv2i32 = XX??XX??
+      //      nxv4f16 = X?X?X?X?
+      if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
+        return SDValue();
+
       SDValue ExtResult =
           DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
                       Op.getOperand(0));
       return getSVESafeBitCast(OpVT, ExtResult, DAG);
     }
+
+    // Bitcasts between legal types with the same element count are legal.
+    if (OpVT.getVectorElementCount() == ArgVT.getVectorElementCount())
+      return Op;
+
+    // getSVESafeBitCast does not support casting between unpacked types.
+    if (!isPackedVectorType(OpVT, DAG))
+      return SDValue();
+
     return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
   }
 
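The layout comment kept in the hunk above is easiest to see with concrete byte positions. The short standalone program below is a model for illustration only, not LLVM code; it assumes vscale = 1 (a 128-bit register) and that an unpacked element occupies the least significant bits of its container lane. It reproduces the two diagrams from the comment, which is why such a cast cannot be folded to a simple register reuse:

// Standalone illustration (not LLVM code) of the layout comment above: for a
// 128-bit register granule (vscale = 1), mark which 16-bit slots hold live
// data for an unpacked vector type, assuming each element sits in the least
// significant bits of an equally sized container lane.
#include <cstdio>
#include <string>

// numElts elements of eltBits each, spread over 128 bits.
static std::string liveSlots(unsigned numElts, unsigned eltBits) {
  const unsigned containerBits = 128 / numElts;
  std::string slots(8, '?');                    // eight 16-bit slots: 01234567
  for (unsigned lane = 0; lane < numElts; ++lane)
    for (unsigned b = 0; b < eltBits / 16; ++b)
      slots[lane * (containerBits / 16) + b] = 'X';
  return slots;
}

int main() {
  //            01234567
  // nxv2i32 -> XX??XX??
  // nxv4f16 -> X?X?X?X?
  printf("nxv2i32 = %s\n", liveSlots(2, 32).c_str());
  printf("nxv4f16 = %s\n", liveSlots(4, 16).c_str());
  // The live slots differ, so reinterpreting the register bits directly would
  // read lanes from positions the other type never wrote. That is why the
  // lowering above declines to fold such casts to a plain register reuse.
  return 0;
}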

@@ -28906,7 +28914,22 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
   if (InVT != PackedInVT)
     Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
 
-  Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
+  if (Subtarget->isLittleEndian() ||
+      PackedVT.getScalarSizeInBits() == PackedInVT.getScalarSizeInBits())
+    Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
+  else {
+    EVT PackedVTAsInt = PackedVT.changeTypeToInteger();
+    EVT PackedInVTAsInt = PackedInVT.changeTypeToInteger();
+
+    // Simulate the effect of casting through memory.
+    Op = DAG.getNode(ISD::BITCAST, DL, PackedInVTAsInt, Op);
+    if (PackedInVTAsInt.getScalarSizeInBits() != 8)
+      Op = DAG.getNode(ISD::BSWAP, DL, PackedInVTAsInt, Op);
+    Op = DAG.getNode(AArch64ISD::NVCAST, DL, PackedVTAsInt, Op);
+    if (PackedVTAsInt.getScalarSizeInBits() != 8)
+      Op = DAG.getNode(ISD::BSWAP, DL, PackedVTAsInt, Op);
+    Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
+  }
 
   // Unpack result if required.
   if (VT != PackedVT)
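
The big-endian branch added above replaces the single BITCAST with bitcast-to-integer, per-element BSWAP, NVCAST (a raw regrouping of the register bits into the new element size), another per-element BSWAP, and a bitcast back. The standalone program below is a minimal host-side model of why that sequence matches LLVM's bitcast semantics (the value keeps the same bytes when stored as the source type and reloaded as the destination type); the four-lane/64-bit register model and helper names are assumptions made for the illustration, not LLVM code:

// Standalone model (not LLVM code): check that per-element byte swaps around a
// raw lane regrouping reproduce "store as one type, reload as another" on a
// big-endian target. Lane k of an e-bit element occupies register bits
// [k*e, (k+1)*e); this layout is an assumption of the model.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Big-endian memory image of four 16-bit lanes: lane 0 at the lowest address,
// bytes within each lane most-significant first.
static void storeBE16(const uint16_t lanes[4], uint8_t mem[8]) {
  for (int i = 0; i < 4; ++i) {
    mem[2 * i] = uint8_t(lanes[i] >> 8);
    mem[2 * i + 1] = uint8_t(lanes[i]);
  }
}

// Reload the same bytes as two big-endian 32-bit lanes: the reference
// semantics of a bitcast from a 16-bit-element to a 32-bit-element vector.
static void loadBE32(const uint8_t mem[8], uint32_t lanes[2]) {
  for (int i = 0; i < 2; ++i)
    lanes[i] = (uint32_t(mem[4 * i]) << 24) | (uint32_t(mem[4 * i + 1]) << 16) |
               (uint32_t(mem[4 * i + 2]) << 8) | uint32_t(mem[4 * i + 3]);
}

static uint16_t bswap16(uint16_t x) { return uint16_t((x >> 8) | (x << 8)); }
static uint32_t bswap32(uint32_t x) {
  return (x >> 24) | ((x >> 8) & 0xff00u) | ((x << 8) & 0xff0000u) | (x << 24);
}

int main() {
  const uint16_t src[4] = {0x0102, 0x0304, 0x0506, 0x0708};

  // Reference result: bitcast defined via memory (store v4i16, load v2i32).
  uint8_t mem[8];
  uint32_t viaMemory[2];
  storeBE16(src, mem);
  loadBE32(mem, viaMemory);

  // In-register sequence: BSWAP each source lane, regroup the raw register
  // bits into wider lanes (the NVCAST step), BSWAP each destination lane.
  uint16_t swapped[4];
  for (int i = 0; i < 4; ++i)
    swapped[i] = bswap16(src[i]);
  uint32_t regrouped[2];
  for (int i = 0; i < 2; ++i)
    regrouped[i] = uint32_t(swapped[2 * i]) | (uint32_t(swapped[2 * i + 1]) << 16);
  uint32_t viaRegister[2] = {bswap32(regrouped[0]), bswap32(regrouped[1])};

  for (int i = 0; i < 2; ++i) {
    printf("lane %d: memory 0x%08x, register 0x%08x\n", i, viaMemory[i],
           viaRegister[i]);
    assert(viaMemory[i] == viaRegister[i]);
  }
  return 0;
}

The same reasoning explains the two getScalarSizeInBits() != 8 guards in the hunk: reversing the bytes of a byte-sized element is a no-op, so the corresponding BSWAP is skipped.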

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 56 additions & 107 deletions
@@ -2650,113 +2650,62 @@ let Predicates = [HasSVEorSME] in {
                              sub_32)>;
   }
 
-  // FIXME: BigEndian requires an additional REV instruction to satisfy the
-  // constraint that none of the bits change when stored to memory as one
-  // type, and reloaded as another type.
-  let Predicates = [IsLE] in {
-    def : Pat<(nxv16i8 (bitconvert nxv8i16:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv16i8 (bitconvert nxv4i32:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv16i8 (bitconvert nxv2i64:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv16i8 (bitconvert nxv8f16:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv16i8 (bitconvert nxv4f32:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv16i8 (bitconvert nxv2f64:$src)), (nxv16i8 ZPR:$src)>;
-
-    def : Pat<(nxv8i16 (bitconvert nxv16i8:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv4i32:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv2i64:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv8f16:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv4f32:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv2f64:$src)), (nxv8i16 ZPR:$src)>;
-
-    def : Pat<(nxv4i32 (bitconvert nxv16i8:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv8i16:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv2i64:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv8f16:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv4f32:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv2f64:$src)), (nxv4i32 ZPR:$src)>;
-
-    def : Pat<(nxv2i64 (bitconvert nxv16i8:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv8i16:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv4i32:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv8f16:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv4f32:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv2f64:$src)), (nxv2i64 ZPR:$src)>;
-
-    def : Pat<(nxv8f16 (bitconvert nxv16i8:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv8i16:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv4i32:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv2i64:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv4f32:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv2f64:$src)), (nxv8f16 ZPR:$src)>;
-
-    def : Pat<(nxv4f32 (bitconvert nxv16i8:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv8i16:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv4i32:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv2i64:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv8f16:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv2f64:$src)), (nxv4f32 ZPR:$src)>;
-
-    def : Pat<(nxv2f64 (bitconvert nxv16i8:$src)), (nxv2f64 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv8i16:$src)), (nxv2f64 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv4i32:$src)), (nxv2f64 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv2i64:$src)), (nxv2f64 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv8f16:$src)), (nxv2f64 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv4f32:$src)), (nxv2f64 ZPR:$src)>;
-
-    def : Pat<(nxv8bf16 (bitconvert nxv16i8:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv8i16:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv4i32:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv2i64:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv8f16:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv4f32:$src)), (nxv8bf16 ZPR:$src)>;
-    def : Pat<(nxv8bf16 (bitconvert nxv2f64:$src)), (nxv8bf16 ZPR:$src)>;
-
-    def : Pat<(nxv16i8 (bitconvert nxv8bf16:$src)), (nxv16i8 ZPR:$src)>;
-    def : Pat<(nxv8i16 (bitconvert nxv8bf16:$src)), (nxv8i16 ZPR:$src)>;
-    def : Pat<(nxv4i32 (bitconvert nxv8bf16:$src)), (nxv4i32 ZPR:$src)>;
-    def : Pat<(nxv2i64 (bitconvert nxv8bf16:$src)), (nxv2i64 ZPR:$src)>;
-    def : Pat<(nxv8f16 (bitconvert nxv8bf16:$src)), (nxv8f16 ZPR:$src)>;
-    def : Pat<(nxv4f32 (bitconvert nxv8bf16:$src)), (nxv4f32 ZPR:$src)>;
-    def : Pat<(nxv2f64 (bitconvert nxv8bf16:$src)), (nxv2f64 ZPR:$src)>;
-
-    def : Pat<(nxv16i1 (bitconvert aarch64svcount:$src)), (nxv16i1 PPR:$src)>;
-    def : Pat<(aarch64svcount (bitconvert nxv16i1:$src)), (aarch64svcount PNR:$src)>;
-  }
-
-  // These allow casting from/to unpacked predicate types.
-  def : Pat<(nxv16i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv16i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv16i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv16i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv16i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv8i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv8i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv8i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv8i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv4i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv4i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv4i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv4i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv2i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv2i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv2i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv2i1 (reinterpret_cast nxv1i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv1i1 (reinterpret_cast nxv16i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv1i1 (reinterpret_cast nxv8i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv1i1 (reinterpret_cast nxv4i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-  def : Pat<(nxv1i1 (reinterpret_cast nxv2i1:$src)), (COPY_TO_REGCLASS PPR:$src, PPR)>;
-
-  // These allow casting from/to unpacked floating-point types.
-  def : Pat<(nxv2f16 (reinterpret_cast nxv8f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv8f16 (reinterpret_cast nxv2f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv4f16 (reinterpret_cast nxv8f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv8f16 (reinterpret_cast nxv4f16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv2f32 (reinterpret_cast nxv4f32:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv4f32 (reinterpret_cast nxv2f32:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv2bf16 (reinterpret_cast nxv8bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv8bf16 (reinterpret_cast nxv2bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv4bf16 (reinterpret_cast nxv8bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-  def : Pat<(nxv8bf16 (reinterpret_cast nxv4bf16:$src)), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+  // For big endian, only BITCASTs involving same sized vector types with same
+  // size vector elements can be isel'd directly.
+  let Predicates = [IsLE] in
+    foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
+      foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
+        if !ne(VT,VT2) then
+          def : Pat<(VT (bitconvert (VT2 ZPR:$src))), (VT ZPR:$src)>;
+
+  def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+  def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+
+  def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+  def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+
+  def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+  def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+  def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+  def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+
+  def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+  def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+
+  def : Pat<(nxv4bf16 (bitconvert (nxv4f16 ZPR:$src))), (nxv4bf16 ZPR:$src)>;
+  def : Pat<(nxv4f16 (bitconvert (nxv4bf16 ZPR:$src))), (nxv4f16 ZPR:$src)>;
+
+  def : Pat<(nxv2bf16 (bitconvert (nxv2f16 ZPR:$src))), (nxv2bf16 ZPR:$src)>;
+  def : Pat<(nxv2f16 (bitconvert (nxv2bf16 ZPR:$src))), (nxv2f16 ZPR:$src)>;
+
+  def : Pat<(nxv16i1 (bitconvert (aarch64svcount PNR:$src))), (nxv16i1 PPR:$src)>;
+  def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PNR:$src)>;
+
+  // These allow nop casting between predicate vector types.
+  foreach VT = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in
+    foreach VT2 = [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ] in
+      def : Pat<(VT (reinterpret_cast (VT2 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+
+  // These allow nop casting between half vector types.
+  foreach VT = [ nxv2f16, nxv4f16, nxv8f16 ] in
+    foreach VT2 = [ nxv2f16, nxv4f16, nxv8f16 ] in
+      def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
+  // These allow nop casting between float vector types.
+  foreach VT = [ nxv2f32, nxv4f32 ] in
+    foreach VT2 = [ nxv2f32, nxv4f32 ] in
+      def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
+  // These allow nop casting between bfloat vector types.
+  foreach VT = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in
+    foreach VT2 = [ nxv2bf16, nxv4bf16, nxv8bf16 ] in
+      def : Pat<(VT (reinterpret_cast (VT2 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
+  // These allow nop casting between all packed vector types.
+  foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
+    foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
+      def : Pat<(VT (AArch64NvCast (VT2 ZPR:$src))), (VT ZPR:$src)>;
 
   def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
             (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
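
The comment introducing the new patterns relies on the fact that a bitcast which keeps the element size is a per-lane bit-pattern copy, so its store/reload semantics hold on either endianness and the pattern can forward the register unchanged. A tiny host-side check of that property, using 16-bit integer lanes as a stand-in for i16/f16/bf16 bit patterns (illustrative only, not LLVM or TableGen code):

// Standalone check (not LLVM code): when the element size does not change,
// "store as one type, reload as the other" returns exactly the original lane
// values even with big-endian element storage, so no REV/BSWAP is needed and
// the bitconvert can be selected as a plain register reuse.
#include <cassert>
#include <cstdint>

int main() {
  const uint16_t lanes[4] = {0x3C00, 0x0102, 0xBEEF, 0x7FFF};

  uint8_t mem[8];
  for (int i = 0; i < 4; ++i) {            // big-endian store of 16-bit lanes
    mem[2 * i] = uint8_t(lanes[i] >> 8);
    mem[2 * i + 1] = uint8_t(lanes[i]);
  }

  for (int i = 0; i < 4; ++i) {            // big-endian reload as 16-bit lanes
    uint16_t reloaded = uint16_t((uint16_t(mem[2 * i]) << 8) | mem[2 * i + 1]);
    assert(reloaded == lanes[i]);          // per-lane bit pattern is unchanged
  }
  return 0;
}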
