Skip to content

[AArch64][SME2] Refine fcvtu/fcvts/scvtf/ucvtf #77947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/TargetBuiltins.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,9 @@ namespace clang {
bool isZExtReturn() const { return Flags & IsZExtReturn; }
bool isByteIndexed() const { return Flags & IsByteIndexed; }
bool isOverloadNone() const { return Flags & IsOverloadNone; }
bool isOverloadWhile() const { return Flags & IsOverloadWhile; }
bool isOverloadWhileOrMultiVecCvt() const {
return Flags & IsOverloadWhileOrMultiVecCvt;
}
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
Expand Down
48 changes: 24 additions & 24 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -749,14 +749,14 @@ def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone,
////////////////////////////////////////////////////////////////////////////////
// While comparisons

def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;

////////////////////////////////////////////////////////////////////////////////
// Counting bit
Expand Down Expand Up @@ -1336,14 +1336,14 @@ let TargetGuard = "sve2p1|sme2" in {
////////////////////////////////////////////////////////////////////////////////
// SVE2 WhileGE/GT
let TargetGuard = "sve2" in {
def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>;
def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>;
}

let TargetGuard = "sve2p1|sme2" in {
Expand Down Expand Up @@ -2244,15 +2244,15 @@ let TargetGuard = "sme2" in {
def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>;
def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>;

def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>;
def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>;
def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>;
def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>;
def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_U32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_S32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "i", MergeNone, "aarch64_sve_fcvtzs_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;

def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>;
def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>;
def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>;
def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>;
def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_U32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>;
}

//
Expand Down
70 changes: 35 additions & 35 deletions clang/include/clang/Basic/arm_sve_sme_incl.td
Original file line number Diff line number Diff line change
Expand Up @@ -194,41 +194,41 @@ def FirstSplatOperand : FlagType<0x00000400>;
// These flags are used to specify which scalar operand
// needs to be duplicated/splatted into a vector.
// : :
def SplatOperandMask : FlagType<0x00001C00>;
def IsLoad : FlagType<0x00002000>;
def IsStore : FlagType<0x00004000>;
def IsGatherLoad : FlagType<0x00008000>;
def IsScatterStore : FlagType<0x00010000>;
def IsStructLoad : FlagType<0x00020000>;
def IsStructStore : FlagType<0x00040000>;
def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extended by default
def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
def IsByteIndexed : FlagType<0x01000000>;
def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
def IsGatherPrefetch : FlagType<0x10000000>;
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
def IsTupleCreate : FlagType<0x100000000>;
def IsTupleGet : FlagType<0x200000000>;
def IsTupleSet : FlagType<0x400000000>;
def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
def IsStreaming : FlagType<0x2000000000>;
def IsStreamingCompatible : FlagType<0x4000000000>;
def IsReadZA : FlagType<0x8000000000>;
def IsWriteZA : FlagType<0x10000000000>;
def IsReductionQV : FlagType<0x20000000000>;
def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1.
def IsInZA : FlagType<0x80000000000>;
def IsOutZA : FlagType<0x100000000000>;
def IsInOutZA : FlagType<0x200000000000>;
def SplatOperandMask : FlagType<0x00001C00>;
def IsLoad : FlagType<0x00002000>;
def IsStore : FlagType<0x00004000>;
def IsGatherLoad : FlagType<0x00008000>;
def IsScatterStore : FlagType<0x00010000>;
def IsStructLoad : FlagType<0x00020000>;
def IsStructStore : FlagType<0x00040000>;
def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extended by default
def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
def IsOverloadWhileOrMultiVecCvt : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
def IsByteIndexed : FlagType<0x01000000>;
def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
def IsGatherPrefetch : FlagType<0x10000000>;
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped.
def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type.
def IsTupleCreate : FlagType<0x100000000>;
def IsTupleGet : FlagType<0x200000000>;
def IsTupleSet : FlagType<0x400000000>;
def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X.
def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X.
def IsStreaming : FlagType<0x2000000000>;
def IsStreamingCompatible : FlagType<0x4000000000>;
def IsReadZA : FlagType<0x8000000000>;
def IsWriteZA : FlagType<0x10000000000>;
def IsReductionQV : FlagType<0x20000000000>;
def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1.
def IsInZA : FlagType<0x80000000000>;
def IsOutZA : FlagType<0x100000000000>;
def IsInOutZA : FlagType<0x200000000000>;

// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10056,7 +10056,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,

llvm::Type *DefaultType = getSVEType(TypeFlags);

if (TypeFlags.isOverloadWhile())
if (TypeFlags.isOverloadWhileOrMultiVecCvt())
return {DefaultType, Ops[1]->getType()};

if (TypeFlags.isOverloadWhileRW())
Expand Down
Loading