Skip to content

Commit 0ddb203

Browse files
committed
[SveEmitter] Add builtins for compares and ReverseCompare flag.
The IsReverseCompare flag tells CGBuiltin to swap the operands, so that a LT/LE intrinsics can be expressed in terms of GE/GT intrinsics. This patch also adds builtins for the wide-variants of the compares. Reviewers: SjoerdMeijer, efriedma, ctetreau Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78747
1 parent aed67a3 commit 0ddb203

File tree

11 files changed

+2160
-0
lines changed

11 files changed

+2160
-0
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ namespace clang {
239239
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
240240
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
241241
bool isPrefetch() const { return Flags & IsPrefetch; }
242+
bool isReverseCompare() const { return Flags & ReverseCompare; }
242243

243244
uint64_t getBits() const { return Flags; }
244245
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@
7070
// q: 1/4 width elements, 4x element count
7171
// o: 4x width elements, 1/4 element count
7272
//
73+
// w: vector of element type promoted to 64bits, vector maintains
74+
// signedness of its element type.
75+
// j: element type promoted to 64bits (splat to vector type)
7376
// i: constant uint64_t
7477
// k: int32_t
7578
// l: int64_t
@@ -179,6 +182,7 @@ def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0),
179182
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
180183
def IsByteIndexed : FlagType<0x01000000>;
181184
def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches.
185+
def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped.
182186

183187
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
184188
class ImmCheckType<int val> {
@@ -610,6 +614,53 @@ def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNo
610614
// Shifts
611615
def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
612616

617+
////////////////////////////////////////////////////////////////////////////////
618+
// Integer comparisons
619+
620+
def SVCMPEQ : SInst<"svcmpeq[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">;
621+
def SVCMPNE : SInst<"svcmpne[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">;
622+
def SVCMPGE : SInst<"svcmpge[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge">;
623+
def SVCMPGT : SInst<"svcmpgt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt">;
624+
def SVCMPLE : SInst<"svcmple[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>;
625+
def SVCMPLT : SInst<"svcmplt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>;
626+
def SVCMPHI : SInst<"svcmpgt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">;
627+
def SVCMPHS : SInst<"svcmpge[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">;
628+
def SVCMPLO : SInst<"svcmplt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>;
629+
def SVCMPLS : SInst<"svcmple[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>;
630+
631+
def SVCMPEQ_N : SInst<"svcmpeq[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">;
632+
def SVCMPNE_N : SInst<"svcmpne[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">;
633+
def SVCMPGE_N : SInst<"svcmpge[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge">;
634+
def SVCMPGT_N : SInst<"svcmpgt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt">;
635+
def SVCMPLE_N : SInst<"svcmple[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>;
636+
def SVCMPLT_N : SInst<"svcmplt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>;
637+
def SVCMPHS_N : SInst<"svcmpge[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">;
638+
def SVCMPHI_N : SInst<"svcmpgt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">;
639+
def SVCMPLS_N : SInst<"svcmple[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>;
640+
def SVCMPLO_N : SInst<"svcmplt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>;
641+
642+
def SVCMPEQ_WIDE : SInst<"svcmpeq_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpeq_wide">;
643+
def SVCMPNE_WIDE : SInst<"svcmpne_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpne_wide">;
644+
def SVCMPGE_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpge_wide">;
645+
def SVCMPGT_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpgt_wide">;
646+
def SVCMPLE_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmple_wide">;
647+
def SVCMPLT_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmplt_wide">;
648+
def SVCMPHI_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">;
649+
def SVCMPHS_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">;
650+
def SVCMPLO_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">;
651+
def SVCMPLS_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">;
652+
653+
def SVCMPEQ_WIDE_N : SInst<"svcmpeq_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpeq_wide">;
654+
def SVCMPNE_WIDE_N : SInst<"svcmpne_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpne_wide">;
655+
def SVCMPGE_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpge_wide">;
656+
def SVCMPGT_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpgt_wide">;
657+
def SVCMPLE_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmple_wide">;
658+
def SVCMPLT_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmplt_wide">;
659+
def SVCMPHS_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">;
660+
def SVCMPHI_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">;
661+
def SVCMPLO_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">;
662+
def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">;
663+
613664
////////////////////////////////////////////////////////////////////////////////
614665
// SVE2 - Narrowing DSP operations
615666
let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
@@ -645,6 +696,36 @@ def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftma
645696
def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
646697
def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>,
647698
ImmCheck<4, ImmCheckComplexRotAll90>]>;
699+
700+
////////////////////////////////////////////////////////////////////////////////
701+
// Floating-point comparisons
702+
703+
def SVACGE : SInst<"svacge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge">;
704+
def SVACGT : SInst<"svacgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt">;
705+
def SVACLE : SInst<"svacle[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare]>;
706+
def SVACLT : SInst<"svaclt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>;
707+
def SVCMPUO : SInst<"svcmpuo[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpuo">;
708+
709+
def SVACGE_N : SInst<"svacge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facge">;
710+
def SVACGT_N : SInst<"svacgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt">;
711+
def SVACLE_N : SInst<"svacle[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare]>;
712+
def SVACLT_N : SInst<"svaclt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>;
713+
def SVCMPUO_N : SInst<"svcmpuo[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpuo">;
714+
715+
def SVCMPEQ_F : SInst<"svcmpeq[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpeq">;
716+
def SVCMPNE_F : SInst<"svcmpne[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpne">;
717+
def SVCMPGE_F : SInst<"svcmpge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge">;
718+
def SVCMPGT_F : SInst<"svcmpgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt">;
719+
def SVCMPLE_F : SInst<"svcmple[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>;
720+
def SVCMPLT_F : SInst<"svcmplt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>;
721+
722+
def SVCMPEQ_F_N : SInst<"svcmpeq[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpeq">;
723+
def SVCMPNE_F_N : SInst<"svcmpne[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpne">;
724+
def SVCMPGE_F_N : SInst<"svcmpge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge">;
725+
def SVCMPGT_F_N : SInst<"svcmpgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt">;
726+
def SVCMPLE_F_N : SInst<"svcmple[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>;
727+
def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>;
728+
648729
////////////////////////////////////////////////////////////////////////////////
649730
// Floating-point conversions
650731

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7892,6 +7892,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
78927892
Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
78937893
}
78947894

7895+
if (TypeFlags.isReverseCompare())
7896+
std::swap(Ops[1], Ops[2]);
7897+
78957898
// Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
78967899
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
78977900
llvm::Type *OpndTy = Ops[1]->getType();

0 commit comments

Comments
 (0)