Skip to content

Commit 27e42e1

Browse files
committed
[AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate
1 parent 606e0b4 commit 27e42e1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+56329
-36394
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ namespace clang {
263263
EltType ET = getEltType();
264264
return ET == Poly8 || ET == Poly16 || ET == Poly64;
265265
}
266+
bool isFloatingPoint() const {
267+
EltType ET = getEltType();
268+
return ET == Float16 || ET == Float32 || ET == Float64 || ET == BFloat16;
269+
}
266270
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
267271
bool isQuad() const { return (Flags & QuadFlag) != 0; }
268272
unsigned getEltSizeInBits() const {

clang/include/clang/Basic/arm_neon.td

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
3131
def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
3232
(call "vget_high", $p1))>;
3333
def OP_MULLHi_P64 : Op<(call "vmull",
34-
(cast "poly64_t", (call "vget_high", $p0)),
35-
(cast "poly64_t", (call "vget_high", $p1)))>;
34+
(bitcast "poly64_t", (call "vget_high", $p0)),
35+
(bitcast "poly64_t", (call "vget_high", $p1)))>;
3636
def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
3737
def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
3838
(call "vget_high", $p2))>;
@@ -95,11 +95,11 @@ def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
9595
def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
9696
def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
9797
(decimate (rotl mask1, 1), 2)))>;
98-
def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
99-
def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
100-
def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
101-
def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
102-
def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
98+
def OP_EQ : Op<(bitcast "R", (op "==", $p0, $p1))>;
99+
def OP_GE : Op<(bitcast "R", (op ">=", $p0, $p1))>;
100+
def OP_LE : Op<(bitcast "R", (op "<=", $p0, $p1))>;
101+
def OP_GT : Op<(bitcast "R", (op ">", $p0, $p1))>;
102+
def OP_LT : Op<(bitcast "R", (op "<", $p0, $p1))>;
103103
def OP_NEG : Op<(op "-", $p0)>;
104104
def OP_NOT : Op<(op "~", $p0)>;
105105
def OP_AND : Op<(op "&", $p0, $p1)>;
@@ -108,33 +108,33 @@ def OP_XOR : Op<(op "^", $p0, $p1)>;
108108
def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
109109
def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
110110
def OP_CAST : LOp<[(save_temp $promote, $p0),
111-
(cast "R", $promote)]>;
111+
(bitcast "R", $promote)]>;
112112
def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
113113
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
114114
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
115115
def OP_DUP : Op<(dup $p0)>;
116116
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
117-
def OP_SEL : Op<(cast "R", (op "|",
118-
(op "&", $p0, (cast $p0, $p1)),
119-
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
117+
def OP_SEL : Op<(bitcast "R", (op "|",
118+
(op "&", $p0, (bitcast $p0, $p1)),
119+
(op "&", (op "~", $p0), (bitcast $p0, $p2))))>;
120120
def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
121121
def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
122122
def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
123123
def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
124-
def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
124+
def OP_SQXTUN : Op<(call "vcombine", (bitcast $p0, "U", $p0),
125125
(call "vqmovun", $p1))>;
126126
def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
127127
def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
128128
def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
129129
def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
130130
def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
131131
def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
132-
def OP_REINT : Op<(cast "R", $p0)>;
132+
def OP_REINT : Op<(bitcast "R", $p0)>;
133133
def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
134134
def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
135135
def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
136136
def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
137-
def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
137+
def OP_ABDL : Op<(bitcast "R", (call "vmovl", (bitcast $p0, "U",
138138
(call "vabd", $p0, $p1))))>;
139139
def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
140140
(call "vget_high", $p1))>;
@@ -152,15 +152,15 @@ def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
152152
(call "vget_high", $p2))>;
153153
def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
154154
def OP_DIV : Op<(op "/", $p0, $p1)>;
155-
def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
155+
def OP_LONG_HI : Op<(bitcast "R", (call (name_replace "_high_", "_"),
156156
(call "vget_high", $p0), $p1))>;
157-
def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
158-
(cast "R", "H", $p0),
159-
(cast "R", "H",
157+
def OP_NARROW_HI : Op<(bitcast "R", (call "vcombine",
158+
(bitcast "R", "H", $p0),
159+
(bitcast "R", "H",
160160
(call (name_replace "_high_", "_"),
161161
$p1, $p2))))>;
162162
def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
163-
(cast "R",
163+
(bitcast "R",
164164
(call "vshll_n", $a1, (literal "int32_t", "0")))]>;
165165
def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
166166
def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
@@ -221,18 +221,18 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
221221

222222
def OP_USDOT_LN
223223
: Op<(call "vusdot", $p0, $p1,
224-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
224+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
225225
def OP_USDOT_LNQ
226226
: Op<(call "vusdot", $p0, $p1,
227-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
227+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
228228

229229
// sudot splats the second vector and then calls vusdot
230230
def OP_SUDOT_LN
231231
: Op<(call "vusdot", $p0,
232-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
232+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
233233
def OP_SUDOT_LNQ
234234
: Op<(call "vusdot", $p0,
235-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
235+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
236236

237237
def OP_BFDOT_LN
238238
: Op<(call "vbfdot", $p0, $p1,
@@ -263,7 +263,7 @@ def OP_VCVT_BF16_F32_A32
263263
: Op<(call "__a32_vcvt_bf16", $p0)>;
264264

265265
def OP_VCVT_BF16_F32_LO_A32
266-
: Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
266+
: Op<(call "vcombine", (bitcast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
267267
(call "__a32_vcvt_bf16", $p0))>;
268268
def OP_VCVT_BF16_F32_HI_A32
269269
: Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
@@ -924,12 +924,12 @@ def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
924924
def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
925925
def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
926926

927-
def CMEQ : SInst<"vceqz", "U.",
927+
def CMEQ : SInst<"vceqz", "U(.!)",
928928
"csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
929-
def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
930-
def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
931-
def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
932-
def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
929+
def CMGE : SInst<"vcgez", "U(.!)", "csilfdQcQsQiQlQfQd">;
930+
def CMLE : SInst<"vclez", "U(.!)", "csilfdQcQsQiQlQfQd">;
931+
def CMGT : SInst<"vcgtz", "U(.!)", "csilfdQcQsQiQlQfQd">;
932+
def CMLT : SInst<"vcltz", "U(.!)", "csilfdQcQsQiQlQfQd">;
933933

934934
////////////////////////////////////////////////////////////////////////////////
935935
// Max/Min Integer
@@ -1667,11 +1667,11 @@ let TargetGuard = "fullfp16,neon" in {
16671667
// ARMv8.2-A FP16 one-operand vector intrinsics.
16681668

16691669
// Comparison
1670-
def CMEQH : SInst<"vceqz", "U.", "hQh">;
1671-
def CMGEH : SInst<"vcgez", "U.", "hQh">;
1672-
def CMGTH : SInst<"vcgtz", "U.", "hQh">;
1673-
def CMLEH : SInst<"vclez", "U.", "hQh">;
1674-
def CMLTH : SInst<"vcltz", "U.", "hQh">;
1670+
def CMEQH : SInst<"vceqz", "U(.!)", "hQh">;
1671+
def CMGEH : SInst<"vcgez", "U(.!)", "hQh">;
1672+
def CMGTH : SInst<"vcgtz", "U(.!)", "hQh">;
1673+
def CMLEH : SInst<"vclez", "U(.!)", "hQh">;
1674+
def CMLTH : SInst<"vcltz", "U(.!)", "hQh">;
16751675

16761676
// Vector conversion
16771677
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4694,10 +4694,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46944694
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46954695
ReturnValueSlot ReturnValue);
46964696

4697-
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4698-
const llvm::CmpInst::Predicate Fp,
4699-
const llvm::CmpInst::Predicate Ip,
4700-
const llvm::Twine &Name = "");
4697+
llvm::Value *
4698+
EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4699+
const llvm::CmpInst::Predicate Pred,
4700+
const llvm::Twine &Name = "");
47014701
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
47024702
ReturnValueSlot ReturnValue,
47034703
llvm::Triple::ArchType Arch);

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 66 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,8 +1750,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
17501750

17511751
// Determine the type of this overloaded NEON intrinsic.
17521752
NeonTypeFlags Type(NeonTypeConst->getZExtValue());
1753-
bool Usgn = Type.isUnsigned();
1754-
bool Quad = Type.isQuad();
1753+
const bool Usgn = Type.isUnsigned();
1754+
const bool Quad = Type.isQuad();
1755+
const bool Floating = Type.isFloatingPoint();
17551756
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
17561757
const bool AllowBFloatArgsAndRet =
17571758
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
@@ -1852,24 +1853,28 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
18521853
}
18531854
case NEON::BI__builtin_neon_vceqz_v:
18541855
case NEON::BI__builtin_neon_vceqzq_v:
1855-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
1856-
ICmpInst::ICMP_EQ, "vceqz");
1856+
return EmitAArch64CompareBuiltinExpr(
1857+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
18571858
case NEON::BI__builtin_neon_vcgez_v:
18581859
case NEON::BI__builtin_neon_vcgezq_v:
1859-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
1860-
ICmpInst::ICMP_SGE, "vcgez");
1860+
return EmitAArch64CompareBuiltinExpr(
1861+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
1862+
"vcgez");
18611863
case NEON::BI__builtin_neon_vclez_v:
18621864
case NEON::BI__builtin_neon_vclezq_v:
1863-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
1864-
ICmpInst::ICMP_SLE, "vclez");
1865+
return EmitAArch64CompareBuiltinExpr(
1866+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
1867+
"vclez");
18651868
case NEON::BI__builtin_neon_vcgtz_v:
18661869
case NEON::BI__builtin_neon_vcgtzq_v:
1867-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
1868-
ICmpInst::ICMP_SGT, "vcgtz");
1870+
return EmitAArch64CompareBuiltinExpr(
1871+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
1872+
"vcgtz");
18691873
case NEON::BI__builtin_neon_vcltz_v:
18701874
case NEON::BI__builtin_neon_vcltzq_v:
1871-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
1872-
ICmpInst::ICMP_SLT, "vcltz");
1875+
return EmitAArch64CompareBuiltinExpr(
1876+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
1877+
"vcltz");
18731878
case NEON::BI__builtin_neon_vclz_v:
18741879
case NEON::BI__builtin_neon_vclzq_v:
18751880
// We generate target-independent intrinsic, which needs a second argument
@@ -2432,28 +2437,32 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
24322437
return Builder.CreateBitCast(Result, ResultType, NameHint);
24332438
}
24342439

2435-
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
2436-
Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
2437-
const CmpInst::Predicate Ip, const Twine &Name) {
2438-
llvm::Type *OTy = Op->getType();
2439-
2440-
// FIXME: this is utterly horrific. We should not be looking at previous
2441-
// codegen context to find out what needs doing. Unfortunately TableGen
2442-
// currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
2443-
// (etc).
2444-
if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
2445-
OTy = BI->getOperand(0)->getType();
2446-
2447-
Op = Builder.CreateBitCast(Op, OTy);
2448-
if (OTy->getScalarType()->isFloatingPointTy()) {
2449-
if (Fp == CmpInst::FCMP_OEQ)
2450-
Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
2440+
Value *
2441+
CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
2442+
const CmpInst::Predicate Pred,
2443+
const Twine &Name) {
2444+
2445+
if (isa<FixedVectorType>(Ty)) {
2446+
// Vector types are cast to i8 vectors. Recover original type.
2447+
Op = Builder.CreateBitCast(Op, Ty);
2448+
}
2449+
2450+
if (CmpInst::isFPPredicate(Pred)) {
2451+
if (Pred == CmpInst::FCMP_OEQ)
2452+
Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
24512453
else
2452-
Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
2454+
Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
24532455
} else {
2454-
Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
2456+
Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
24552457
}
2456-
return Builder.CreateSExt(Op, Ty, Name);
2458+
2459+
llvm::Type *ResTy = Ty;
2460+
if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
2461+
ResTy = FixedVectorType::get(
2462+
IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
2463+
VTy->getNumElements());
2464+
2465+
return Builder.CreateSExt(Op, ResTy, Name);
24572466
}
24582467

24592468
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
@@ -5955,45 +5964,66 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
59555964
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
59565965
}
59575966
case NEON::BI__builtin_neon_vceqzd_s64:
5967+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5968+
return EmitAArch64CompareBuiltinExpr(
5969+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5970+
ICmpInst::ICMP_EQ, "vceqz");
59585971
case NEON::BI__builtin_neon_vceqzd_f64:
59595972
case NEON::BI__builtin_neon_vceqzs_f32:
59605973
case NEON::BI__builtin_neon_vceqzh_f16:
59615974
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59625975
return EmitAArch64CompareBuiltinExpr(
59635976
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5964-
ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5977+
ICmpInst::FCMP_OEQ, "vceqz");
59655978
case NEON::BI__builtin_neon_vcgezd_s64:
5979+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5980+
return EmitAArch64CompareBuiltinExpr(
5981+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5982+
ICmpInst::ICMP_SGE, "vcgez");
59665983
case NEON::BI__builtin_neon_vcgezd_f64:
59675984
case NEON::BI__builtin_neon_vcgezs_f32:
59685985
case NEON::BI__builtin_neon_vcgezh_f16:
59695986
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59705987
return EmitAArch64CompareBuiltinExpr(
59715988
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5972-
ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5989+
ICmpInst::FCMP_OGE, "vcgez");
59735990
case NEON::BI__builtin_neon_vclezd_s64:
5991+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
5992+
return EmitAArch64CompareBuiltinExpr(
5993+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5994+
ICmpInst::ICMP_SLE, "vclez");
59745995
case NEON::BI__builtin_neon_vclezd_f64:
59755996
case NEON::BI__builtin_neon_vclezs_f32:
59765997
case NEON::BI__builtin_neon_vclezh_f16:
59775998
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59785999
return EmitAArch64CompareBuiltinExpr(
59796000
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5980-
ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
6001+
ICmpInst::FCMP_OLE, "vclez");
59816002
case NEON::BI__builtin_neon_vcgtzd_s64:
6003+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
6004+
return EmitAArch64CompareBuiltinExpr(
6005+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
6006+
ICmpInst::ICMP_SGT, "vcgtz");
59826007
case NEON::BI__builtin_neon_vcgtzd_f64:
59836008
case NEON::BI__builtin_neon_vcgtzs_f32:
59846009
case NEON::BI__builtin_neon_vcgtzh_f16:
59856010
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59866011
return EmitAArch64CompareBuiltinExpr(
59876012
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5988-
ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
6013+
ICmpInst::FCMP_OGT, "vcgtz");
59896014
case NEON::BI__builtin_neon_vcltzd_s64:
6015+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
6016+
return EmitAArch64CompareBuiltinExpr(
6017+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
6018+
ICmpInst::ICMP_SLT, "vcltz");
6019+
59906020
case NEON::BI__builtin_neon_vcltzd_f64:
59916021
case NEON::BI__builtin_neon_vcltzs_f32:
59926022
case NEON::BI__builtin_neon_vcltzh_f16:
59936023
Ops.push_back(EmitScalarExpr(E->getArg(0)));
59946024
return EmitAArch64CompareBuiltinExpr(
59956025
Ops[0], ConvertType(E->getCallReturnType(getContext())),
5996-
ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
6026+
ICmpInst::FCMP_OLT, "vcltz");
59976027

59986028
case NEON::BI__builtin_neon_vceqzd_u64: {
59996029
Ops.push_back(EmitScalarExpr(E->getArg(0)));

0 commit comments

Comments
 (0)