Skip to content

Commit 8aa3ab1

Browse files
committed
[AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate
1 parent 499d39e commit 8aa3ab1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+56329
-36394
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ namespace clang {
263263
EltType ET = getEltType();
264264
return ET == Poly8 || ET == Poly16 || ET == Poly64;
265265
}
266+
bool isFloatingPoint() const {
267+
EltType ET = getEltType();
268+
return ET == Float16 || ET == Float32 || ET == Float64 || ET == BFloat16;
269+
}
266270
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
267271
bool isQuad() const { return (Flags & QuadFlag) != 0; }
268272
unsigned getEltSizeInBits() const {

clang/include/clang/Basic/arm_neon.td

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
3131
def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
3232
(call "vget_high", $p1))>;
3333
def OP_MULLHi_P64 : Op<(call "vmull",
34-
(cast "poly64_t", (call "vget_high", $p0)),
35-
(cast "poly64_t", (call "vget_high", $p1)))>;
34+
(bitcast "poly64_t", (call "vget_high", $p0)),
35+
(bitcast "poly64_t", (call "vget_high", $p1)))>;
3636
def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
3737
def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
3838
(call "vget_high", $p2))>;
@@ -95,11 +95,11 @@ def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
9595
def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
9696
def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
9797
(decimate (rotl mask1, 1), 2)))>;
98-
def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
99-
def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
100-
def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
101-
def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
102-
def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
98+
def OP_EQ : Op<(bitcast "R", (op "==", $p0, $p1))>;
99+
def OP_GE : Op<(bitcast "R", (op ">=", $p0, $p1))>;
100+
def OP_LE : Op<(bitcast "R", (op "<=", $p0, $p1))>;
101+
def OP_GT : Op<(bitcast "R", (op ">", $p0, $p1))>;
102+
def OP_LT : Op<(bitcast "R", (op "<", $p0, $p1))>;
103103
def OP_NEG : Op<(op "-", $p0)>;
104104
def OP_NOT : Op<(op "~", $p0)>;
105105
def OP_AND : Op<(op "&", $p0, $p1)>;
@@ -108,33 +108,33 @@ def OP_XOR : Op<(op "^", $p0, $p1)>;
108108
def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
109109
def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
110110
def OP_CAST : LOp<[(save_temp $promote, $p0),
111-
(cast "R", $promote)]>;
111+
(bitcast "R", $promote)]>;
112112
def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
113113
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
114114
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
115115
def OP_DUP : Op<(dup $p0)>;
116116
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
117-
def OP_SEL : Op<(cast "R", (op "|",
118-
(op "&", $p0, (cast $p0, $p1)),
119-
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
117+
def OP_SEL : Op<(bitcast "R", (op "|",
118+
(op "&", $p0, (bitcast $p0, $p1)),
119+
(op "&", (op "~", $p0), (bitcast $p0, $p2))))>;
120120
def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
121121
def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
122122
def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
123123
def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
124-
def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
124+
def OP_SQXTUN : Op<(call "vcombine", (bitcast $p0, "U", $p0),
125125
(call "vqmovun", $p1))>;
126126
def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
127127
def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
128128
def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
129129
def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
130130
def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
131131
def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
132-
def OP_REINT : Op<(cast "R", $p0)>;
132+
def OP_REINT : Op<(bitcast "R", $p0)>;
133133
def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
134134
def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
135135
def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
136136
def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
137-
def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
137+
def OP_ABDL : Op<(bitcast "R", (call "vmovl", (bitcast $p0, "U",
138138
(call "vabd", $p0, $p1))))>;
139139
def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
140140
(call "vget_high", $p1))>;
@@ -152,15 +152,15 @@ def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
152152
(call "vget_high", $p2))>;
153153
def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
154154
def OP_DIV : Op<(op "/", $p0, $p1)>;
155-
def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
155+
def OP_LONG_HI : Op<(bitcast "R", (call (name_replace "_high_", "_"),
156156
(call "vget_high", $p0), $p1))>;
157-
def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
158-
(cast "R", "H", $p0),
159-
(cast "R", "H",
157+
def OP_NARROW_HI : Op<(bitcast "R", (call "vcombine",
158+
(bitcast "R", "H", $p0),
159+
(bitcast "R", "H",
160160
(call (name_replace "_high_", "_"),
161161
$p1, $p2))))>;
162162
def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
163-
(cast "R",
163+
(bitcast "R",
164164
(call "vshll_n", $a1, (literal "int32_t", "0")))]>;
165165
def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
166166
def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
@@ -221,18 +221,18 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
221221

222222
def OP_USDOT_LN
223223
: Op<(call "vusdot", $p0, $p1,
224-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
224+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
225225
def OP_USDOT_LNQ
226226
: Op<(call "vusdot", $p0, $p1,
227-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
227+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
228228

229229
// sudot splats the second vector and then calls vusdot
230230
def OP_SUDOT_LN
231231
: Op<(call "vusdot", $p0,
232-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
232+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
233233
def OP_SUDOT_LNQ
234234
: Op<(call "vusdot", $p0,
235-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
235+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
236236

237237
def OP_BFDOT_LN
238238
: Op<(call "vbfdot", $p0, $p1,
@@ -263,7 +263,7 @@ def OP_VCVT_BF16_F32_A32
263263
: Op<(call "__a32_vcvt_bf16", $p0)>;
264264

265265
def OP_VCVT_BF16_F32_LO_A32
266-
: Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
266+
: Op<(call "vcombine", (bitcast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
267267
(call "__a32_vcvt_bf16", $p0))>;
268268
def OP_VCVT_BF16_F32_HI_A32
269269
: Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
@@ -924,12 +924,12 @@ def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
924924
def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
925925
def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
926926

927-
def CMEQ : SInst<"vceqz", "U.",
927+
def CMEQ : SInst<"vceqz", "U(.!)",
928928
"csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
929-
def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
930-
def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
931-
def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
932-
def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
929+
def CMGE : SInst<"vcgez", "U(.!)", "csilfdQcQsQiQlQfQd">;
930+
def CMLE : SInst<"vclez", "U(.!)", "csilfdQcQsQiQlQfQd">;
931+
def CMGT : SInst<"vcgtz", "U(.!)", "csilfdQcQsQiQlQfQd">;
932+
def CMLT : SInst<"vcltz", "U(.!)", "csilfdQcQsQiQlQfQd">;
933933

934934
////////////////////////////////////////////////////////////////////////////////
935935
// Max/Min Integer
@@ -1667,11 +1667,11 @@ let TargetGuard = "fullfp16,neon" in {
16671667
// ARMv8.2-A FP16 one-operand vector intrinsics.
16681668

16691669
// Comparison
1670-
def CMEQH : SInst<"vceqz", "U.", "hQh">;
1671-
def CMGEH : SInst<"vcgez", "U.", "hQh">;
1672-
def CMGTH : SInst<"vcgtz", "U.", "hQh">;
1673-
def CMLEH : SInst<"vclez", "U.", "hQh">;
1674-
def CMLTH : SInst<"vcltz", "U.", "hQh">;
1670+
def CMEQH : SInst<"vceqz", "U(.!)", "hQh">;
1671+
def CMGEH : SInst<"vcgez", "U(.!)", "hQh">;
1672+
def CMGTH : SInst<"vcgtz", "U(.!)", "hQh">;
1673+
def CMLEH : SInst<"vclez", "U(.!)", "hQh">;
1674+
def CMLTH : SInst<"vcltz", "U(.!)", "hQh">;
16751675

16761676
// Vector conversion
16771677
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 66 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8189,8 +8189,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
81898189

81908190
// Determine the type of this overloaded NEON intrinsic.
81918191
NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8192-
bool Usgn = Type.isUnsigned();
8193-
bool Quad = Type.isQuad();
8192+
const bool Usgn = Type.isUnsigned();
8193+
const bool Quad = Type.isQuad();
8194+
const bool Floating = Type.isFloatingPoint();
81948195
const bool HasLegalHalfType = getTarget().hasLegalHalfType();
81958196
const bool AllowBFloatArgsAndRet =
81968197
getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
@@ -8291,24 +8292,28 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
82918292
}
82928293
case NEON::BI__builtin_neon_vceqz_v:
82938294
case NEON::BI__builtin_neon_vceqzq_v:
8294-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8295-
ICmpInst::ICMP_EQ, "vceqz");
8295+
return EmitAArch64CompareBuiltinExpr(
8296+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OEQ : ICmpInst::ICMP_EQ, "vceqz");
82968297
case NEON::BI__builtin_neon_vcgez_v:
82978298
case NEON::BI__builtin_neon_vcgezq_v:
8298-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8299-
ICmpInst::ICMP_SGE, "vcgez");
8299+
return EmitAArch64CompareBuiltinExpr(
8300+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGE : ICmpInst::ICMP_SGE,
8301+
"vcgez");
83008302
case NEON::BI__builtin_neon_vclez_v:
83018303
case NEON::BI__builtin_neon_vclezq_v:
8302-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8303-
ICmpInst::ICMP_SLE, "vclez");
8304+
return EmitAArch64CompareBuiltinExpr(
8305+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLE : ICmpInst::ICMP_SLE,
8306+
"vclez");
83048307
case NEON::BI__builtin_neon_vcgtz_v:
83058308
case NEON::BI__builtin_neon_vcgtzq_v:
8306-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8307-
ICmpInst::ICMP_SGT, "vcgtz");
8309+
return EmitAArch64CompareBuiltinExpr(
8310+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OGT : ICmpInst::ICMP_SGT,
8311+
"vcgtz");
83088312
case NEON::BI__builtin_neon_vcltz_v:
83098313
case NEON::BI__builtin_neon_vcltzq_v:
8310-
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8311-
ICmpInst::ICMP_SLT, "vcltz");
8314+
return EmitAArch64CompareBuiltinExpr(
8315+
Ops[0], Ty, Floating ? ICmpInst::FCMP_OLT : ICmpInst::ICMP_SLT,
8316+
"vcltz");
83128317
case NEON::BI__builtin_neon_vclz_v:
83138318
case NEON::BI__builtin_neon_vclzq_v:
83148319
// We generate target-independent intrinsic, which needs a second argument
@@ -8871,28 +8876,32 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
88718876
return Builder.CreateBitCast(Result, ResultType, NameHint);
88728877
}
88738878

8874-
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8875-
Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8876-
const CmpInst::Predicate Ip, const Twine &Name) {
8877-
llvm::Type *OTy = Op->getType();
8878-
8879-
// FIXME: this is utterly horrific. We should not be looking at previous
8880-
// codegen context to find out what needs doing. Unfortunately TableGen
8881-
// currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8882-
// (etc).
8883-
if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8884-
OTy = BI->getOperand(0)->getType();
8885-
8886-
Op = Builder.CreateBitCast(Op, OTy);
8887-
if (OTy->getScalarType()->isFloatingPointTy()) {
8888-
if (Fp == CmpInst::FCMP_OEQ)
8889-
Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8879+
Value *
8880+
CodeGenFunction::EmitAArch64CompareBuiltinExpr(Value *Op, llvm::Type *Ty,
8881+
const CmpInst::Predicate Pred,
8882+
const Twine &Name) {
8883+
8884+
if (isa<FixedVectorType>(Ty)) {
8885+
// Vector types are cast to i8 vectors. Recover original type.
8886+
Op = Builder.CreateBitCast(Op, Ty);
8887+
}
8888+
8889+
if (CmpInst::isFPPredicate(Pred)) {
8890+
if (Pred == CmpInst::FCMP_OEQ)
8891+
Op = Builder.CreateFCmp(Pred, Op, Constant::getNullValue(Op->getType()));
88908892
else
8891-
Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8893+
Op = Builder.CreateFCmpS(Pred, Op, Constant::getNullValue(Op->getType()));
88928894
} else {
8893-
Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8895+
Op = Builder.CreateICmp(Pred, Op, Constant::getNullValue(Op->getType()));
88948896
}
8895-
return Builder.CreateSExt(Op, Ty, Name);
8897+
8898+
llvm::Type *ResTy = Ty;
8899+
if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
8900+
ResTy = FixedVectorType::get(
8901+
IntegerType::get(getLLVMContext(), VTy->getScalarSizeInBits()),
8902+
VTy->getNumElements());
8903+
8904+
return Builder.CreateSExt(Op, ResTy, Name);
88968905
}
88978906

88988907
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
@@ -12400,45 +12409,66 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1240012409
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
1240112410
}
1240212411
case NEON::BI__builtin_neon_vceqzd_s64:
12412+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
12413+
return EmitAArch64CompareBuiltinExpr(
12414+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12415+
ICmpInst::ICMP_EQ, "vceqz");
1240312416
case NEON::BI__builtin_neon_vceqzd_f64:
1240412417
case NEON::BI__builtin_neon_vceqzs_f32:
1240512418
case NEON::BI__builtin_neon_vceqzh_f16:
1240612419
Ops.push_back(EmitScalarExpr(E->getArg(0)));
1240712420
return EmitAArch64CompareBuiltinExpr(
1240812421
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12409-
ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12422+
ICmpInst::FCMP_OEQ, "vceqz");
1241012423
case NEON::BI__builtin_neon_vcgezd_s64:
12424+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
12425+
return EmitAArch64CompareBuiltinExpr(
12426+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12427+
ICmpInst::ICMP_SGE, "vcgez");
1241112428
case NEON::BI__builtin_neon_vcgezd_f64:
1241212429
case NEON::BI__builtin_neon_vcgezs_f32:
1241312430
case NEON::BI__builtin_neon_vcgezh_f16:
1241412431
Ops.push_back(EmitScalarExpr(E->getArg(0)));
1241512432
return EmitAArch64CompareBuiltinExpr(
1241612433
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12417-
ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12434+
ICmpInst::FCMP_OGE, "vcgez");
1241812435
case NEON::BI__builtin_neon_vclezd_s64:
12436+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
12437+
return EmitAArch64CompareBuiltinExpr(
12438+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12439+
ICmpInst::ICMP_SLE, "vclez");
1241912440
case NEON::BI__builtin_neon_vclezd_f64:
1242012441
case NEON::BI__builtin_neon_vclezs_f32:
1242112442
case NEON::BI__builtin_neon_vclezh_f16:
1242212443
Ops.push_back(EmitScalarExpr(E->getArg(0)));
1242312444
return EmitAArch64CompareBuiltinExpr(
1242412445
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12425-
ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12446+
ICmpInst::FCMP_OLE, "vclez");
1242612447
case NEON::BI__builtin_neon_vcgtzd_s64:
12448+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
12449+
return EmitAArch64CompareBuiltinExpr(
12450+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12451+
ICmpInst::ICMP_SGT, "vcgtz");
1242712452
case NEON::BI__builtin_neon_vcgtzd_f64:
1242812453
case NEON::BI__builtin_neon_vcgtzs_f32:
1242912454
case NEON::BI__builtin_neon_vcgtzh_f16:
1243012455
Ops.push_back(EmitScalarExpr(E->getArg(0)));
1243112456
return EmitAArch64CompareBuiltinExpr(
1243212457
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12433-
ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12458+
ICmpInst::FCMP_OGT, "vcgtz");
1243412459
case NEON::BI__builtin_neon_vcltzd_s64:
12460+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
12461+
return EmitAArch64CompareBuiltinExpr(
12462+
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12463+
ICmpInst::ICMP_SLT, "vcltz");
12464+
1243512465
case NEON::BI__builtin_neon_vcltzd_f64:
1243612466
case NEON::BI__builtin_neon_vcltzs_f32:
1243712467
case NEON::BI__builtin_neon_vcltzh_f16:
1243812468
Ops.push_back(EmitScalarExpr(E->getArg(0)));
1243912469
return EmitAArch64CompareBuiltinExpr(
1244012470
Ops[0], ConvertType(E->getCallReturnType(getContext())),
12441-
ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12471+
ICmpInst::FCMP_OLT, "vcltz");
1244212472

1244312473
case NEON::BI__builtin_neon_vceqzd_u64: {
1244412474
Ops.push_back(EmitScalarExpr(E->getArg(0)));

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4642,10 +4642,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46424642
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46434643
ReturnValueSlot ReturnValue);
46444644

4645-
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4646-
const llvm::CmpInst::Predicate Fp,
4647-
const llvm::CmpInst::Predicate Ip,
4648-
const llvm::Twine &Name = "");
4645+
llvm::Value *
4646+
EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4647+
const llvm::CmpInst::Predicate Pred,
4648+
const llvm::Twine &Name = "");
46494649
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46504650
ReturnValueSlot ReturnValue,
46514651
llvm::Triple::ArchType Arch);

0 commit comments

Comments
 (0)