Skip to content

Commit 136bc11

Browse files
committed
[AARCH64][Neon] switch to using bitcasts in arm_neon.h where appropriate
1 parent 606e0b4 commit 136bc11

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+56263
-36358
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ namespace clang {
263263
EltType ET = getEltType();
264264
return ET == Poly8 || ET == Poly16 || ET == Poly64;
265265
}
266+
bool isFloatingPoint() const {
267+
EltType ET = getEltType();
268+
return ET == Float16 || ET == Float32 || ET == Float64 || ET == BFloat16;
269+
}
266270
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
267271
bool isQuad() const { return (Flags & QuadFlag) != 0; }
268272
unsigned getEltSizeInBits() const {

clang/include/clang/Basic/arm_neon.td

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
3131
def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
3232
(call "vget_high", $p1))>;
3333
def OP_MULLHi_P64 : Op<(call "vmull",
34-
(cast "poly64_t", (call "vget_high", $p0)),
35-
(cast "poly64_t", (call "vget_high", $p1)))>;
34+
(bitcast "poly64_t", (call "vget_high", $p0)),
35+
(bitcast "poly64_t", (call "vget_high", $p1)))>;
3636
def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
3737
def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
3838
(call "vget_high", $p2))>;
@@ -95,11 +95,11 @@ def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
9595
def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
9696
def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
9797
(decimate (rotl mask1, 1), 2)))>;
98-
def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
99-
def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
100-
def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
101-
def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
102-
def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
98+
def OP_EQ : Op<(bitcast "R", (op "==", $p0, $p1))>;
99+
def OP_GE : Op<(bitcast "R", (op ">=", $p0, $p1))>;
100+
def OP_LE : Op<(bitcast "R", (op "<=", $p0, $p1))>;
101+
def OP_GT : Op<(bitcast "R", (op ">", $p0, $p1))>;
102+
def OP_LT : Op<(bitcast "R", (op "<", $p0, $p1))>;
103103
def OP_NEG : Op<(op "-", $p0)>;
104104
def OP_NOT : Op<(op "~", $p0)>;
105105
def OP_AND : Op<(op "&", $p0, $p1)>;
@@ -108,33 +108,33 @@ def OP_XOR : Op<(op "^", $p0, $p1)>;
108108
def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
109109
def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
110110
def OP_CAST : LOp<[(save_temp $promote, $p0),
111-
(cast "R", $promote)]>;
111+
(bitcast "R", $promote)]>;
112112
def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
113113
def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
114114
def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
115115
def OP_DUP : Op<(dup $p0)>;
116116
def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
117-
def OP_SEL : Op<(cast "R", (op "|",
118-
(op "&", $p0, (cast $p0, $p1)),
119-
(op "&", (op "~", $p0), (cast $p0, $p2))))>;
117+
def OP_SEL : Op<(bitcast "R", (op "|",
118+
(op "&", $p0, (bitcast $p0, $p1)),
119+
(op "&", (op "~", $p0), (bitcast $p0, $p2))))>;
120120
def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
121121
def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
122122
def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
123123
def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
124-
def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
124+
def OP_SQXTUN : Op<(call "vcombine", (bitcast $p0, "U", $p0),
125125
(call "vqmovun", $p1))>;
126126
def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
127127
def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
128128
def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
129129
def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
130130
def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
131131
def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
132-
def OP_REINT : Op<(cast "R", $p0)>;
132+
def OP_REINT : Op<(bitcast "R", $p0)>;
133133
def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
134134
def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
135135
def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
136136
def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
137-
def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
137+
def OP_ABDL : Op<(bitcast "R", (call "vmovl", (bitcast $p0, "U",
138138
(call "vabd", $p0, $p1))))>;
139139
def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
140140
(call "vget_high", $p1))>;
@@ -152,15 +152,15 @@ def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
152152
(call "vget_high", $p2))>;
153153
def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
154154
def OP_DIV : Op<(op "/", $p0, $p1)>;
155-
def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
155+
def OP_LONG_HI : Op<(bitcast "R", (call (name_replace "_high_", "_"),
156156
(call "vget_high", $p0), $p1))>;
157-
def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
158-
(cast "R", "H", $p0),
159-
(cast "R", "H",
157+
def OP_NARROW_HI : Op<(bitcast "R", (call "vcombine",
158+
(bitcast "R", "H", $p0),
159+
(bitcast "R", "H",
160160
(call (name_replace "_high_", "_"),
161161
$p1, $p2))))>;
162162
def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
163-
(cast "R",
163+
(bitcast "R",
164164
(call "vshll_n", $a1, (literal "int32_t", "0")))]>;
165165
def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
166166
def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
@@ -221,18 +221,18 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
221221

222222
def OP_USDOT_LN
223223
: Op<(call "vusdot", $p0, $p1,
224-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
224+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
225225
def OP_USDOT_LNQ
226226
: Op<(call "vusdot", $p0, $p1,
227-
(cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
227+
(bitcast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
228228

229229
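// sudot splats the selected lane ($p3) of the second vector ($p2) and then calls vusdot, passing the splat as vusdot's second operand and $p1 as its third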
230230
def OP_SUDOT_LN
231231
: Op<(call "vusdot", $p0,
232-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
232+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
233233
def OP_SUDOT_LNQ
234234
: Op<(call "vusdot", $p0,
235-
(cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
235+
(bitcast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
236236

237237
def OP_BFDOT_LN
238238
: Op<(call "vbfdot", $p0, $p1,
@@ -263,7 +263,7 @@ def OP_VCVT_BF16_F32_A32
263263
: Op<(call "__a32_vcvt_bf16", $p0)>;
264264

265265
def OP_VCVT_BF16_F32_LO_A32
266-
: Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
266+
: Op<(call "vcombine", (bitcast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
267267
(call "__a32_vcvt_bf16", $p0))>;
268268
def OP_VCVT_BF16_F32_HI_A32
269269
: Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
@@ -924,12 +924,12 @@ def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
924924
def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
925925
def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
926926

927-
def CMEQ : SInst<"vceqz", "U.",
927+
def CMEQ : SInst<"vceqz", "U(.!)",
928928
"csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
929-
def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
930-
def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
931-
def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
932-
def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
929+
def CMGE : SInst<"vcgez", "U(.!)", "csilfdQcQsQiQlQfQd">;
930+
def CMLE : SInst<"vclez", "U(.!)", "csilfdQcQsQiQlQfQd">;
931+
def CMGT : SInst<"vcgtz", "U(.!)", "csilfdQcQsQiQlQfQd">;
932+
def CMLT : SInst<"vcltz", "U(.!)", "csilfdQcQsQiQlQfQd">;
933933

934934
////////////////////////////////////////////////////////////////////////////////
935935
// Max/Min Integer
@@ -1667,11 +1667,11 @@ let TargetGuard = "fullfp16,neon" in {
16671667
// ARMv8.2-A FP16 one-operand vector intrinsics.
16681668

16691669
// Comparison
1670-
def CMEQH : SInst<"vceqz", "U.", "hQh">;
1671-
def CMGEH : SInst<"vcgez", "U.", "hQh">;
1672-
def CMGTH : SInst<"vcgtz", "U.", "hQh">;
1673-
def CMLEH : SInst<"vclez", "U.", "hQh">;
1674-
def CMLTH : SInst<"vcltz", "U.", "hQh">;
1670+
def CMEQH : SInst<"vceqz", "U(.!)", "hQh">;
1671+
def CMGEH : SInst<"vcgez", "U(.!)", "hQh">;
1672+
def CMGTH : SInst<"vcgtz", "U(.!)", "hQh">;
1673+
def CMLEH : SInst<"vclez", "U(.!)", "hQh">;
1674+
def CMLTH : SInst<"vcltz", "U(.!)", "hQh">;
16751675

16761676
// Vector conversion
16771677
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4694,10 +4694,10 @@ class CodeGenFunction : public CodeGenTypeCache {
46944694
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46954695
ReturnValueSlot ReturnValue);
46964696

4697-
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4698-
const llvm::CmpInst::Predicate Fp,
4699-
const llvm::CmpInst::Predicate Ip,
4700-
const llvm::Twine &Name = "");
4697+
llvm::Value *
4698+
EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
4699+
const llvm::CmpInst::Predicate Pred,
4700+
const llvm::Twine &Name = "");
47014701
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
47024702
ReturnValueSlot ReturnValue,
47034703
llvm::Triple::ArchType Arch);

0 commit comments

Comments
 (0)