Skip to content

Commit a5246fd

Browse files
Hao Liu
authored and committed
[AArch64]Add missing floating point convert, round and misc intrinsics.
E.g. int64x1_t vcvt_s64_f64(float64x1_t a) -> FCVTZS Dd, Dn

llvm-svn: 196211
1 parent dca64f4 commit a5246fd

File tree

4 files changed

+221
-37
lines changed

4 files changed

+221
-37
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -669,9 +669,9 @@ def USQADD : SInst<"vsqadd", "ddd", "UcUsUiUlQUcQUsQUiQUl">;
669669

670670
////////////////////////////////////////////////////////////////////////////////
671671
// Reciprocal/Sqrt
672-
// With additional Qd type.
673-
def FRECPS : IInst<"vrecps", "ddd", "fQfQd">;
674-
def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">;
672+
// With additional d, Qd type.
673+
def FRECPS : IInst<"vrecps", "ddd", "fdQfQd">;
674+
def FRSQRTS : IInst<"vrsqrts", "ddd", "fdQfQd">;
675675

676676
////////////////////////////////////////////////////////////////////////////////
677677
// bitwise reverse
@@ -695,39 +695,39 @@ def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI>;
695695
def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI>;
696696
def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "fj", "d">;
697697
def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI>;
698-
def VCVT_F64_F32 : SInst<"vcvt_f64", "wd", "f">;
699-
def VCVT_F64 : SInst<"vcvt_f64", "Fd", "QlQUl">;
698+
def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">;
699+
def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">;
700700
def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI>;
701701
def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">;
702702
def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
703-
def FRINTN : SInst<"vrndn", "dd", "fQfQd">;
704-
def FRINTA : SInst<"vrnda", "dd", "fQfQd">;
705-
def FRINTP : SInst<"vrndp", "dd", "fQfQd">;
706-
def FRINTM : SInst<"vrndm", "dd", "fQfQd">;
707-
def FRINTX : SInst<"vrndx", "dd", "fQfQd">;
708-
def FRINTZ : SInst<"vrnd", "dd", "fQfQd">;
709-
def FRINTI : SInst<"vrndi", "dd", "fQfQd">;
710-
def VCVT_S64 : SInst<"vcvt_s64", "xd", "Qd">;
711-
def VCVT_U64 : SInst<"vcvt_u64", "ud", "Qd">;
703+
def FRINTN : SInst<"vrndn", "dd", "fdQfQd">;
704+
def FRINTA : SInst<"vrnda", "dd", "fdQfQd">;
705+
def FRINTP : SInst<"vrndp", "dd", "fdQfQd">;
706+
def FRINTM : SInst<"vrndm", "dd", "fdQfQd">;
707+
def FRINTX : SInst<"vrndx", "dd", "fdQfQd">;
708+
def FRINTZ : SInst<"vrnd", "dd", "fdQfQd">;
709+
def FRINTI : SInst<"vrndi", "dd", "fdQfQd">;
710+
def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">;
711+
def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">;
712712
def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">;
713-
def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "Qd">;
713+
def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">;
714714
def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">;
715-
def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "Qd">;
715+
def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">;
716716
def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">;
717-
def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "Qd">;
717+
def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">;
718718
def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">;
719-
def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "Qd">;
719+
def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">;
720720
def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">;
721-
def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "Qd">;
721+
def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">;
722722
def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">;
723-
def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "Qd">;
723+
def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">;
724724
def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">;
725-
def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "Qd">;
725+
def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">;
726726
def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">;
727-
def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "Qd">;
728-
def FRECPE : SInst<"vrecpe", "dd", "fUiQfQUiQd">;
729-
def FRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUiQd">;
730-
def FSQRT : SInst<"vsqrt", "dd", "fQfQd">;
727+
def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">;
728+
def FRECPE : SInst<"vrecpe", "dd", "fdUiQfQUiQd">;
729+
def FRSQRTE : SInst<"vrsqrte", "dd", "fdUiQfQUiQd">;
730+
def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">;
731731

732732
////////////////////////////////////////////////////////////////////////////////
733733
// Comparison
@@ -742,7 +742,7 @@ def FCALT : IInst<"vcalt", "udd", "fQfQd">;
742742
def CMTST : WInst<"vtst", "udd",
743743
"csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">;
744744
def CFMEQ : SOpInst<"vceq", "udd",
745-
"csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
745+
"csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
746746
def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
747747
def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>;
748748
def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>;
@@ -816,9 +816,9 @@ def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi",
816816
def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
817817

818818
let isVCVT_N = 1 in {
819-
def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "QlQUl">;
820-
def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "Qd">;
821-
def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">;
819+
def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">;
820+
def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">;
821+
def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">;
822822
}
823823

824824
////////////////////////////////////////////////////////////////////////////////

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2952,6 +2952,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
29522952
// Determine the type of this overloaded NEON intrinsic.
29532953
NeonTypeFlags Type(Result.getZExtValue());
29542954
bool usgn = Type.isUnsigned();
2955+
bool quad = Type.isQuad();
29552956

29562957
llvm::VectorType *VTy = GetNeonType(this, Type);
29572958
llvm::Type *Ty = VTy;
@@ -3212,9 +3213,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
32123213
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_f32_v, E);
32133214
case AArch64::BI__builtin_neon_vcvtq_n_f32_v:
32143215
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_f32_v, E);
3216+
case AArch64::BI__builtin_neon_vcvt_n_f64_v:
32153217
case AArch64::BI__builtin_neon_vcvtq_n_f64_v: {
32163218
llvm::Type *FloatTy =
3217-
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
3219+
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
32183220
llvm::Type *Tys[2] = { FloatTy, Ty };
32193221
Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
32203222
: Intrinsic::arm_neon_vcvtfxs2fp;
@@ -3229,10 +3231,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
32293231
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvt_n_u32_v, E);
32303232
case AArch64::BI__builtin_neon_vcvtq_n_u32_v:
32313233
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_n_u32_v, E);
3234+
case AArch64::BI__builtin_neon_vcvt_n_s64_v:
3235+
case AArch64::BI__builtin_neon_vcvt_n_u64_v:
32323236
case AArch64::BI__builtin_neon_vcvtq_n_s64_v:
32333237
case AArch64::BI__builtin_neon_vcvtq_n_u64_v: {
32343238
llvm::Type *FloatTy =
3235-
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
3239+
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
32363240
llvm::Type *Tys[2] = { Ty, FloatTy };
32373241
Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
32383242
: Intrinsic::arm_neon_vcvtfp2fxs;
@@ -3477,7 +3481,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
34773481
Quad = true;
34783482
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
34793483
llvm::Type *VTy = GetNeonType(this,
3480-
NeonTypeFlags(NeonTypeFlags::Float64, false, Quad ? true : false));
3484+
NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
34813485
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
34823486
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
34833487
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
@@ -3671,15 +3675,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
36713675
Int = Intrinsic::aarch64_neon_fcvtxn;
36723676
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
36733677
}
3674-
case AArch64::BI__builtin_neon_vcvt_f64_v: {
3678+
case AArch64::BI__builtin_neon_vcvt_f64_f32: {
36753679
llvm::Type *OpTy =
36763680
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
36773681
Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
36783682
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
36793683
}
3684+
case AArch64::BI__builtin_neon_vcvt_f64_v:
36803685
case AArch64::BI__builtin_neon_vcvtq_f64_v: {
36813686
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3682-
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
3687+
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
36833688
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
36843689
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
36853690
}
@@ -3723,10 +3728,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37233728
case AArch64::BI__builtin_neon_vcvtq_s32_v:
37243729
case AArch64::BI__builtin_neon_vcvtq_u32_v:
37253730
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcvtq_u32_v, E);
3731+
case AArch64::BI__builtin_neon_vcvt_s64_v:
3732+
case AArch64::BI__builtin_neon_vcvt_u64_v:
37263733
case AArch64::BI__builtin_neon_vcvtq_s64_v:
37273734
case AArch64::BI__builtin_neon_vcvtq_u64_v: {
37283735
llvm::Type *DoubleTy =
3729-
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
3736+
GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
37303737
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
37313738
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
37323739
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
@@ -3738,6 +3745,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37383745
Int = Intrinsic::aarch64_neon_fcvtns;
37393746
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtns_f32");
37403747
}
3748+
case AArch64::BI__builtin_neon_vcvtn_s64_v:
37413749
case AArch64::BI__builtin_neon_vcvtnq_s64_v: {
37423750
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
37433751
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3751,6 +3759,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37513759
Int = Intrinsic::aarch64_neon_fcvtnu;
37523760
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtnu_f32");
37533761
}
3762+
case AArch64::BI__builtin_neon_vcvtn_u64_v:
37543763
case AArch64::BI__builtin_neon_vcvtnq_u64_v: {
37553764
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
37563765
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3764,6 +3773,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37643773
Int = Intrinsic::aarch64_neon_fcvtps;
37653774
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtps_f32");
37663775
}
3776+
case AArch64::BI__builtin_neon_vcvtp_s64_v:
37673777
case AArch64::BI__builtin_neon_vcvtpq_s64_v: {
37683778
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
37693779
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3777,6 +3787,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37773787
Int = Intrinsic::aarch64_neon_fcvtpu;
37783788
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtpu_f32");
37793789
}
3790+
case AArch64::BI__builtin_neon_vcvtp_u64_v:
37803791
case AArch64::BI__builtin_neon_vcvtpq_u64_v: {
37813792
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
37823793
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3790,6 +3801,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
37903801
Int = Intrinsic::aarch64_neon_fcvtms;
37913802
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtms_f32");
37923803
}
3804+
case AArch64::BI__builtin_neon_vcvtm_s64_v:
37933805
case AArch64::BI__builtin_neon_vcvtmq_s64_v: {
37943806
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
37953807
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3803,6 +3815,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
38033815
Int = Intrinsic::aarch64_neon_fcvtmu;
38043816
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtmu_f32");
38053817
}
3818+
case AArch64::BI__builtin_neon_vcvtm_u64_v:
38063819
case AArch64::BI__builtin_neon_vcvtmq_u64_v: {
38073820
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
38083821
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3816,6 +3829,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
38163829
Int = Intrinsic::aarch64_neon_fcvtas;
38173830
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtas_f32");
38183831
}
3832+
case AArch64::BI__builtin_neon_vcvta_s64_v:
38193833
case AArch64::BI__builtin_neon_vcvtaq_s64_v: {
38203834
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
38213835
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3829,6 +3843,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
38293843
Int = Intrinsic::aarch64_neon_fcvtau;
38303844
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtau_f32");
38313845
}
3846+
case AArch64::BI__builtin_neon_vcvta_u64_v:
38323847
case AArch64::BI__builtin_neon_vcvtaq_u64_v: {
38333848
llvm::Type *OpTy = llvm::VectorType::get(DoubleTy, VTy->getNumElements());
38343849
llvm::Type *Tys[2] = { Ty, OpTy };
@@ -3843,7 +3858,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
38433858
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrte_v, E);
38443859
case AArch64::BI__builtin_neon_vsqrt_v:
38453860
case AArch64::BI__builtin_neon_vsqrtq_v: {
3846-
Int = Intrinsic::aarch64_neon_fsqrt;
3861+
Int = Intrinsic::sqrt;
38473862
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
38483863
}
38493864
case AArch64::BI__builtin_neon_vcvt_f32_v:

0 commit comments

Comments
 (0)