Skip to content

Commit 7a9bba4

Browse files
author
Chad Rosier
committed
[AArch64] Refactor the Neon vector/scalar floating-point convert intrinsics so
that they use float/double rather than the vector equivalents when appropriate.

llvm-svn: 196930
1 parent 763ec2b commit 7a9bba4

File tree

4 files changed

+103
-84
lines changed

4 files changed

+103
-84
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,11 @@ def int_aarch64_neon_xtn :
3636
// Vector floating-point convert
3737
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
3838
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
39-
def int_aarch64_neon_fcvtxn :
39+
def int_aarch64_neon_vcvtxn :
4040
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
41-
def int_aarch64_neon_fcvtzs :
41+
def int_aarch64_neon_vcvtzs :
4242
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
43-
def int_aarch64_neon_fcvtzu :
43+
def int_aarch64_neon_vcvtzu :
4444
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
4545

4646
// Vector maxNum (Floating Point)
@@ -240,6 +240,30 @@ def int_aarch64_neon_vcvtf32_u32 :
240240
def int_aarch64_neon_vcvtf64_u64 :
241241
Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;
242242

243+
// Scalar Floating-point Convert
244+
def int_aarch64_neon_fcvtxn :
245+
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
246+
def int_aarch64_neon_fcvtns :
247+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
248+
def int_aarch64_neon_fcvtnu :
249+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
250+
def int_aarch64_neon_fcvtps :
251+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
252+
def int_aarch64_neon_fcvtpu :
253+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
254+
def int_aarch64_neon_fcvtms :
255+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
256+
def int_aarch64_neon_fcvtmu :
257+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
258+
def int_aarch64_neon_fcvtas :
259+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
260+
def int_aarch64_neon_fcvtau :
261+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
262+
def int_aarch64_neon_fcvtzs :
263+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
264+
def int_aarch64_neon_fcvtzu :
265+
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
266+
243267
// Scalar Floating-point Reciprocal Exponent
244268
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
245269

llvm/lib/Target/AArch64/AArch64InstrNEON.td

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4307,18 +4307,23 @@ multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
43074307

43084308
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
43094309
Instruction INSTD>
4310-
: Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))),
4310+
: Pat<(f32 (opnode (f64 FPR64:$Rn))),
43114311
(INSTD FPR64:$Rn)>;
43124312

43134313
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
43144314
Instruction INSTS,
43154315
Instruction INSTD> {
4316-
def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))),
4316+
def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
43174317
(INSTS FPR32:$Rn)>;
4318-
def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
4318+
def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
43194319
(INSTD FPR64:$Rn)>;
43204320
}
43214321

4322+
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
4323+
Instruction INSTD>
4324+
: Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
4325+
(INSTD FPR64:$Rn)>;
4326+
43224327
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
43234328
SDPatternOperator Dopnode,
43244329
Instruction INSTS,
@@ -4982,44 +4987,56 @@ def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
49824987
FCVTXN>;
49834988

49844989
defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
4985-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtns,
4990+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
49864991
FCVTNSss, FCVTNSdd>;
4992+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
49874993

49884994
defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
4989-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtnu,
4995+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
49904996
FCVTNUss, FCVTNUdd>;
4997+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
49914998

49924999
defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
4993-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtms,
5000+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
49945001
FCVTMSss, FCVTMSdd>;
5002+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
49955003

49965004
defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
4997-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtmu,
5005+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
49985006
FCVTMUss, FCVTMUdd>;
5007+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
49995008

50005009
defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
5001-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtas,
5010+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
50025011
FCVTASss, FCVTASdd>;
5012+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
50035013

50045014
defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
5005-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtau,
5015+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
50065016
FCVTAUss, FCVTAUdd>;
5017+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
50075018

50085019
defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
5009-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtps,
5020+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
50105021
FCVTPSss, FCVTPSdd>;
5022+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
50115023

50125024
defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
5013-
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtpu,
5025+
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
50145026
FCVTPUss, FCVTPUdd>;
5027+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
50155028

50165029
defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
50175030
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
50185031
FCVTZSss, FCVTZSdd>;
5032+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
5033+
FCVTZSdd>;
50195034

50205035
defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
50215036
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
50225037
FCVTZUss, FCVTZUdd>;
5038+
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
5039+
FCVTZUdd>;
50235040

50245041
// Patterns For Convert Instructions Between v1f64 and v1i64
50255042
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
@@ -8297,12 +8314,12 @@ multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
82978314
let Constraints = "$src = $Rd";
82988315
}
82998316

8300-
def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
8317+
def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
83018318
(!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
83028319

83038320
def : Pat<(v4f32 (concat_vectors
83048321
(v2f32 VPR64:$src),
8305-
(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
8322+
(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
83068323
(!cast<Instruction>(prefix # "2d4s")
83078324
(v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
83088325
VPR128:$Rn)>;

llvm/test/CodeGen/AArch64/neon-misc.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -894,13 +894,13 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
894894

895895
define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
896896
; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
897-
%vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4
897+
%vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4
898898
ret <2 x float> %vcvtx_f32_f641.i
899899
}
900900

901901
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
902902
; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
903-
%vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4
903+
%vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4
904904
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
905905
ret <4 x float> %shuffle.i
906906
}
@@ -1462,7 +1462,7 @@ declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
14621462

14631463
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
14641464

1465-
declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2
1465+
declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2
14661466

14671467
declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
14681468

0 commit comments

Comments
 (0)