Skip to content

Commit b5d8a03

Browse files
author
Rin Dobrescu
authored
[AArch64] Add missing ASIMD FP convert instructions to scheduling model (#115146)
Some ASIMD FP convert instructions have incorrect scheduling information. These instructions currently have latency 2, throughput 4 and utilise pipeline V. This patch corrects the scheduling models to match the relevant Software Optimization Guides. The V1 and V2 Software Optimization Guides show that ASIMD FP convert instructions should all utilise pipelines V02. Their execution latency and throughput should also differ depending on form. See section 3.17 "ASIMD floating-point instructions" in the Neoverse-V1 and Neoverse-V2 Software Optimization Guides for the characteristics of instruction performance. References: - V1 SOG: https://developer.arm.com/documentation/109897/latest/ - V2 SOG: https://developer.arm.com/documentation/109898/latest/
1 parent fb90733 commit b5d8a03

File tree

4 files changed

+168
-144
lines changed

4 files changed

+168
-144
lines changed

llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,16 +1015,34 @@ def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
10151015
"^FCVTXN(v[24]f32|v1i64)$")>;
10161016

10171017
// ASIMD FP convert, other, D-form F32 and Q-form F64
1018-
def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1019-
"^[SU]CVTFv2f(32|64)$")>;
1018+
def : InstRW<[V1Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
1019+
"^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
1020+
"^FCVT[AMNPZ][SU]v1i64$",
1021+
"^FCVTZ[SU]d$",
1022+
"^[SU]CVTFv2f(32|64)$",
1023+
"^[SU]CVTFv2i(32|64)_shift$",
1024+
"^[SU]CVTFv1i64$",
1025+
"^[SU]CVTFd$")>;
10201026

10211027
// ASIMD FP convert, other, D-form F16 and Q-form F32
1022-
def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1023-
"^[SU]CVTFv4f(16|32)$")>;
1028+
def : InstRW<[V1Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
1029+
"^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
1030+
"^FCVT[AMNPZ][SU]v1i32$",
1031+
"^FCVTZ[SU]s$",
1032+
"^[SU]CVTFv4f(16|32)$",
1033+
"^[SU]CVTFv4i(16|32)_shift$",
1034+
"^[SU]CVTFv1i32$",
1035+
"^[SU]CVTFs$")>;
10241036

10251037
// ASIMD FP convert, other, Q-form F16
1026-
def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1027-
"^[SU]CVTFv8f16$")>;
1038+
def : InstRW<[V1Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1039+
"^FCVT[AMNPZ][SU]v8i16_shift$",
1040+
"^FCVT[AMNPZ][SU]v1f16$",
1041+
"^FCVTZ[SU]h$",
1042+
"^[SU]CVTFv8f16$",
1043+
"^[SU]CVTFv8i16_shift$",
1044+
"^[SU]CVTFv1i16$",
1045+
"^[SU]CVTFh$")>;
10281046

10291047
// ASIMD FP divide, D-form, F16
10301048
// ASIMD FP square root, D-form, F16

llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,25 +1567,31 @@ def : InstRW<[V2Write_3c_1V02], (instregex "^FCVTN(v2|v4)i32",
15671567

15681568
// ASIMD FP convert, other, D-form F32 and Q-form F64
15691569
def : InstRW<[V2Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
1570+
"^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
15701571
"^FCVT[AMNPZ][SU]v1i64$",
15711572
"^FCVTZ[SU]d$",
15721573
"^[SU]CVTFv2f(32|64)$",
1574+
"^[SU]CVTFv2i(32|64)_shift$",
15731575
"^[SU]CVTFv1i64$",
15741576
"^[SU]CVTFd$")>;
15751577

15761578
// ASIMD FP convert, other, D-form F16 and Q-form F32
15771579
def : InstRW<[V2Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
1580+
"^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
15781581
"^FCVT[AMNPZ][SU]v1i32$",
15791582
"^FCVTZ[SU]s$",
15801583
"^[SU]CVTFv4f(16|32)$",
1584+
"^[SU]CVTFv4i(16|32)_shift$",
15811585
"^[SU]CVTFv1i32$",
15821586
"^[SU]CVTFs$")>;
15831587

15841588
// ASIMD FP convert, other, Q-form F16
15851589
def : InstRW<[V2Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1590+
"^FCVT[AMNPZ][SU]v8i16_shift$",
15861591
"^FCVT[AMNPZ][SU]v1f16$",
15871592
"^FCVTZ[SU]h$",
15881593
"^[SU]CVTFv8f16$",
1594+
"^[SU]CVTFv8i16_shift$",
15891595
"^[SU]CVTFv1i16$",
15901596
"^[SU]CVTFh$")>;
15911597

0 commit comments

Comments
 (0)