-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MCA] Adding missing instructions in AArch64 Neoverse V1 tests #128892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
ee2749b
to
a1278e4
Compare
@llvm/pr-subscribers-backend-aarch64 Author: Julien Villette (jvillette38) Changes: Added missing instructions for LLVM Opcodes coverage. It will help to maintain TableGen scheduling information of AArch64 Neoverse V1. Follow up of MR #126703. No more asm instruction comments to maintain. Patch is 385.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128892.diff 5 Files Affected:
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index a5330b9d6d2d6..fb0c8252c2421 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -19,26 +19,28 @@ add sp, x29, #3816
sub w0, wsp, #4077
sub w4, w20, #546, lsl #12
sub sp, sp, #288
+sub w13, wsp, w10
+sub x16, x2, w19, uxtb
sub wsp, w19, #16
adds w13, w23, #291, lsl #12
cmn w2, #4095
adds w20, wsp, #0
cmn x3, #1, lsl #12
+cmp wsp, #2342
cmp sp, #20, lsl #12
cmp x30, #4095
subs x4, sp, #3822
cmn w3, #291, lsl #12
cmn wsp, #1365
cmn sp, #1092, lsl #12
-mov sp, x30
-mov wsp, w20
-mov x11, sp
-mov w24, wsp
+mov x10, #-63432
#------------------------------------------------------------------------------
# Add-subtract (shifted register)
#------------------------------------------------------------------------------
+add wsp, wsp, w10
+add x25, x9, w25, uxtb
add w3, w5, w7
add wzr, w3, w5
add w20, wzr, w4
@@ -66,6 +68,8 @@ add x2, x3, x4, asr #0
add x5, x6, x7, asr #21
add x8, x9, x10, asr #63
adds w3, w5, w7
+adds w17, wsp, w25
+adds x13, x23, w8, uxtb
cmn w3, w5
adds w20, wzr, w4
adds w4, w6, wzr
@@ -115,6 +119,9 @@ sub x27, x28, x29, lsr #63
sub x2, x3, x4, asr #0
sub x5, x6, x7, asr #21
sub x8, x9, x10, asr #63
+sub w13, wsp, w10
+sub x16, x2, w19, uxtb
+subs x13, x15, x14, sxtx #1
subs w3, w5, w7
cmp w3, w5
subs w4, w6, wzr
@@ -173,6 +180,8 @@ cmp w14, w15, lsr #21
cmp w18, w19, asr #0
cmp w20, w21, asr #22
cmp w22, w23, asr #31
+cmp wsp, w26
+cmp x16, w27, uxtb
cmp x0, x3
cmp xzr, x4
cmp x5, xzr
@@ -187,6 +196,11 @@ cmp x20, x21, asr #55
cmp x22, x23, asr #63
cmp wzr, w0
cmp xzr, x0
+mov sp, x30
+mov wsp, w20
+mov x11, sp
+mov w24, wsp
+mov x30, v18.d[0]
#------------------------------------------------------------------------------
# Add-subtract (shifted register)
@@ -333,6 +347,12 @@ cbnz x26, #1048572
cbz wzr, #0
cbnz xzr, #0
+#------------------------------------------------------------------------------
+# Compare and branch (label)
+#------------------------------------------------------------------------------
+
+cbnz w21, test
+
#------------------------------------------------------------------------------
# Conditional branch (immediate)
#------------------------------------------------------------------------------
@@ -472,6 +492,10 @@ lsr w17, w18, w19
lsr x20, x21, x22
asr w23, w24, w25
asr x26, x27, x28
+eon w29, w4, w19
+eon x19, x12, x2
+eor w8, w27, w2
+eor x22, x16, x6
ror w0, w1, w2
ror x3, x4, x5
lsl w6, w7, w8
@@ -547,14 +571,20 @@ extr w3, w5, w7, #0
extr w11, w13, w17, #31
extr x3, x5, x7, #15
extr x11, x13, x17, #63
-ror x19, x23, #24
-ror x29, xzr, #63
-ror w9, w13, #31
+eor wsp, w4, #0xe00
+eor x27, x25, #0x1e00
+ror x19, x23, #24
+ror x29, xzr, #63
+ror w9, w13, #31
#------------------------------------------------------------------------------
# Floating-point compare
#------------------------------------------------------------------------------
+fcmp h5, h21
+fcmp h5, #0.0
+fcmpe h22, h21
+fcmpe h13, #0.0
fcmp s3, s5
fcmp s31, #0.0
fcmp s31, #0.0
@@ -578,6 +608,8 @@ fccmp s31, s15, #13, hs
fccmp d9, d31, #0, le
fccmp d3, d0, #15, gt
fccmp d31, d5, #7, ne
+fccmp h31, h3, #11, hs
+fccmpe h6, h1, #12, ne
fccmpe s1, s31, #0, eq
fccmpe s3, s0, #15, hs
fccmpe s31, s15, #13, hs
@@ -591,27 +623,38 @@ fccmpe d31, d5, #7, ne
fcsel s3, s20, s9, pl
fcsel d9, d10, d11, mi
+fcsel h26, h2, h11, hs
#------------------------------------------------------------------------------
# Floating-point data-processing (1 source)
#------------------------------------------------------------------------------
+fmov h18, h28
fmov s0, s1
fabs s2, s3
+fneg h2, h9
fneg s4, s5
fsqrt s6, s7
fcvt d8, s9
fcvt h10, s11
+frintn h12, h3
frintn s12, s13
+frintp h17, h31
frintp s14, s15
+frintm h0, h21
frintm s16, s17
+frintz h10, h29
frintz s18, s19
+frinta h22, h10
frinta s20, s21
+frintx h4, h5
frintx s22, s23
frinti s24, s25
+frinti h31, h14
fmov d0, d1
fabs d2, d3
fneg d4, d5
+fsqrt h13, h24
fsqrt d6, d7
fcvt s8, d9
fcvt h10, d11
@@ -630,13 +673,19 @@ fcvt d28, h29
#------------------------------------------------------------------------------
fmul s20, s19, s17
+fdiv h1, h26, h23
fdiv s1, s2, s3
+fadd h23, h27, h22
fadd s4, s5, s6
+fsub h20, h11, h18
fsub s7, s8, s9
fmax s10, s11, s12
+fmax h8, h7, h11
fmin s13, s14, s15
+fmaxnm h29, h13, h14
fmaxnm s16, s17, s18
fminnm s19, s20, s21
+fnmul h3, h15, h7
fnmul s22, s23, s2
fmul d20, d19, d17
fdiv d1, d2, d3
@@ -644,22 +693,28 @@ fadd d4, d5, d6
fsub d7, d8, d9
fmax d10, d11, d12
fmin d13, d14, d15
+fmin h4, h13, h17
fmaxnm d16, d17, d18
fminnm d19, d20, d21
+fminnm h29, h23, h17
fnmul d22, d23, d24
#------------------------------------------------------------------------------
# Floating-point data-processing (1 source)
#------------------------------------------------------------------------------
+fmadd h27, h0, h6, h28
fmadd s3, s5, s6, s31
fmadd d3, d13, d0, d23
+fmsub h25, h28, h12, h24
fmsub s3, s5, s6, s31
fmsub d3, d13, d0, d23
+fnmadd h3, h18, h31, h24
fnmadd s3, s5, s6, s31
fnmadd d3, d13, d0, d23
fnmsub s3, s5, s6, s31
fnmsub d3, d13, d0, d23
+fnmsub h3, h29, h24, h17
#------------------------------------------------------------------------------
# Floating-point <-> fixed-point conversion
@@ -814,8 +869,12 @@ fcvtas w25, d26
fcvtas x27, d28
fcvtau w29, d30
fcvtau xzr, d0
+fmov h6, w5
+fmov h16, x27
+fmov w15, h31
fmov w3, s9
fmov s9, w3
+fmov x21, h14
fmov x20, d31
fmov d1, x15
fmov x3, v12.d[1]
@@ -825,6 +884,7 @@ fmov v1.d[1], x19
# Floating-point immediate
#------------------------------------------------------------------------------
+fmov h29, #0.50000000
fmov s2, #0.12500000
fmov s3, #1.00000000
fmov d30, #16.00000000
@@ -886,6 +946,7 @@ stlr w26, [x29]
stlr x27, [x28]
stlr x27, [x28]
stlr x27, [x28]
+ldarb w16, [x21]
ldarb w23, [sp]
ldarh w22, [x30]
ldar wzr, [x29]
@@ -1102,16 +1163,23 @@ ldr w17, [sp, #16380]
ldr w13, [x2, #4]
ldrsw x2, [x5, #4]
ldrsw x23, [sp, #16380]
+ldrsw x21, [x25, x7]
ldrh w2, [x4]
ldrsh w23, [x6, #8190]
ldrsh wzr, [sp, #2]
ldrsh x29, [x2, #2]
+ldrsh x25, [x8, w13, uxtw]
ldrb w26, [x3, #121]
ldrb w12, [x2]
ldrsb w27, [sp, #4095]
ldrsb xzr, [x15]
+ldrsb x12, [x28, x27]
str x30, [sp]
str w20, [x4, #16380]
+str b5, [x11]
+str h23, [x15]
+str s25, [x19]
+str d15, [x2]
strh w17, [sp, #8190]
strb w23, [x3, #4095]
strb wzr, [x2]
@@ -1120,6 +1188,7 @@ ldr h20, [x2, #8190]
ldr s10, [x19, #16380]
ldr d3, [x10, #32760]
str q12, [sp, #65520]
+ldr q14, [x6, #4624]
#------------------------------------------------------------------------------
# Load/store (register offset)
@@ -1130,6 +1199,7 @@ ldrb w9, [x27, x6]
ldrsb w10, [x30, x7]
ldrb w11, [x29, x3, sxtx]
strb w12, [x28, xzr, sxtx]
+strb w5, [x26, w7, uxtw]
ldrb w14, [x26, w6, uxtw]
ldrsb w15, [x25, w7, uxtw]
ldrb w17, [x23, w9, sxtw]
@@ -1146,6 +1216,18 @@ ldrsh w16, [x24, w8, uxtw #1]
ldrh w17, [x23, w9, sxtw]
ldrh w18, [x22, w10, sxtw]
strh w19, [x21, wzr, sxtw #1]
+ldr b25, [x21, w8, uxtw]
+ldr b8, [x30, x10]
+str b14, [x13, x25]
+str b30, [x16, w26, uxtw]
+ldr h6, [x4, w4, uxtw]
+ldr h11, [x13, x9]
+str h16, [x5, x24]
+str h15, [x15, w15, uxtw]
+ldr s12, [x30, w5, uxtw]
+ldr d24, [x26, w7, uxtw]
+str s20, [x24, w10, uxtw]
+str d5, [x26, x6]
ldr w3, [sp, x5]
ldr s9, [x27, x6]
ldr w10, [x30, x7, lsl #2]
@@ -1166,6 +1248,7 @@ ldr x12, [x28, xzr, sxtx]
ldr x13, [x27, x5, sxtx #3]
prfm pldl1keep, [x26, w6, uxtw]
ldr x15, [x25, w7, uxtw]
+str x27, [x26, w24, uxtw]
ldr x16, [x24, w8, uxtw #3]
ldr x17, [x23, w9, sxtw]
ldr x18, [x22, w10, sxtw]
@@ -1200,8 +1283,11 @@ ldp x24, x25, [x4, #8]
ldp s29, s28, [sp, #252]
stp s27, s26, [sp, #-256]
ldp s1, s2, [x3, #44]
+stp x3, x6, [x16]
stp d3, d5, [x9, #504]
stp d7, d11, [x10, #-512]
+stnp x20, x16, [x8]
+stp x3, x6, [x16]
ldp d2, d3, [x30, #-8]
stp q3, q5, [sp]
stp q17, q19, [sp, #1008]
@@ -1280,6 +1366,10 @@ ldnp q23, q29, [x1, #-1024]
# Logical (immediate)
#------------------------------------------------------------------------------
+and wsp, w16, #0xe00
+and x2, x22, #0x1e00
+ands w14, w8, #0x70
+ands x4, x10, #0x60
mov w3, #983055
mov x10, #-6148914691236517206
@@ -1347,7 +1437,9 @@ adr x0, #262144
tbz x12, #62, #0
tbz x12, #62, #4
tbz x12, #62, #-32768
+tbz w17, #16, test
tbnz x12, #60, #32764
+tbnz w3, #28, test
#------------------------------------------------------------------------------
# Unconditional branch (immediate)
@@ -1368,6 +1460,12 @@ ret
eret
drps
+#------------------------------------------------------------------------------
+# Unconditional branch (label)
+#------------------------------------------------------------------------------
+
+bl test
+
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
@@ -1391,21 +1489,23 @@ drps
# CHECK-NEXT: 1 1 0.25 sub w0, wsp, #4077
# CHECK-NEXT: 1 1 0.25 sub w4, w20, #546, lsl #12
# CHECK-NEXT: 1 1 0.25 sub sp, sp, #288
+# CHECK-NEXT: 1 2 0.50 sub w13, wsp, w10
+# CHECK-NEXT: 1 2 0.50 sub x16, x2, w19, uxtb
# CHECK-NEXT: 1 1 0.25 sub wsp, w19, #16
# CHECK-NEXT: 1 1 0.33 adds w13, w23, #291, lsl #12
# CHECK-NEXT: 1 1 0.33 cmn w2, #4095
# CHECK-NEXT: 1 1 0.33 adds w20, wsp, #0
# CHECK-NEXT: 1 1 0.33 cmn x3, #1, lsl #12
+# CHECK-NEXT: 1 1 0.33 cmp wsp, #2342
# CHECK-NEXT: 1 1 0.33 cmp sp, #20, lsl #12
# CHECK-NEXT: 1 1 0.33 cmp x30, #4095
# CHECK-NEXT: 1 1 0.33 subs x4, sp, #3822
# CHECK-NEXT: 1 1 0.33 cmn w3, #291, lsl #12
# CHECK-NEXT: 1 1 0.33 cmn wsp, #1365
# CHECK-NEXT: 1 1 0.33 cmn sp, #1092, lsl #12
-# CHECK-NEXT: 1 1 0.25 mov sp, x30
-# CHECK-NEXT: 1 1 0.25 mov wsp, w20
-# CHECK-NEXT: 1 1 0.25 mov x11, sp
-# CHECK-NEXT: 1 1 0.25 mov w24, wsp
+# CHECK-NEXT: 1 1 0.25 mov x10, #-63432
+# CHECK-NEXT: 1 2 0.50 add wsp, wsp, w10
+# CHECK-NEXT: 1 2 0.50 add x25, x9, w25, uxtb
# CHECK-NEXT: 1 1 0.25 add w3, w5, w7
# CHECK-NEXT: 1 1 0.25 add wzr, w3, w5
# CHECK-NEXT: 1 1 0.25 add w20, wzr, w4
@@ -1433,6 +1533,8 @@ drps
# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63
# CHECK-NEXT: 1 1 0.33 adds w3, w5, w7
+# CHECK-NEXT: 1 2 0.50 adds w17, wsp, w25
+# CHECK-NEXT: 1 1 0.33 adds x13, x23, w8, uxtb
# CHECK-NEXT: 1 1 0.33 cmn w3, w5
# CHECK-NEXT: 1 1 0.33 adds w20, wzr, w4
# CHECK-NEXT: 1 1 0.33 adds w4, w6, wzr
@@ -1482,6 +1584,9 @@ drps
# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0
# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21
# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63
+# CHECK-NEXT: 1 2 0.50 sub w13, wsp, w10
+# CHECK-NEXT: 1 2 0.50 sub x16, x2, w19, uxtb
+# CHECK-NEXT: 1 2 0.50 subs x13, x15, x14, sxtx #1
# CHECK-NEXT: 1 1 0.33 subs w3, w5, w7
# CHECK-NEXT: 1 1 0.33 cmp w3, w5
# CHECK-NEXT: 1 1 0.33 subs w4, w6, wzr
@@ -1540,6 +1645,8 @@ drps
# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0
# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22
# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31
+# CHECK-NEXT: 1 2 0.50 cmp wsp, w26
+# CHECK-NEXT: 1 1 0.33 cmp x16, w27, uxtb
# CHECK-NEXT: 1 1 0.33 cmp x0, x3
# CHECK-NEXT: 1 1 0.33 cmp xzr, x4
# CHECK-NEXT: 1 1 0.33 cmp x5, xzr
@@ -1554,6 +1661,11 @@ drps
# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63
# CHECK-NEXT: 1 1 0.33 cmp wzr, w0
# CHECK-NEXT: 1 1 0.33 cmp xzr, x0
+# CHECK-NEXT: 1 1 0.25 mov sp, x30
+# CHECK-NEXT: 1 1 0.25 mov wsp, w20
+# CHECK-NEXT: 1 1 0.25 mov x11, sp
+# CHECK-NEXT: 1 1 0.25 mov w24, wsp
+# CHECK-NEXT: 1 2 0.25 mov x30, v18.d[0]
# CHECK-NEXT: 1 1 0.25 adc w29, w27, w25
# CHECK-NEXT: 1 1 0.25 adc wzr, w3, w4
# CHECK-NEXT: 1 1 0.25 adc w9, wzr, w10
@@ -1684,6 +1796,7 @@ drps
# CHECK-NEXT: 1 1 0.50 cbnz x26, #1048572
# CHECK-NEXT: 1 1 0.50 cbz wzr, #0
# CHECK-NEXT: 1 1 0.50 cbnz xzr, #0
+# CHECK-NEXT: 1 1 0.50 cbnz w21, test
# CHECK-NEXT: 1 1 0.50 b.ne #4
# CHECK-NEXT: 1 1 0.50 b.ge #1048572
# CHECK-NEXT: 1 1 0.50 b.ge #-4
@@ -1794,6 +1907,10 @@ drps
# CHECK-NEXT: 1 1 0.25 lsr x20, x21, x22
# CHECK-NEXT: 1 1 0.25 asr w23, w24, w25
# CHECK-NEXT: 1 1 0.25 asr x26, x27, x28
+# CHECK-NEXT: 1 1 0.25 eon w29, w4, w19
+# CHECK-NEXT: 1 1 0.25 eon x19, x12, x2
+# CHECK-NEXT: 1 1 0.25 eor w8, w27, w2
+# CHECK-NEXT: 1 1 0.25 eor x22, x16, x6
# CHECK-NEXT: 1 1 0.25 ror w0, w1, w2
# CHECK-NEXT: 1 1 0.25 ror x3, x4, x5
# CHECK-NEXT: 1 1 0.25 lsl w6, w7, w8
@@ -1859,9 +1976,15 @@ drps
# CHECK-NEXT: 2 3 0.50 extr w11, w13, w17, #31
# CHECK-NEXT: 2 3 0.50 extr x3, x5, x7, #15
# CHECK-NEXT: 2 3 0.50 extr x11, x13, x17, #63
+# CHECK-NEXT: 1 1 0.25 eor wsp, w4, #0xe00
+# CHECK-NEXT: 1 1 0.25 eor x27, x25, #0x1e00
# CHECK-NEXT: 1 1 0.25 ror x19, x23, #24
# CHECK-NEXT: 1 1 0.25 ror x29, xzr, #63
# CHECK-NEXT: 1 1 0.25 ror w9, w13, #31
+# CHECK-NEXT: 1 2 1.00 fcmp h5, h21
+# CHECK-NEXT: 1 2 1.00 fcmp h5, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe h22, h21
+# CHECK-NEXT: 1 2 1.00 fcmpe h13, #0.0
# CHECK-NEXT: 1 2 1.00 fcmp s3, s5
# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
@@ -1880,6 +2003,8 @@ drps
# CHECK-NEXT: 1 2 1.00 fccmp d9, d31, #0, le
# CHECK-NEXT: 1 2 1.00 fccmp d3, d0, #15, gt
# CHECK-NEXT: 1 2 1.00 fccmp d31, d5, #7, ne
+# CHECK-NEXT: 1 2 1.00 fccmp h31, h3, #11, hs
+# CHECK-NEXT: 1 2 1.00 fccmpe h6, h1, #12, ne
# CHECK-NEXT: 1 2 1.00 fccmpe s1, s31, #0, eq
# CHECK-NEXT: 1 2 1.00 fccmpe s3, s0, #15, hs
# CHECK-NEXT: 1 2 1.00 fccmpe s31, s15, #13, hs
@@ -1888,22 +2013,33 @@ drps
# CHECK-NEXT: 1 2 1.00 fccmpe d31, d5, #7, ne
# CHECK-NEXT: 1 2 0.50 fcsel s3, s20, s9, pl
# CHECK-NEXT: 1 2 0.50 fcsel d9, d10, d11, mi
+# CHECK-NEXT: 1 2 0.50 fcsel h26, h2, h11, hs
+# CHECK-NEXT: 1 2 0.25 fmov h18, h28
# CHECK-NEXT: 1 2 0.25 fmov s0, s1
# CHECK-NEXT: 1 2 0.25 fabs s2, s3
+# CHECK-NEXT: 1 2 0.25 fneg h2, h9
# CHECK-NEXT: 1 2 0.25 fneg s4, s5
# CHECK-NEXT: 1 10 3.50 fsqrt s6, s7
# CHECK-NEXT: 1 3 0.50 fcvt d8, s9
# CHECK-NEXT: 1 3 0.50 fcvt h10, s11
+# CHECK-NEXT: 1 3 0.50 frintn h12, h3
# CHECK-NEXT: 1 3 0.50 frintn s12, s13
+# CHECK-NEXT: 1 3 0.50 frintp h17, h31
# CHECK-NEXT: 1 3 0.50 frintp s14, s15
+# CHECK-NEXT: 1 3 0.50 frintm h0, h21
# CHECK-NEXT: 1 3 0.50 frintm s16, s17
+# CHECK-NEXT: 1 3 0.50 frintz h10, h29
# CHECK-NEXT: 1 3 0.50 frintz s18, s19
+# CHECK-NEXT: 1 3 0.50 frinta h22, h10
# CHECK-NEXT: 1 3 0.50 frinta s20, s21
+# CHECK-NEXT: 1 3 0.50 frintx h4, h5
# CHECK-NEXT: 1 3 0.50 frintx s22, s23
# CHECK-NEXT: 1 3 0.50 frinti s24, s25
+# CHECK-NEXT: 1 3 0.50 frinti h31, h14
# CHECK-NEXT: 1 2 0.25 fmov d0, d1
# CHECK-NEXT: 1 2 0.25 fabs d2, d3
# CHECK-NEXT: 1 2 0.25 fneg d4, d5
+# CHECK-NEXT: 1 7 3.50 fsqrt h13, h24
# CHECK-NEXT: 1 16 3.50 fsqrt d6, d7
# CHECK-NEXT: 1 3 0.50 fcvt s8, d9
# CHECK...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for adding this, it looks like a nice addition to the test coverage especially the fp16 instructions. I left a number of comments looking through it.
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s
Outdated
Show resolved
Hide resolved
a1278e4
to
319c9f6
Compare
Thanks for the review! I've done changes. |
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
Outdated
Show resolved
Hide resolved
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s
Outdated
Show resolved
Hide resolved
Added missing instructions for LLVM Opcodes coverage. It will help to maintain TableGen scheduling information of AArch64 Neoverse V1.
Removes redundant instructions in: AArch64/Neoverse/V1-basic-instructions.s, AArch64/Neoverse/V1-misc-instructions.s, AArch64/Neoverse/V1-neon-instructions.s. Fix MR remarks.
74b7ce9
to
4486374
Compare
Is there anything else to change? Thank you. |
It looks like there are still two unanswered questions to do with the |
74b7ce9
to
4486374
Compare
Sorry, I force-pushed this branch and lost the commit with the comment on the crc instructions. I've force-pushed again to resolve this and get back to the right commit... |
Thanks - I see part of the confusion was my fault as there isn't an existing test for bl, only for b and blr. The new test looks OK, although I would move it into the "Unconditional branch (immediate)" section. For the new crc instructions added - did you intend to test the forwarding of crcs into one another? If not it might be better to make sure there is test coverage in V1-basic-instructions.s instead. Either way they are not the biggest deal and this looks good to me otherwise. |
Moving bl <label> to unconditional branch (immediate). Fixing crc32 code region name and w3 result operand.
I moved bl into the unconditional branch (immediate) section.
Where |
74b7ce9
to
63ede6d
Compare
It is shown in the pipeline diagram, which shows forwarding from crc32cb->crc32cb in the first operand, but not from mul->crc32cb and not from crc32cb->crc32cb with the second operand (or from crc32cb->mul, they all take 2 cycles as opposed to 1):
It sounds like it is worth adding the instructions to V1-basic-instructions.s to make sure we have complete coverage of them all separate from where we forward. |
Adding crc instructions in V1-basic-instructions.s test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - LGTM. (I would remove the new crc32b instructions from V1-forwarding.s too but either way it looks like a nice improvement and testing the forwarding of different crc32s has some benefit).
Are you happy for us to submit and merge?
Yes!! Thank you. |
@jvillette38 Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR. Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues. How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again. If you don't get any reports, no action is required from you. Your changes are working as expected, well done! |
Added missing instructions for LLVM Opcodes coverage. It will help to maintain TableGen scheduling information of AArch64 Neoverse V1.
Follow up of MR #126703
This is a dispatch of new instructions of the big test: V1-scheduling-info.s
I have created a new test for special instructions without scheduling info in Software Optimization Guide: V1-misc-instructions.s
No more asm instruction comments to maintain.