Skip to content

[AArch64] Allow LDR merge with same destination register by renaming #71908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 180 additions & 80 deletions llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,8 @@ define dso_local void @test_va_copy() {

; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var

-; CHECK: ldr [[BLOCKB:q[0-9]+]], [x[[SRC]], #16]
+; CHECK: ldp [[BLOCKA:q[0-9]+]], [[BLOCKB:q[0-9]+]], [x[[SRC]]]
; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
-; CHECK: ldr [[BLOCKA:q[0-9]+]], [x[[SRC]]]
; CHECK: stp [[BLOCKA]], [[BLOCKB]], [x[[DST]]]
ret void
; CHECK: ret
Expand Down
100 changes: 40 additions & 60 deletions llvm/test/CodeGen/AArch64/fexplog.ll
Original file line number Diff line number Diff line change
Expand Up @@ -713,14 +713,12 @@ define <7 x half> @exp_v7f16(<7 x half> %a) {
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
Expand Down Expand Up @@ -963,14 +961,12 @@ define <8 x half> @exp_v8f16(<8 x half> %a) {
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down Expand Up @@ -1994,14 +1990,12 @@ define <7 x half> @exp2_v7f16(<7 x half> %a) {
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
Expand Down Expand Up @@ -2244,14 +2238,12 @@ define <8 x half> @exp2_v8f16(<8 x half> %a) {
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down Expand Up @@ -3275,14 +3267,12 @@ define <7 x half> @log_v7f16(<7 x half> %a) {
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
Expand Down Expand Up @@ -3525,14 +3515,12 @@ define <8 x half> @log_v8f16(<8 x half> %a) {
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down Expand Up @@ -4556,14 +4544,12 @@ define <7 x half> @log2_v7f16(<7 x half> %a) {
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
Expand Down Expand Up @@ -4806,14 +4792,12 @@ define <8 x half> @log2_v8f16(<8 x half> %a) {
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down Expand Up @@ -5837,14 +5821,12 @@ define <7 x half> @log10_v7f16(<7 x half> %a) {
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
Expand Down Expand Up @@ -6087,14 +6069,12 @@ define <8 x half> @log10_v8f16(<8 x half> %a) {
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down
74 changes: 34 additions & 40 deletions llvm/test/CodeGen/AArch64/fpow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,13 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-GI-NEXT: fmov d1, d11
; CHECK-GI-NEXT: fmov d0, d9
; CHECK-GI-NEXT: bl pow
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q3, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v2.d[1], v3.d[0]
; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: add sp, sp, #112
Expand Down Expand Up @@ -715,14 +714,14 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) {
; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov h1, v1.h[2]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
-; CHECK-SD-NEXT: mov h1, v1.h[2]
; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -858,14 +857,13 @@ define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) {
; CHECK-GI-NEXT: fcvt s1, h0
; CHECK-GI-NEXT: fmov s0, s2
; CHECK-GI-NEXT: bl powf
-; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
Expand Down Expand Up @@ -1020,14 +1018,14 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov h1, v1.h[2]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
-; CHECK-SD-NEXT: mov h1, v1.h[2]
; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1175,22 +1173,20 @@ define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-GI-NEXT: fcvt s1, h0
; CHECK-GI-NEXT: fmov s0, s2
; CHECK-GI-NEXT: bl powf
-; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
Expand Down Expand Up @@ -1225,14 +1221,13 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-SD-NEXT: fcvt s0, h0
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp q1, q2, [sp, #48] // 32-byte Folded Reload
; CHECK-SD-NEXT: mov h1, v1.h[2]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
-; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: fcvt s0, h0
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
Expand Down Expand Up @@ -1307,14 +1302,14 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT: ldp q1, q2, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov h1, v1.h[2]
+; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov h0, v0.h[2]
-; CHECK-SD-NEXT: mov h1, v1.h[2]
; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fcvt s1, h1
; CHECK-SD-NEXT: bl powf
; CHECK-SD-NEXT: fcvt h0, s0
; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
Expand Down Expand Up @@ -1578,9 +1573,8 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-GI-NEXT: mov v3.h[3], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #240] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v3.h[4], v2.h[0]
-; CHECK-GI-NEXT: ldr q2, [sp, #256] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp q4, q2, [sp, #240] // 32-byte Folded Reload
+; CHECK-GI-NEXT: mov v3.h[4], v4.h[0]
; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v3.h[5], v2.h[0]
Expand Down
Loading