-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] Add users of operand of simplified extract_vector_elt to worklist #100074
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This helps to ensure we revisit the last extract_element uses of a node so that it can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0).
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: David Green (davemgreen). Changes: This helps to ensure we revisit the last extract_element uses of a node so that it can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0). Patch is 170.61 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/100074.diff. 17 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa9032ea2574c..cd0440077f526 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22533,6 +22533,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
Index == VecOp.getOperand(2)) {
SDValue Elt = VecOp.getOperand(1);
+ AddUsersToWorklist(VecOp.getNode());
return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 00cc6b21ccaf8..abf2e1272d645 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -951,10 +951,8 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
; CHECK-SD-LABEL: sext_v1x64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: fmov x8, d0
-; CHECK-SD-NEXT: asr x1, x8, #63
-; CHECK-SD-NEXT: mov.d v0[1], x1
; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: asr x1, x0, #63
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v1x64:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 178c229d04e47..62a79e3547b29 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1802,28 +1802,25 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: mov.d x9, v1[1]
; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: fmov x11, d1
-; CHECK-NEXT: asr x12, x10, #63
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: subs x10, x10, x11
+; CHECK-NEXT: fmov x12, d1
+; CHECK-NEXT: asr x14, x10, #63
; CHECK-NEXT: asr x11, x8, #63
-; CHECK-NEXT: asr x14, x9, #63
-; CHECK-NEXT: sbc x12, x12, x13
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x15, x12, #63
; CHECK-NEXT: subs x8, x8, x9
-; CHECK-NEXT: sbc x9, x11, x14
-; CHECK-NEXT: asr x13, x12, #63
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: eor x10, x10, x13
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: subs x2, x8, x11
-; CHECK-NEXT: eor x8, x12, x13
-; CHECK-NEXT: sbc x3, x9, x11
-; CHECK-NEXT: subs x9, x10, x13
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: sbc x1, x8, x13
-; CHECK-NEXT: mov.d v0[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbc x9, x11, x13
+; CHECK-NEXT: subs x10, x10, x12
+; CHECK-NEXT: sbc x11, x14, x15
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: eor x8, x8, x13
+; CHECK-NEXT: eor x9, x9, x13
+; CHECK-NEXT: eor x10, x10, x12
+; CHECK-NEXT: eor x11, x11, x12
+; CHECK-NEXT: subs x0, x10, x12
+; CHECK-NEXT: sbc x1, x11, x12
+; CHECK-NEXT: subs x2, x8, x13
+; CHECK-NEXT: sbc x3, x9, x13
; CHECK-NEXT: ret
%aext = sext <2 x i64> %a to <2 x i128>
%bext = sext <2 x i64> %b to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 22440b79bdcd4..b4f179e992a0d 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -241,21 +241,18 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
define <4 x i65> @sign_4xi65(<4 x i65> %a) {
; CHECK-LABEL: sign_4xi65:
; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx x8, x1, #0, #1
-; CHECK-NEXT: sbfx x9, x5, #0, #1
-; CHECK-NEXT: sbfx x10, x3, #0, #1
-; CHECK-NEXT: lsr x1, x8, #63
-; CHECK-NEXT: orr x8, x8, #0x1
-; CHECK-NEXT: lsr x3, x10, #63
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: sbfx x8, x7, #0, #1
-; CHECK-NEXT: lsr x5, x9, #63
-; CHECK-NEXT: orr x2, x10, #0x1
-; CHECK-NEXT: orr x4, x9, #0x1
-; CHECK-NEXT: lsr x7, x8, #63
-; CHECK-NEXT: orr x6, x8, #0x1
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbfx x8, x5, #0, #1
+; CHECK-NEXT: sbfx x9, x3, #0, #1
+; CHECK-NEXT: sbfx x10, x1, #0, #1
+; CHECK-NEXT: sbfx x11, x7, #0, #1
+; CHECK-NEXT: lsr x1, x10, #63
+; CHECK-NEXT: lsr x3, x9, #63
+; CHECK-NEXT: lsr x5, x8, #63
+; CHECK-NEXT: lsr x7, x11, #63
+; CHECK-NEXT: orr x0, x10, #0x1
+; CHECK-NEXT: orr x2, x9, #0x1
+; CHECK-NEXT: orr x4, x8, #0x1
+; CHECK-NEXT: orr x6, x11, #0x1
; CHECK-NEXT: ret
%c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
%res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 4723ac01d6021..0c880592d955b 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -2287,20 +2287,19 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2345,20 +2344,19 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixunsdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2407,28 +2405,26 @@ define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -2488,28 +2484,26 @@ define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -3694,20 +3688,19 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3754,20 +3747,19 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3822,23 +3814,22 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -3904,23 +3895,22 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7034,20 +7024,19 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7089,20 +7078,19 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7147,28 +7135,27 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7220,28 +7207,27 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
[DAG] Add users of operand of simplified extract_vector_elt to worklist (#100074) Summary: This helps to ensure we revisit the last extract_element uses of a node so that it can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0). Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251059
This helps to ensure we revisit the last extract_element uses of a node so that it can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0).