Skip to content

Commit 41a6bb4

Browse files
[LLVM][CodeGen][SVE] Prefer NEON instructions when zeroing Z registers. (llvm#133929)
Several implementations have zero-latency instructions to zero registers. To date, no implementation has a dedicated SVE instruction, but we can use the NEON equivalent because it is defined to zero bits 128..VL regardless of the immediate used. NOTE: The relevant instruction is not available in streaming mode, where the original SVE DUP instruction remains in use.
1 parent 722346c commit 41a6bb4

File tree

56 files changed

+2360
-1217
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2360
-1217
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7731,6 +7731,7 @@ def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
77317731
"movi", ".2d",
77327732
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
77337733

7734+
let Predicates = [HasNEON] in {
77347735
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
77357736
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
77367737
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
@@ -7740,6 +7741,23 @@ def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
77407741
def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
77417742
def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
77427743

7744+
// Prefer NEON instructions when zeroing ZPRs because they are potentially zero-latency.
7745+
let AddedComplexity = 5 in {
7746+
def : Pat<(nxv2i64 (splat_vector (i64 0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7747+
def : Pat<(nxv4i32 (splat_vector (i32 0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7748+
def : Pat<(nxv8i16 (splat_vector (i32 0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7749+
def : Pat<(nxv16i8 (splat_vector (i32 0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7750+
def : Pat<(nxv2f64 (splat_vector (f64 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7751+
def : Pat<(nxv2f32 (splat_vector (f32 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7752+
def : Pat<(nxv4f32 (splat_vector (f32 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7753+
def : Pat<(nxv2f16 (splat_vector (f16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7754+
def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7755+
def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7756+
def : Pat<(nxv2bf16 (splat_vector (bf16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7757+
def : Pat<(nxv4bf16 (splat_vector (bf16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7758+
def : Pat<(nxv8bf16 (splat_vector (bf16 fpimm0))), (SUBREG_TO_REG (i32 0), (MOVIv2d_ns (i32 0)), zsub)>;
7759+
}
7760+
77437761
def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
77447762
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
77457763
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
@@ -7760,6 +7778,7 @@ def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
77607778
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
77617779
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
77627780
def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7781+
}
77637782

77647783
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
77657784
let isReMaterializable = 1, isAsCheapAsAMove = 1 in

llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ entry:
5050
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
5151
; CHECK-LABEL: mul_add_mull:
5252
; CHECK: // %bb.0: // %entry
53-
; CHECK-NEXT: mov z24.d, #0 // =0x0
54-
; CHECK-NEXT: mov z25.d, #0 // =0x0
55-
; CHECK-NEXT: mov z26.d, #0 // =0x0
56-
; CHECK-NEXT: mov z27.d, #0 // =0x0
53+
; CHECK-NEXT: movi v24.2d, #0000000000000000
54+
; CHECK-NEXT: movi v25.2d, #0000000000000000
55+
; CHECK-NEXT: movi v26.2d, #0000000000000000
56+
; CHECK-NEXT: movi v27.2d, #0000000000000000
5757
; CHECK-NEXT: ptrue p0.d
5858
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
5959
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
@@ -101,10 +101,10 @@ entry:
101101
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
102102
; CHECK-LABEL: mul_sub_mull:
103103
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: mov z24.d, #0 // =0x0
105-
; CHECK-NEXT: mov z25.d, #0 // =0x0
106-
; CHECK-NEXT: mov z26.d, #0 // =0x0
107-
; CHECK-NEXT: mov z27.d, #0 // =0x0
104+
; CHECK-NEXT: movi v24.2d, #0000000000000000
105+
; CHECK-NEXT: movi v25.2d, #0000000000000000
106+
; CHECK-NEXT: movi v26.2d, #0000000000000000
107+
; CHECK-NEXT: movi v27.2d, #0000000000000000
108108
; CHECK-NEXT: ptrue p0.d
109109
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
110110
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
@@ -152,10 +152,10 @@ entry:
152152
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
153153
; CHECK-LABEL: mul_conj_mull:
154154
; CHECK: // %bb.0: // %entry
155-
; CHECK-NEXT: mov z24.d, #0 // =0x0
156-
; CHECK-NEXT: mov z25.d, #0 // =0x0
157-
; CHECK-NEXT: mov z26.d, #0 // =0x0
158-
; CHECK-NEXT: mov z27.d, #0 // =0x0
155+
; CHECK-NEXT: movi v24.2d, #0000000000000000
156+
; CHECK-NEXT: movi v25.2d, #0000000000000000
157+
; CHECK-NEXT: movi v26.2d, #0000000000000000
158+
; CHECK-NEXT: movi v27.2d, #0000000000000000
159159
; CHECK-NEXT: ptrue p0.d
160160
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
161161
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
@@ -204,7 +204,7 @@ define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale
204204
; CHECK-LABEL: mul_add_rot_mull:
205205
; CHECK: // %bb.0: // %entry
206206
; CHECK-NEXT: uzp2 z24.d, z4.d, z5.d
207-
; CHECK-NEXT: mov z25.d, #0 // =0x0
207+
; CHECK-NEXT: movi v25.2d, #0000000000000000
208208
; CHECK-NEXT: uzp1 z4.d, z4.d, z5.d
209209
; CHECK-NEXT: ptrue p0.d
210210
; CHECK-NEXT: mov z26.d, z24.d

llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ entry:
4141
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
4242
; CHECK-LABEL: mul_add_mull:
4343
; CHECK: // %bb.0: // %entry
44-
; CHECK-NEXT: mov z24.d, #0 // =0x0
45-
; CHECK-NEXT: mov z25.d, #0 // =0x0
44+
; CHECK-NEXT: movi v24.2d, #0000000000000000
45+
; CHECK-NEXT: movi v25.2d, #0000000000000000
4646
; CHECK-NEXT: ptrue p0.d
4747
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
4848
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
@@ -90,8 +90,8 @@ entry:
9090
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
9191
; CHECK-LABEL: mul_sub_mull:
9292
; CHECK: // %bb.0: // %entry
93-
; CHECK-NEXT: mov z24.d, #0 // =0x0
94-
; CHECK-NEXT: mov z25.d, #0 // =0x0
93+
; CHECK-NEXT: movi v24.2d, #0000000000000000
94+
; CHECK-NEXT: movi v25.2d, #0000000000000000
9595
; CHECK-NEXT: ptrue p0.d
9696
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
9797
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
@@ -139,8 +139,8 @@ entry:
139139
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
140140
; CHECK-LABEL: mul_conj_mull:
141141
; CHECK: // %bb.0: // %entry
142-
; CHECK-NEXT: mov z24.d, #0 // =0x0
143-
; CHECK-NEXT: mov z25.d, #0 // =0x0
142+
; CHECK-NEXT: movi v24.2d, #0000000000000000
143+
; CHECK-NEXT: movi v25.2d, #0000000000000000
144144
; CHECK-NEXT: ptrue p0.d
145145
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
146146
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ entry:
4646
define <vscale x 8 x half> @complex_mul_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
4747
; CHECK-LABEL: complex_mul_v8f16:
4848
; CHECK: // %bb.0: // %entry
49-
; CHECK-NEXT: mov z2.h, #0 // =0x0
49+
; CHECK-NEXT: movi v2.2d, #0000000000000000
5050
; CHECK-NEXT: ptrue p0.h
5151
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #0
5252
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #90
@@ -72,8 +72,8 @@ entry:
7272
define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
7373
; CHECK-LABEL: complex_mul_v16f16:
7474
; CHECK: // %bb.0: // %entry
75-
; CHECK-NEXT: mov z4.h, #0 // =0x0
76-
; CHECK-NEXT: mov z5.h, #0 // =0x0
75+
; CHECK-NEXT: movi v4.2d, #0000000000000000
76+
; CHECK-NEXT: movi v5.2d, #0000000000000000
7777
; CHECK-NEXT: ptrue p0.h
7878
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
7979
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
@@ -103,10 +103,10 @@ entry:
103103
define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
104104
; CHECK-LABEL: complex_mul_v32f16:
105105
; CHECK: // %bb.0: // %entry
106-
; CHECK-NEXT: mov z24.h, #0 // =0x0
107-
; CHECK-NEXT: mov z25.h, #0 // =0x0
108-
; CHECK-NEXT: mov z26.h, #0 // =0x0
109-
; CHECK-NEXT: mov z27.h, #0 // =0x0
106+
; CHECK-NEXT: movi v24.2d, #0000000000000000
107+
; CHECK-NEXT: movi v25.2d, #0000000000000000
108+
; CHECK-NEXT: movi v26.2d, #0000000000000000
109+
; CHECK-NEXT: movi v27.2d, #0000000000000000
110110
; CHECK-NEXT: ptrue p0.h
111111
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #0
112112
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "aarch64"
77
define <vscale x 4 x float> @complex_mul_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
88
; CHECK-LABEL: complex_mul_v4f32:
99
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: mov z2.s, #0 // =0x0
10+
; CHECK-NEXT: movi v2.2d, #0000000000000000
1111
; CHECK-NEXT: ptrue p0.s
1212
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #0
1313
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #90
@@ -34,8 +34,8 @@ entry:
3434
define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
3535
; CHECK-LABEL: complex_mul_v8f32:
3636
; CHECK: // %bb.0: // %entry
37-
; CHECK-NEXT: mov z4.s, #0 // =0x0
38-
; CHECK-NEXT: mov z5.s, #0 // =0x0
37+
; CHECK-NEXT: movi v4.2d, #0000000000000000
38+
; CHECK-NEXT: movi v5.2d, #0000000000000000
3939
; CHECK-NEXT: ptrue p0.s
4040
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
4141
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
@@ -65,10 +65,10 @@ entry:
6565
define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b) {
6666
; CHECK-LABEL: complex_mul_v16f32:
6767
; CHECK: // %bb.0: // %entry
68-
; CHECK-NEXT: mov z24.s, #0 // =0x0
69-
; CHECK-NEXT: mov z25.s, #0 // =0x0
70-
; CHECK-NEXT: mov z26.s, #0 // =0x0
71-
; CHECK-NEXT: mov z27.s, #0 // =0x0
68+
; CHECK-NEXT: movi v24.2d, #0000000000000000
69+
; CHECK-NEXT: movi v25.2d, #0000000000000000
70+
; CHECK-NEXT: movi v26.2d, #0000000000000000
71+
; CHECK-NEXT: movi v27.2d, #0000000000000000
7272
; CHECK-NEXT: ptrue p0.s
7373
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #0
7474
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "aarch64"
77
define <vscale x 2 x double> @complex_mul_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
88
; CHECK-LABEL: complex_mul_v2f64:
99
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: mov z2.d, #0 // =0x0
10+
; CHECK-NEXT: movi v2.2d, #0000000000000000
1111
; CHECK-NEXT: ptrue p0.d
1212
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #0
1313
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #90
@@ -34,8 +34,8 @@ entry:
3434
define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
3535
; CHECK-LABEL: complex_mul_v4f64:
3636
; CHECK: // %bb.0: // %entry
37-
; CHECK-NEXT: mov z4.d, #0 // =0x0
38-
; CHECK-NEXT: mov z5.d, #0 // =0x0
37+
; CHECK-NEXT: movi v4.2d, #0000000000000000
38+
; CHECK-NEXT: movi v5.2d, #0000000000000000
3939
; CHECK-NEXT: ptrue p0.d
4040
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
4141
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
@@ -65,10 +65,10 @@ entry:
6565
define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b) {
6666
; CHECK-LABEL: complex_mul_v8f64:
6767
; CHECK: // %bb.0: // %entry
68-
; CHECK-NEXT: mov z24.d, #0 // =0x0
69-
; CHECK-NEXT: mov z25.d, #0 // =0x0
70-
; CHECK-NEXT: mov z26.d, #0 // =0x0
71-
; CHECK-NEXT: mov z27.d, #0 // =0x0
68+
; CHECK-NEXT: movi v24.2d, #0000000000000000
69+
; CHECK-NEXT: movi v25.2d, #0000000000000000
70+
; CHECK-NEXT: movi v26.2d, #0000000000000000
71+
; CHECK-NEXT: movi v27.2d, #0000000000000000
7272
; CHECK-NEXT: ptrue p0.d
7373
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #0
7474
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ entry:
4646
define <vscale x 8 x i16> @complex_mul_v8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
4747
; CHECK-LABEL: complex_mul_v8i16:
4848
; CHECK: // %bb.0: // %entry
49-
; CHECK-NEXT: mov z2.h, #0 // =0x0
49+
; CHECK-NEXT: movi v2.2d, #0000000000000000
5050
; CHECK-NEXT: cmla z2.h, z1.h, z0.h, #0
5151
; CHECK-NEXT: cmla z2.h, z1.h, z0.h, #90
5252
; CHECK-NEXT: mov z0.d, z2.d
@@ -71,8 +71,8 @@ entry:
7171
define <vscale x 16 x i16> @complex_mul_v16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
7272
; CHECK-LABEL: complex_mul_v16i16:
7373
; CHECK: // %bb.0: // %entry
74-
; CHECK-NEXT: mov z4.h, #0 // =0x0
75-
; CHECK-NEXT: mov z5.h, #0 // =0x0
74+
; CHECK-NEXT: movi v4.2d, #0000000000000000
75+
; CHECK-NEXT: movi v5.2d, #0000000000000000
7676
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #0
7777
; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
7878
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #90
@@ -101,10 +101,10 @@ entry:
101101
define <vscale x 32 x i16> @complex_mul_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
102102
; CHECK-LABEL: complex_mul_v32i16:
103103
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: mov z24.h, #0 // =0x0
105-
; CHECK-NEXT: mov z25.h, #0 // =0x0
106-
; CHECK-NEXT: mov z26.h, #0 // =0x0
107-
; CHECK-NEXT: mov z27.h, #0 // =0x0
104+
; CHECK-NEXT: movi v24.2d, #0000000000000000
105+
; CHECK-NEXT: movi v25.2d, #0000000000000000
106+
; CHECK-NEXT: movi v26.2d, #0000000000000000
107+
; CHECK-NEXT: movi v27.2d, #0000000000000000
108108
; CHECK-NEXT: cmla z24.h, z4.h, z0.h, #0
109109
; CHECK-NEXT: cmla z25.h, z5.h, z1.h, #0
110110
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "aarch64"
77
define <vscale x 4 x i32> @complex_mul_v4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
88
; CHECK-LABEL: complex_mul_v4i32:
99
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: mov z2.s, #0 // =0x0
10+
; CHECK-NEXT: movi v2.2d, #0000000000000000
1111
; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #0
1212
; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #90
1313
; CHECK-NEXT: mov z0.d, z2.d
@@ -33,8 +33,8 @@ entry:
3333
define <vscale x 8 x i32> @complex_mul_v8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
3434
; CHECK-LABEL: complex_mul_v8i32:
3535
; CHECK: // %bb.0: // %entry
36-
; CHECK-NEXT: mov z4.s, #0 // =0x0
37-
; CHECK-NEXT: mov z5.s, #0 // =0x0
36+
; CHECK-NEXT: movi v4.2d, #0000000000000000
37+
; CHECK-NEXT: movi v5.2d, #0000000000000000
3838
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #0
3939
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
4040
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #90
@@ -63,10 +63,10 @@ entry:
6363
define <vscale x 16 x i32> @complex_mul_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
6464
; CHECK-LABEL: complex_mul_v16i32:
6565
; CHECK: // %bb.0: // %entry
66-
; CHECK-NEXT: mov z24.s, #0 // =0x0
67-
; CHECK-NEXT: mov z25.s, #0 // =0x0
68-
; CHECK-NEXT: mov z26.s, #0 // =0x0
69-
; CHECK-NEXT: mov z27.s, #0 // =0x0
66+
; CHECK-NEXT: movi v24.2d, #0000000000000000
67+
; CHECK-NEXT: movi v25.2d, #0000000000000000
68+
; CHECK-NEXT: movi v26.2d, #0000000000000000
69+
; CHECK-NEXT: movi v27.2d, #0000000000000000
7070
; CHECK-NEXT: cmla z24.s, z4.s, z0.s, #0
7171
; CHECK-NEXT: cmla z25.s, z5.s, z1.s, #0
7272
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0

llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "aarch64"
77
define <vscale x 2 x i64> @complex_mul_v2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
88
; CHECK-LABEL: complex_mul_v2i64:
99
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: mov z2.d, #0 // =0x0
10+
; CHECK-NEXT: movi v2.2d, #0000000000000000
1111
; CHECK-NEXT: cmla z2.d, z1.d, z0.d, #0
1212
; CHECK-NEXT: cmla z2.d, z1.d, z0.d, #90
1313
; CHECK-NEXT: mov z0.d, z2.d
@@ -33,8 +33,8 @@ entry:
3333
define <vscale x 4 x i64> @complex_mul_v4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
3434
; CHECK-LABEL: complex_mul_v4i64:
3535
; CHECK: // %bb.0: // %entry
36-
; CHECK-NEXT: mov z4.d, #0 // =0x0
37-
; CHECK-NEXT: mov z5.d, #0 // =0x0
36+
; CHECK-NEXT: movi v4.2d, #0000000000000000
37+
; CHECK-NEXT: movi v5.2d, #0000000000000000
3838
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #0
3939
; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
4040
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #90
@@ -63,10 +63,10 @@ entry:
6363
define <vscale x 8 x i64> @complex_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
6464
; CHECK-LABEL: complex_mul_v8i64:
6565
; CHECK: // %bb.0: // %entry
66-
; CHECK-NEXT: mov z24.d, #0 // =0x0
67-
; CHECK-NEXT: mov z25.d, #0 // =0x0
68-
; CHECK-NEXT: mov z26.d, #0 // =0x0
69-
; CHECK-NEXT: mov z27.d, #0 // =0x0
66+
; CHECK-NEXT: movi v24.2d, #0000000000000000
67+
; CHECK-NEXT: movi v25.2d, #0000000000000000
68+
; CHECK-NEXT: movi v26.2d, #0000000000000000
69+
; CHECK-NEXT: movi v27.2d, #0000000000000000
7070
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #0
7171
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #0
7272
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #0
@@ -101,10 +101,10 @@ entry:
101101
define <vscale x 8 x i64> @complex_minus_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
102102
; CHECK-LABEL: complex_minus_mul_v8i64:
103103
; CHECK: // %bb.0: // %entry
104-
; CHECK-NEXT: mov z24.d, #0 // =0x0
105-
; CHECK-NEXT: mov z25.d, #0 // =0x0
106-
; CHECK-NEXT: mov z26.d, #0 // =0x0
107-
; CHECK-NEXT: mov z27.d, #0 // =0x0
104+
; CHECK-NEXT: movi v24.2d, #0000000000000000
105+
; CHECK-NEXT: movi v25.2d, #0000000000000000
106+
; CHECK-NEXT: movi v26.2d, #0000000000000000
107+
; CHECK-NEXT: movi v27.2d, #0000000000000000
108108
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #270
109109
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #270
110110
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #270

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ target triple = "aarch64"
1414
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
1515
; CHECK-LABEL: complex_mul_v2f64:
1616
; CHECK: // %bb.0: // %entry
17-
; CHECK-NEXT: mov z1.d, #0 // =0x0
17+
; CHECK-NEXT: movi v1.2d, #0000000000000000
1818
; CHECK-NEXT: mov w8, #100 // =0x64
1919
; CHECK-NEXT: cntd x9
2020
; CHECK-NEXT: whilelo p1.d, xzr, x8
@@ -111,7 +111,7 @@ exit.block: ; preds = %vector.body
111111
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
112112
; CHECK-LABEL: complex_mul_predicated_v2f64:
113113
; CHECK: // %bb.0: // %entry
114-
; CHECK-NEXT: mov z1.d, #0 // =0x0
114+
; CHECK-NEXT: movi v1.2d, #0000000000000000
115115
; CHECK-NEXT: cntd x9
116116
; CHECK-NEXT: mov w11, #100 // =0x64
117117
; CHECK-NEXT: neg x10, x9
@@ -213,7 +213,7 @@ exit.block: ; preds = %vector.body
213213
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
214214
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
215215
; CHECK: // %bb.0: // %entry
216-
; CHECK-NEXT: mov z1.d, #0 // =0x0
216+
; CHECK-NEXT: movi v1.2d, #0000000000000000
217217
; CHECK-NEXT: mov w8, #100 // =0x64
218218
; CHECK-NEXT: cntd x9
219219
; CHECK-NEXT: whilelo p1.d, xzr, x8

0 commit comments

Comments
 (0)