@@ -15,36 +15,34 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
15
15
; CHECK-LABEL: complex_mul_v2f64:
16
16
; CHECK: // %bb.0: // %entry
17
17
; CHECK-NEXT: mov z1.d, #0 // =0x0
18
- ; CHECK-NEXT: mov w9, #100 // =0x64
19
- ; CHECK-NEXT: cntd x10
20
- ; CHECK-NEXT: whilelo p1.d, xzr, x9
21
- ; CHECK-NEXT: mov x8, xzr
22
- ; CHECK-NEXT: rdvl x11, #2
18
+ ; CHECK-NEXT: mov w8, #100 // =0x64
19
+ ; CHECK-NEXT: cntd x9
20
+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
21
+ ; CHECK-NEXT: rdvl x10, #2
22
+ ; CHECK-NEXT: mov x11, x9
23
23
; CHECK-NEXT: ptrue p0.d
24
- ; CHECK-NEXT: mov x12, x10
25
24
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
26
25
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
27
26
; CHECK-NEXT: .LBB0_1: // %vector.body
28
27
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
29
28
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
30
- ; CHECK-NEXT: add x13, x0, x8
31
- ; CHECK-NEXT: add x14, x1, x8
32
- ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
33
29
; CHECK-NEXT: mov z6.d, z1.d
34
30
; CHECK-NEXT: mov z7.d, z0.d
35
- ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
36
- ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
37
- ; CHECK-NEXT: add x8, x8, x11
38
- ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
39
- ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
31
+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
32
+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
33
+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
34
+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
35
+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
36
+ ; CHECK-NEXT: add x1, x1, x10
37
+ ; CHECK-NEXT: add x0, x0, x10
40
38
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
41
39
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
42
40
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
43
41
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
44
42
; CHECK-NEXT: mov z0.d, p2/m, z7.d
45
43
; CHECK-NEXT: mov z1.d, p1/m, z6.d
46
- ; CHECK-NEXT: whilelo p1.d, x12, x9
47
- ; CHECK-NEXT: add x12, x12, x10
44
+ ; CHECK-NEXT: whilelo p1.d, x11, x8
45
+ ; CHECK-NEXT: add x11, x11, x9
48
46
; CHECK-NEXT: b.mi .LBB0_1
49
47
; CHECK-NEXT: // %bb.2: // %exit.block
50
48
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -114,39 +112,37 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
114
112
; CHECK-LABEL: complex_mul_predicated_v2f64:
115
113
; CHECK: // %bb.0: // %entry
116
114
; CHECK-NEXT: mov z1.d, #0 // =0x0
117
- ; CHECK-NEXT: cntd x10
118
- ; CHECK-NEXT: mov w12, #100 // =0x64
119
- ; CHECK-NEXT: neg x11, x10
115
+ ; CHECK-NEXT: cntd x9
116
+ ; CHECK-NEXT: mov w11, #100 // =0x64
117
+ ; CHECK-NEXT: neg x10, x9
120
118
; CHECK-NEXT: ptrue p0.d
121
119
; CHECK-NEXT: mov x8, xzr
122
- ; CHECK-NEXT: mov x9, xzr
123
- ; CHECK-NEXT: and x11, x11, x12
124
- ; CHECK-NEXT: rdvl x12, #2
120
+ ; CHECK-NEXT: and x10, x10, x11
121
+ ; CHECK-NEXT: rdvl x11, #2
125
122
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
126
123
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
127
124
; CHECK-NEXT: .LBB1_1: // %vector.body
128
125
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
129
- ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
130
- ; CHECK-NEXT: add x13, x0, x8
131
- ; CHECK-NEXT: add x14, x1, x8
126
+ ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
132
127
; CHECK-NEXT: mov z6.d, z1.d
133
128
; CHECK-NEXT: mov z7.d, z0.d
134
- ; CHECK-NEXT: add x9, x9, x10
135
- ; CHECK-NEXT: add x8, x8, x12
136
- ; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
137
- ; CHECK-NEXT: cmp x11, x9
138
- ; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
139
- ; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
140
- ; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
141
- ; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
142
- ; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
143
- ; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
129
+ ; CHECK-NEXT: add x8, x8, x9
130
+ ; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
131
+ ; CHECK-NEXT: cmp x10, x8
132
+ ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
133
+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
134
+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
135
+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
136
+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
137
+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
138
+ ; CHECK-NEXT: add x1, x1, x11
139
+ ; CHECK-NEXT: add x0, x0, x11
144
140
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
145
141
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
146
142
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
147
143
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
148
- ; CHECK-NEXT: mov z0.d, p1/m, z7.d
149
- ; CHECK-NEXT: mov z1.d, p2/m, z6.d
144
+ ; CHECK-NEXT: mov z0.d, p2/m, z7.d
145
+ ; CHECK-NEXT: mov z1.d, p1/m, z6.d
150
146
; CHECK-NEXT: b.ne .LBB1_1
151
147
; CHECK-NEXT: // %bb.2: // %exit.block
152
148
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
@@ -218,38 +214,38 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
218
214
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
219
215
; CHECK: // %bb.0: // %entry
220
216
; CHECK-NEXT: mov z1.d, #0 // =0x0
221
- ; CHECK-NEXT: mov w10, #100 // =0x64
217
+ ; CHECK-NEXT: mov w8, #100 // =0x64
218
+ ; CHECK-NEXT: cntd x9
219
+ ; CHECK-NEXT: whilelo p1.d, xzr, x8
220
+ ; CHECK-NEXT: rdvl x10, #2
221
+ ; CHECK-NEXT: cnth x11
222
222
; CHECK-NEXT: ptrue p0.d
223
- ; CHECK-NEXT: whilelo p1.d, xzr, x10
224
- ; CHECK-NEXT: mov x8, xzr
225
- ; CHECK-NEXT: mov x9, xzr
226
- ; CHECK-NEXT: cntd x11
227
- ; CHECK-NEXT: rdvl x12, #2
223
+ ; CHECK-NEXT: mov x12, x9
228
224
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
229
225
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
230
226
; CHECK-NEXT: .LBB2_1: // %vector.body
231
227
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
232
- ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
233
- ; CHECK-NEXT: add x13, x0, x8
234
- ; CHECK-NEXT: add x14, x1, x8
228
+ ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
235
229
; CHECK-NEXT: mov z6.d, z1.d
236
230
; CHECK-NEXT: mov z7.d, z0.d
237
- ; CHECK-NEXT: add x9, x9, x11
238
- ; CHECK-NEXT: add x8, x8, x12
239
- ; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
240
- ; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
241
- ; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
242
- ; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
243
- ; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
244
- ; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
245
- ; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
231
+ ; CHECK-NEXT: add x2, x2, x11
232
+ ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
233
+ ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
234
+ ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
235
+ ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
236
+ ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
237
+ ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x0]
238
+ ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x1]
239
+ ; CHECK-NEXT: add x1, x1, x10
240
+ ; CHECK-NEXT: add x0, x0, x10
246
241
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
247
242
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
248
243
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
249
244
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
250
- ; CHECK-NEXT: mov z0.d, p1/m, z7.d
251
- ; CHECK-NEXT: whilelo p1.d, x9, x10
252
- ; CHECK-NEXT: mov z1.d, p2/m, z6.d
245
+ ; CHECK-NEXT: mov z0.d, p2/m, z7.d
246
+ ; CHECK-NEXT: mov z1.d, p1/m, z6.d
247
+ ; CHECK-NEXT: whilelo p1.d, x12, x8
248
+ ; CHECK-NEXT: add x12, x12, x9
253
249
; CHECK-NEXT: b.mi .LBB2_1
254
250
; CHECK-NEXT: // %bb.2: // %exit.block
255
251
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
0 commit comments