@@ -22,18 +22,19 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
22
22
; CHECK-NEXT: mov w10, #100 // =0x64
23
23
; CHECK-NEXT: mov x8, xzr
24
24
; CHECK-NEXT: and x10, x9, x10
25
+ ; CHECK-NEXT: rdvl x11, #2
25
26
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
26
27
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
27
28
; CHECK-NEXT: .LBB0_1: // %vector.body
28
29
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
29
- ; CHECK-NEXT: add x11 , x0, x8
30
- ; CHECK-NEXT: add x12 , x1, x8
30
+ ; CHECK-NEXT: add x12 , x0, x8
31
+ ; CHECK-NEXT: add x13 , x1, x8
31
32
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
32
- ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11 , #1, mul vl]
33
+ ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12 , #1, mul vl]
33
34
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
34
- ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12 , #1, mul vl]
35
+ ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13 , #1, mul vl]
35
36
; CHECK-NEXT: adds x10, x10, x9
36
- ; CHECK-NEXT: addvl x8, x8, #2
37
+ ; CHECK-NEXT: add x8, x8, x11
37
38
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
38
39
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
39
40
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -110,21 +111,22 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
110
111
; CHECK-NEXT: neg x9, x9
111
112
; CHECK-NEXT: mov x8, xzr
112
113
; CHECK-NEXT: and x10, x9, x10
114
+ ; CHECK-NEXT: rdvl x11, #2
113
115
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
114
116
; CHECK-NEXT: mov z1.d, p0/m, z2.d
115
117
; CHECK-NEXT: ptrue p0.d
116
118
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
117
119
; CHECK-NEXT: zip1 z1.d, z1.d, z3.d
118
120
; CHECK-NEXT: .LBB1_1: // %vector.body
119
121
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
120
- ; CHECK-NEXT: add x11 , x0, x8
121
- ; CHECK-NEXT: add x12 , x1, x8
122
+ ; CHECK-NEXT: add x12 , x0, x8
123
+ ; CHECK-NEXT: add x13 , x1, x8
122
124
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
123
- ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11 , #1, mul vl]
125
+ ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12 , #1, mul vl]
124
126
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
125
- ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12 , #1, mul vl]
127
+ ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13 , #1, mul vl]
126
128
; CHECK-NEXT: adds x10, x10, x9
127
- ; CHECK-NEXT: addvl x8, x8, #2
129
+ ; CHECK-NEXT: add x8, x8, x11
128
130
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
129
131
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
130
132
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -193,30 +195,32 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
193
195
; CHECK-NEXT: ptrue p0.d
194
196
; CHECK-NEXT: neg x9, x9
195
197
; CHECK-NEXT: mov w10, #1000 // =0x3e8
198
+ ; CHECK-NEXT: rdvl x12, #2
196
199
; CHECK-NEXT: mov x8, xzr
197
200
; CHECK-NEXT: and x10, x9, x10
198
- ; CHECK-NEXT: addvl x11, x1, #2
199
201
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
200
202
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
201
- ; CHECK-NEXT: addvl x12, x0, #2
203
+ ; CHECK-NEXT: add x11, x1, x12
204
+ ; CHECK-NEXT: add x12, x0, x12
205
+ ; CHECK-NEXT: rdvl x13, #4
202
206
; CHECK-NEXT: mov z2.d, z1.d
203
207
; CHECK-NEXT: mov z3.d, z0.d
204
208
; CHECK-NEXT: .LBB2_1: // %vector.body
205
209
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
206
- ; CHECK-NEXT: add x13 , x0, x8
207
- ; CHECK-NEXT: add x14 , x12, x8
208
- ; CHECK-NEXT: add x15 , x1, x8
209
- ; CHECK-NEXT: add x16 , x11, x8
210
+ ; CHECK-NEXT: add x14 , x0, x8
211
+ ; CHECK-NEXT: add x15 , x12, x8
212
+ ; CHECK-NEXT: add x16 , x1, x8
213
+ ; CHECK-NEXT: add x17 , x11, x8
210
214
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8]
211
- ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13 , #1, mul vl]
215
+ ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14 , #1, mul vl]
212
216
; CHECK-NEXT: ld1b { z6.b }, p1/z, [x12, x8]
213
- ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x14 , #1, mul vl]
217
+ ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x15 , #1, mul vl]
214
218
; CHECK-NEXT: ld1b { z16.b }, p1/z, [x1, x8]
215
- ; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15 , #1, mul vl]
219
+ ; CHECK-NEXT: ld1d { z17.d }, p0/z, [x16 , #1, mul vl]
216
220
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
217
- ; CHECK-NEXT: ld1d { z19.d }, p0/z, [x16 , #1, mul vl]
221
+ ; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17 , #1, mul vl]
218
222
; CHECK-NEXT: adds x10, x10, x9
219
- ; CHECK-NEXT: addvl x8, x8, #4
223
+ ; CHECK-NEXT: add x8, x8, x13
220
224
; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z4.d, #0
221
225
; CHECK-NEXT: fcmla z0.d, p0/m, z17.d, z5.d, #0
222
226
; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #0
@@ -326,6 +330,7 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
326
330
; CHECK-NEXT: mov w11, #100 // =0x64
327
331
; CHECK-NEXT: mov x8, xzr
328
332
; CHECK-NEXT: and x10, x10, x11
333
+ ; CHECK-NEXT: rdvl x11, #2
329
334
; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
330
335
; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
331
336
; CHECK-NEXT: .LBB3_1: // %vector.body
@@ -334,7 +339,7 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
334
339
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0]
335
340
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #1, mul vl]
336
341
; CHECK-NEXT: add x8, x8, x9
337
- ; CHECK-NEXT: addvl x0, x0, #2
342
+ ; CHECK-NEXT: add x0, x0, x11
338
343
; CHECK-NEXT: cmp x10, x8
339
344
; CHECK-NEXT: fadd z0.d, z5.d, z0.d
340
345
; CHECK-NEXT: fadd z1.d, z4.d, z1.d
0 commit comments