Skip to content

Commit 46ecf8e

Browse files
committed
rebase, fix tests, improve comment
1 parent 5060673 commit 46ecf8e

File tree

3 files changed

+55
-47
lines changed

3 files changed

+55
-47
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14649,7 +14649,7 @@ bool AArch64TargetLowering::shouldSinkOperands(
1464914649
}
1465014650
}
1465114651

14652-
// Sink vscales close to uses for better isel
14652+
// Sink vscales closer to uses for better isel
1465314653
switch (I->getOpcode()) {
1465414654
case Instruction::GetElementPtr:
1465514655
case Instruction::Add:

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -20,24 +20,25 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
2020
; CHECK-NEXT: whilelo p1.d, xzr, x9
2121
; CHECK-NEXT: cntd x10
2222
; CHECK-NEXT: mov x8, xzr
23-
; CHECK-NEXT: mov x11, x10
23+
; CHECK-NEXT: rdvl x11, #2
24+
; CHECK-NEXT: mov x12, x10
2425
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
2526
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2627
; CHECK-NEXT: .LBB0_1: // %vector.body
2728
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2829
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
29-
; CHECK-NEXT: add x12, x0, x8
30-
; CHECK-NEXT: add x13, x1, x8
30+
; CHECK-NEXT: add x13, x0, x8
31+
; CHECK-NEXT: add x14, x1, x8
3132
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
3233
; CHECK-NEXT: mov z6.d, z1.d
3334
; CHECK-NEXT: mov z7.d, z0.d
34-
; CHECK-NEXT: whilelo p1.d, x11, x9
35-
; CHECK-NEXT: addvl x8, x8, #2
36-
; CHECK-NEXT: add x11, x11, x10
37-
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x12, #1, mul vl]
38-
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x12]
39-
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x13, #1, mul vl]
40-
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x13]
35+
; CHECK-NEXT: whilelo p1.d, x12, x9
36+
; CHECK-NEXT: add x8, x8, x11
37+
; CHECK-NEXT: add x12, x12, x10
38+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
39+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
40+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
41+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
4142
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
4243
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
4344
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
@@ -120,25 +121,26 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
120121
; CHECK-NEXT: mov x8, xzr
121122
; CHECK-NEXT: mov x9, xzr
122123
; CHECK-NEXT: and x11, x11, x12
124+
; CHECK-NEXT: rdvl x12, #2
123125
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
124126
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
125127
; CHECK-NEXT: .LBB1_1: // %vector.body
126128
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
127129
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
128-
; CHECK-NEXT: add x12, x0, x8
129-
; CHECK-NEXT: add x13, x1, x8
130+
; CHECK-NEXT: add x13, x0, x8
131+
; CHECK-NEXT: add x14, x1, x8
130132
; CHECK-NEXT: mov z6.d, z1.d
131133
; CHECK-NEXT: mov z7.d, z0.d
132134
; CHECK-NEXT: add x9, x9, x10
133-
; CHECK-NEXT: addvl x8, x8, #2
135+
; CHECK-NEXT: add x8, x8, x12
134136
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
135137
; CHECK-NEXT: cmp x11, x9
136138
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
137139
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
138-
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x12, #1, mul vl]
139-
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x12]
140-
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x13, #1, mul vl]
141-
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x13]
140+
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
141+
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
142+
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
143+
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
142144
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
143145
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
144146
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
@@ -222,25 +224,26 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
222224
; CHECK-NEXT: mov x8, xzr
223225
; CHECK-NEXT: mov x9, xzr
224226
; CHECK-NEXT: cntd x11
227+
; CHECK-NEXT: rdvl x12, #2
225228
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
226229
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
227230
; CHECK-NEXT: .LBB2_1: // %vector.body
228231
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
229232
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
230-
; CHECK-NEXT: add x12, x0, x8
231-
; CHECK-NEXT: add x13, x1, x8
233+
; CHECK-NEXT: add x13, x0, x8
234+
; CHECK-NEXT: add x14, x1, x8
232235
; CHECK-NEXT: mov z6.d, z1.d
233236
; CHECK-NEXT: mov z7.d, z0.d
234237
; CHECK-NEXT: add x9, x9, x11
235-
; CHECK-NEXT: addvl x8, x8, #2
238+
; CHECK-NEXT: add x8, x8, x12
236239
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
237240
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
238241
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
239242
; CHECK-NEXT: whilelo p1.d, x9, x10
240-
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x12, #1, mul vl]
241-
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x12]
242-
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x13, #1, mul vl]
243-
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x13]
243+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
244+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
245+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
246+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
244247
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
245248
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
246249
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll

Lines changed: 27 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -22,18 +22,19 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
2222
; CHECK-NEXT: mov w10, #100 // =0x64
2323
; CHECK-NEXT: mov x8, xzr
2424
; CHECK-NEXT: and x10, x9, x10
25+
; CHECK-NEXT: rdvl x11, #2
2526
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
2627
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2728
; CHECK-NEXT: .LBB0_1: // %vector.body
2829
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
29-
; CHECK-NEXT: add x11, x0, x8
30-
; CHECK-NEXT: add x12, x1, x8
30+
; CHECK-NEXT: add x12, x0, x8
31+
; CHECK-NEXT: add x13, x1, x8
3132
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
32-
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11, #1, mul vl]
33+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
3334
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
34-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12, #1, mul vl]
35+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
3536
; CHECK-NEXT: adds x10, x10, x9
36-
; CHECK-NEXT: addvl x8, x8, #2
37+
; CHECK-NEXT: add x8, x8, x11
3738
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
3839
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
3940
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -110,21 +111,22 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
110111
; CHECK-NEXT: neg x9, x9
111112
; CHECK-NEXT: mov x8, xzr
112113
; CHECK-NEXT: and x10, x9, x10
114+
; CHECK-NEXT: rdvl x11, #2
113115
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
114116
; CHECK-NEXT: mov z1.d, p0/m, z2.d
115117
; CHECK-NEXT: ptrue p0.d
116118
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
117119
; CHECK-NEXT: zip1 z1.d, z1.d, z3.d
118120
; CHECK-NEXT: .LBB1_1: // %vector.body
119121
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
120-
; CHECK-NEXT: add x11, x0, x8
121-
; CHECK-NEXT: add x12, x1, x8
122+
; CHECK-NEXT: add x12, x0, x8
123+
; CHECK-NEXT: add x13, x1, x8
122124
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
123-
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11, #1, mul vl]
125+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
124126
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
125-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12, #1, mul vl]
127+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
126128
; CHECK-NEXT: adds x10, x10, x9
127-
; CHECK-NEXT: addvl x8, x8, #2
129+
; CHECK-NEXT: add x8, x8, x11
128130
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
129131
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
130132
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -193,30 +195,32 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
193195
; CHECK-NEXT: ptrue p0.d
194196
; CHECK-NEXT: neg x9, x9
195197
; CHECK-NEXT: mov w10, #1000 // =0x3e8
198+
; CHECK-NEXT: rdvl x12, #2
196199
; CHECK-NEXT: mov x8, xzr
197200
; CHECK-NEXT: and x10, x9, x10
198-
; CHECK-NEXT: addvl x11, x1, #2
199201
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
200202
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
201-
; CHECK-NEXT: addvl x12, x0, #2
203+
; CHECK-NEXT: add x11, x1, x12
204+
; CHECK-NEXT: add x12, x0, x12
205+
; CHECK-NEXT: rdvl x13, #4
202206
; CHECK-NEXT: mov z2.d, z1.d
203207
; CHECK-NEXT: mov z3.d, z0.d
204208
; CHECK-NEXT: .LBB2_1: // %vector.body
205209
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
206-
; CHECK-NEXT: add x13, x0, x8
207-
; CHECK-NEXT: add x14, x12, x8
208-
; CHECK-NEXT: add x15, x1, x8
209-
; CHECK-NEXT: add x16, x11, x8
210+
; CHECK-NEXT: add x14, x0, x8
211+
; CHECK-NEXT: add x15, x12, x8
212+
; CHECK-NEXT: add x16, x1, x8
213+
; CHECK-NEXT: add x17, x11, x8
210214
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8]
211-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
215+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14, #1, mul vl]
212216
; CHECK-NEXT: ld1b { z6.b }, p1/z, [x12, x8]
213-
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x14, #1, mul vl]
217+
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x15, #1, mul vl]
214218
; CHECK-NEXT: ld1b { z16.b }, p1/z, [x1, x8]
215-
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
219+
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x16, #1, mul vl]
216220
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
217-
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x16, #1, mul vl]
221+
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
218222
; CHECK-NEXT: adds x10, x10, x9
219-
; CHECK-NEXT: addvl x8, x8, #4
223+
; CHECK-NEXT: add x8, x8, x13
220224
; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z4.d, #0
221225
; CHECK-NEXT: fcmla z0.d, p0/m, z17.d, z5.d, #0
222226
; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #0
@@ -326,6 +330,7 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
326330
; CHECK-NEXT: mov w11, #100 // =0x64
327331
; CHECK-NEXT: mov x8, xzr
328332
; CHECK-NEXT: and x10, x10, x11
333+
; CHECK-NEXT: rdvl x11, #2
329334
; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
330335
; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
331336
; CHECK-NEXT: .LBB3_1: // %vector.body
@@ -334,7 +339,7 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
334339
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0]
335340
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #1, mul vl]
336341
; CHECK-NEXT: add x8, x8, x9
337-
; CHECK-NEXT: addvl x0, x0, #2
342+
; CHECK-NEXT: add x0, x0, x11
338343
; CHECK-NEXT: cmp x10, x8
339344
; CHECK-NEXT: fadd z0.d, z5.d, z0.d
340345
; CHECK-NEXT: fadd z1.d, z4.d, z1.d

0 commit comments

Comments
 (0)