Skip to content

Commit 5060673

Browse files
committed
Simplified logic, added GEP support, removed codegen test.
1 parent d449070 commit 5060673

8 files changed

+145
-199
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8230,7 +8230,6 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
82308230
if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
82318231
return true;
82328232
}
8233-
return false;
82348233
}
82358234

82368235
if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14521,7 +14521,7 @@ static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl<Use *> &Ops) {
1452114521
}
1452214522

1452314523
/// We want to sink following cases:
14524-
/// (add|sub) A, ((mul|shl) vscale, imm); (add|sub) A, vscale
14524+
/// (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A, vscale
1452514525
static bool shouldSinkVScale(Value *Op, SmallVectorImpl<Use *> &Ops) {
1452614526
if (match(Op, m_VScale()))
1452714527
return true;
@@ -14649,29 +14649,28 @@ bool AArch64TargetLowering::shouldSinkOperands(
1464914649
}
1465014650
}
1465114651

14652+
// Sink vscales close to uses for better isel
1465214653
switch (I->getOpcode()) {
14654+
case Instruction::GetElementPtr:
14655+
case Instruction::Add:
1465314656
case Instruction::Sub:
14654-
case Instruction::Add: {
14655-
// If the subtarget wants to make use of sve inc* instructions, then sink
14656-
// vscale intrinsic (along with any shifts or multiplies) so that the
14657-
// appropriate folds can be made.
14658-
if (Subtarget->useScalarIncVL()) {
14659-
bool Sink = false;
14660-
if (shouldSinkVScale(I->getOperand(0), Ops)) {
14661-
Ops.push_back(&I->getOperandUse(0));
14662-
Sink = true;
14657+
for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
14658+
if (shouldSinkVScale(I->getOperand(Op), Ops)) {
14659+
Ops.push_back(&I->getOperandUse(Op));
14660+
return true;
1466314661
}
14662+
}
14663+
break;
14664+
default:
14665+
break;
14666+
}
1466414667

14665-
if (shouldSinkVScale(I->getOperand(1), Ops)) {
14666-
Ops.push_back(&I->getOperandUse(1));
14667-
Sink = true;
14668-
}
14668+
if (!I->getType()->isVectorTy())
14669+
return false;
1466914670

14670-
if (Sink)
14671-
return true;
14672-
}
14673-
if (!I->getType()->isVectorTy())
14674-
return false;
14671+
switch (I->getOpcode()) {
14672+
case Instruction::Sub:
14673+
case Instruction::Add: {
1467514674
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
1467614675
return false;
1467714676

@@ -14690,8 +14689,6 @@ bool AArch64TargetLowering::shouldSinkOperands(
1469014689
return true;
1469114690
}
1469214691
case Instruction::Or: {
14693-
if (!I->getType()->isVectorTy())
14694-
return false;
1469514692
// Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) ->
1469614693
// bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1)
1469714694
if (Subtarget->hasNEON()) {
@@ -14729,8 +14726,6 @@ bool AArch64TargetLowering::shouldSinkOperands(
1472914726
return false;
1473014727
}
1473114728
case Instruction::Mul: {
14732-
if (!I->getType()->isVectorTy())
14733-
return false;
1473414729
int NumZExts = 0, NumSExts = 0;
1473514730
for (auto &Op : I->operands()) {
1473614731
// Make sure we are not already sinking this operand

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,24 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
2020
; CHECK-NEXT: whilelo p1.d, xzr, x9
2121
; CHECK-NEXT: cntd x10
2222
; CHECK-NEXT: mov x8, xzr
23-
; CHECK-NEXT: rdvl x11, #2
24-
; CHECK-NEXT: mov x12, x10
23+
; CHECK-NEXT: mov x11, x10
2524
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
2625
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2726
; CHECK-NEXT: .LBB0_1: // %vector.body
2827
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
2928
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
30-
; CHECK-NEXT: add x13, x0, x8
31-
; CHECK-NEXT: add x14, x1, x8
29+
; CHECK-NEXT: add x12, x0, x8
30+
; CHECK-NEXT: add x13, x1, x8
3231
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
3332
; CHECK-NEXT: mov z6.d, z1.d
3433
; CHECK-NEXT: mov z7.d, z0.d
35-
; CHECK-NEXT: whilelo p1.d, x12, x9
36-
; CHECK-NEXT: add x8, x8, x11
37-
; CHECK-NEXT: add x12, x12, x10
38-
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
39-
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
40-
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
41-
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
34+
; CHECK-NEXT: whilelo p1.d, x11, x9
35+
; CHECK-NEXT: addvl x8, x8, #2
36+
; CHECK-NEXT: add x11, x11, x10
37+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x12, #1, mul vl]
38+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x12]
39+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x13, #1, mul vl]
40+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x13]
4241
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
4342
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
4443
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
@@ -121,26 +120,25 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
121120
; CHECK-NEXT: mov x8, xzr
122121
; CHECK-NEXT: mov x9, xzr
123122
; CHECK-NEXT: and x11, x11, x12
124-
; CHECK-NEXT: rdvl x12, #2
125123
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
126124
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
127125
; CHECK-NEXT: .LBB1_1: // %vector.body
128126
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
129127
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2]
130-
; CHECK-NEXT: add x13, x0, x8
131-
; CHECK-NEXT: add x14, x1, x8
128+
; CHECK-NEXT: add x12, x0, x8
129+
; CHECK-NEXT: add x13, x1, x8
132130
; CHECK-NEXT: mov z6.d, z1.d
133131
; CHECK-NEXT: mov z7.d, z0.d
134132
; CHECK-NEXT: add x9, x9, x10
135-
; CHECK-NEXT: add x8, x8, x12
133+
; CHECK-NEXT: addvl x8, x8, #2
136134
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
137135
; CHECK-NEXT: cmp x11, x9
138136
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
139137
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
140-
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
141-
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
142-
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
143-
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
138+
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x12, #1, mul vl]
139+
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x12]
140+
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x13, #1, mul vl]
141+
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x13]
144142
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
145143
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
146144
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
@@ -224,26 +222,25 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
224222
; CHECK-NEXT: mov x8, xzr
225223
; CHECK-NEXT: mov x9, xzr
226224
; CHECK-NEXT: cntd x11
227-
; CHECK-NEXT: rdvl x12, #2
228225
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
229226
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
230227
; CHECK-NEXT: .LBB2_1: // %vector.body
231228
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
232229
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2]
233-
; CHECK-NEXT: add x13, x0, x8
234-
; CHECK-NEXT: add x14, x1, x8
230+
; CHECK-NEXT: add x12, x0, x8
231+
; CHECK-NEXT: add x13, x1, x8
235232
; CHECK-NEXT: mov z6.d, z1.d
236233
; CHECK-NEXT: mov z7.d, z0.d
237234
; CHECK-NEXT: add x9, x9, x11
238-
; CHECK-NEXT: add x8, x8, x12
235+
; CHECK-NEXT: addvl x8, x8, #2
239236
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
240237
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
241238
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
242239
; CHECK-NEXT: whilelo p1.d, x9, x10
243-
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
244-
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
245-
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
246-
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
240+
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x12, #1, mul vl]
241+
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x12]
242+
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x13, #1, mul vl]
243+
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x13]
247244
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
248245
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
249246
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,22 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
1818
; CHECK-NEXT: ptrue p1.b
1919
; CHECK-NEXT: cntd x9
2020
; CHECK-NEXT: ptrue p0.d
21-
; CHECK-NEXT: neg x10, x9
22-
; CHECK-NEXT: mov w11, #100 // =0x64
21+
; CHECK-NEXT: neg x9, x9
22+
; CHECK-NEXT: mov w10, #100 // =0x64
2323
; CHECK-NEXT: mov x8, xzr
24-
; CHECK-NEXT: and x10, x10, x11
25-
; CHECK-NEXT: rdvl x11, #2
24+
; CHECK-NEXT: and x10, x9, x10
2625
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
2726
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
2827
; CHECK-NEXT: .LBB0_1: // %vector.body
2928
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
30-
; CHECK-NEXT: add x12, x0, x8
31-
; CHECK-NEXT: add x13, x1, x8
29+
; CHECK-NEXT: add x11, x0, x8
30+
; CHECK-NEXT: add x12, x1, x8
3231
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
33-
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
32+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11, #1, mul vl]
3433
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
35-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
36-
; CHECK-NEXT: subs x10, x10, x9
37-
; CHECK-NEXT: add x8, x8, x11
34+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12, #1, mul vl]
35+
; CHECK-NEXT: adds x10, x10, x9
36+
; CHECK-NEXT: addvl x8, x8, #2
3837
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
3938
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
4039
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -106,27 +105,26 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
106105
; CHECK-NEXT: mov z1.d, #0 // =0x0
107106
; CHECK-NEXT: fmov d2, #2.00000000
108107
; CHECK-NEXT: cntd x9
109-
; CHECK-NEXT: mov w11, #100 // =0x64
108+
; CHECK-NEXT: mov w10, #100 // =0x64
110109
; CHECK-NEXT: ptrue p1.b
111-
; CHECK-NEXT: neg x10, x9
110+
; CHECK-NEXT: neg x9, x9
112111
; CHECK-NEXT: mov x8, xzr
113-
; CHECK-NEXT: and x10, x10, x11
114-
; CHECK-NEXT: rdvl x11, #2
112+
; CHECK-NEXT: and x10, x9, x10
115113
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
116114
; CHECK-NEXT: mov z1.d, p0/m, z2.d
117115
; CHECK-NEXT: ptrue p0.d
118116
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
119117
; CHECK-NEXT: zip1 z1.d, z1.d, z3.d
120118
; CHECK-NEXT: .LBB1_1: // %vector.body
121119
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
122-
; CHECK-NEXT: add x12, x0, x8
123-
; CHECK-NEXT: add x13, x1, x8
120+
; CHECK-NEXT: add x11, x0, x8
121+
; CHECK-NEXT: add x12, x1, x8
124122
; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8]
125-
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl]
123+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11, #1, mul vl]
126124
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8]
127-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
128-
; CHECK-NEXT: subs x10, x10, x9
129-
; CHECK-NEXT: add x8, x8, x11
125+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x12, #1, mul vl]
126+
; CHECK-NEXT: adds x10, x10, x9
127+
; CHECK-NEXT: addvl x8, x8, #2
130128
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
131129
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
132130
; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
@@ -193,34 +191,32 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
193191
; CHECK-NEXT: ptrue p1.b
194192
; CHECK-NEXT: cntw x9
195193
; CHECK-NEXT: ptrue p0.d
196-
; CHECK-NEXT: neg x10, x9
197-
; CHECK-NEXT: mov w11, #1000 // =0x3e8
198-
; CHECK-NEXT: rdvl x13, #2
194+
; CHECK-NEXT: neg x9, x9
195+
; CHECK-NEXT: mov w10, #1000 // =0x3e8
199196
; CHECK-NEXT: mov x8, xzr
200-
; CHECK-NEXT: and x10, x10, x11
197+
; CHECK-NEXT: and x10, x9, x10
198+
; CHECK-NEXT: addvl x11, x1, #2
201199
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
202200
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
203-
; CHECK-NEXT: rdvl x11, #4
204-
; CHECK-NEXT: add x12, x1, x13
205-
; CHECK-NEXT: add x13, x0, x13
201+
; CHECK-NEXT: addvl x12, x0, #2
206202
; CHECK-NEXT: mov z2.d, z1.d
207203
; CHECK-NEXT: mov z3.d, z0.d
208204
; CHECK-NEXT: .LBB2_1: // %vector.body
209205
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
210-
; CHECK-NEXT: add x14, x0, x8
211-
; CHECK-NEXT: add x15, x13, x8
212-
; CHECK-NEXT: add x16, x1, x8
213-
; CHECK-NEXT: add x17, x12, x8
206+
; CHECK-NEXT: add x13, x0, x8
207+
; CHECK-NEXT: add x14, x12, x8
208+
; CHECK-NEXT: add x15, x1, x8
209+
; CHECK-NEXT: add x16, x11, x8
214210
; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8]
215-
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14, #1, mul vl]
216-
; CHECK-NEXT: ld1b { z6.b }, p1/z, [x13, x8]
217-
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x15, #1, mul vl]
211+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl]
212+
; CHECK-NEXT: ld1b { z6.b }, p1/z, [x12, x8]
213+
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x14, #1, mul vl]
218214
; CHECK-NEXT: ld1b { z16.b }, p1/z, [x1, x8]
219-
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x16, #1, mul vl]
220-
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x12, x8]
221-
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl]
222-
; CHECK-NEXT: subs x10, x10, x9
223-
; CHECK-NEXT: add x8, x8, x11
215+
; CHECK-NEXT: ld1d { z17.d }, p0/z, [x15, #1, mul vl]
216+
; CHECK-NEXT: ld1b { z18.b }, p1/z, [x11, x8]
217+
; CHECK-NEXT: ld1d { z19.d }, p0/z, [x16, #1, mul vl]
218+
; CHECK-NEXT: adds x10, x10, x9
219+
; CHECK-NEXT: addvl x8, x8, #4
224220
; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z4.d, #0
225221
; CHECK-NEXT: fcmla z0.d, p0/m, z17.d, z5.d, #0
226222
; CHECK-NEXT: fcmla z2.d, p0/m, z18.d, z6.d, #0
@@ -330,7 +326,6 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
330326
; CHECK-NEXT: mov w11, #100 // =0x64
331327
; CHECK-NEXT: mov x8, xzr
332328
; CHECK-NEXT: and x10, x10, x11
333-
; CHECK-NEXT: rdvl x11, #2
334329
; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
335330
; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
336331
; CHECK-NEXT: .LBB3_1: // %vector.body
@@ -339,7 +334,7 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
339334
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0]
340335
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0, #1, mul vl]
341336
; CHECK-NEXT: add x8, x8, x9
342-
; CHECK-NEXT: add x0, x0, x11
337+
; CHECK-NEXT: addvl x0, x0, #2
343338
; CHECK-NEXT: cmp x10, x8
344339
; CHECK-NEXT: fadd z0.d, z5.d, z0.d
345340
; CHECK-NEXT: fadd z1.d, z4.d, z1.d

llvm/test/CodeGen/AArch64/sve-int-arith.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -770,19 +770,19 @@ define void @mad_in_loop(ptr %dst, ptr %src1, ptr %src2, i32 %n) {
770770
; CHECK-NEXT: b.lt .LBB70_3
771771
; CHECK-NEXT: // %bb.1: // %for.body.preheader
772772
; CHECK-NEXT: mov w9, w3
773-
; CHECK-NEXT: ptrue p1.s
773+
; CHECK-NEXT: ptrue p0.s
774774
; CHECK-NEXT: mov z0.s, #1 // =0x1
775-
; CHECK-NEXT: whilelo p0.s, xzr, x9
775+
; CHECK-NEXT: whilelo p1.s, xzr, x9
776776
; CHECK-NEXT: mov x8, xzr
777777
; CHECK-NEXT: cntw x10
778778
; CHECK-NEXT: .LBB70_2: // %vector.body
779779
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
780-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
781-
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2, x8, lsl #2]
782-
; CHECK-NEXT: mad z1.s, p1/m, z2.s, z0.s
783-
; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2]
780+
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1, x8, lsl #2]
781+
; CHECK-NEXT: ld1w { z2.s }, p1/z, [x2, x8, lsl #2]
782+
; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s
783+
; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
784784
; CHECK-NEXT: add x8, x8, x10
785-
; CHECK-NEXT: whilelo p0.s, x8, x9
785+
; CHECK-NEXT: whilelo p1.s, x8, x9
786786
; CHECK-NEXT: b.mi .LBB70_2
787787
; CHECK-NEXT: .LBB70_3: // %for.cond.cleanup
788788
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ define void @test_sink_ptrue_into_ptest(i32 %n) {
1111
; CHECK-NEXT: whilelt p0.s, wzr, w0
1212
; CHECK-NEXT: b.pl .LBB0_3
1313
; CHECK-NEXT: // %bb.1: // %for.body.preheader
14-
; CHECK-NEXT: mov w9, wzr
15-
; CHECK-NEXT: cntw x8
14+
; CHECK-NEXT: mov w8, wzr
15+
; CHECK-NEXT: cntw x9
1616
; CHECK-NEXT: .LBB0_2: // %for.body
1717
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
18-
; CHECK-NEXT: whilelt p0.s, w9, w0
19-
; CHECK-NEXT: add w9, w9, w8
18+
; CHECK-NEXT: whilelt p0.s, w8, w0
19+
; CHECK-NEXT: add w8, w8, w9
2020
; CHECK-NEXT: b.mi .LBB0_2
2121
; CHECK-NEXT: .LBB0_3: // %exit
2222
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)