Skip to content

Commit 9d9a11c

Browse files
committed
[ARM] Check for LSTP side-effects.
If the LSTP instruction is inserted with an element count low enough to immediately predicate some lanes as false, this can have unintended effects on any subsequent MVE instructions in the preheader. Differential Revision: https://reviews.llvm.org/D88209
1 parent f5314d1 commit 9d9a11c

File tree

5 files changed

+108
-34
lines changed

5 files changed

+108
-34
lines changed

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,28 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
568568
}
569569
}
570570

571+
// Could inserting the [W|D]LSTP cause some unintended effects? In a perfect
572+
// world the [w|d]lstp instruction would be the last instruction in the preheader
573+
// and so it would only affect instructions within the loop body. But due to
574+
// scheduling, and/or the logic in this pass (above), the insertion point can
575+
// be moved earlier. So if the Loop Start isn't the last instruction in the
576+
// preheader, and if the initial element count is smaller than the vector
577+
// width, the Loop Start instruction will immediately generate one or more
578+
// false lanes in the mask which can, incorrectly, affect the subsequent MVE
579+
// instructions in the preheader.
580+
auto cannotInsertWDLSTPBetween = [](MachineInstr *Begin,
581+
MachineInstr *End) {
582+
auto I = MachineBasicBlock::iterator(Begin);
583+
auto E = MachineBasicBlock::iterator(End);
584+
for (; I != E; ++I)
585+
if (shouldInspect(*I))
586+
return true;
587+
return false;
588+
};
589+
590+
if (cannotInsertWDLSTPBetween(StartInsertPt, &InsertBB->back()))
591+
return false;
592+
571593
// Especially in the case of while loops, InsertBB may not be the
572594
// preheader, so we need to check that the register isn't redefined
573595
// before entering the loop.

llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,22 +108,33 @@ body: |
108108
; CHECK: bb.1.vector.ph:
109109
; CHECK: successors: %bb.2(0x80000000)
110110
; CHECK: liveins: $r0, $r1, $r2
111+
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
111112
; CHECK: renamable $q2 = MVE_VMOVimmi32 1, 0, $noreg, undef renamable $q2
113+
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
112114
; CHECK: renamable $q3 = MVE_VMOVimmi32 4, 0, $noreg, undef renamable $q3
113-
; CHECK: $lr = MVE_DLSTP_32 renamable $r2
115+
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
116+
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
117+
; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
118+
; CHECK: dead $lr = t2DLS renamable $r3
119+
; CHECK: $r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg
114120
; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg
115121
; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool)
116-
; CHECK: renamable $r3, dead $cpsr = tLSRri killed renamable $r2, 1, 14 /* CC::al */, $noreg
122+
; CHECK: renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg
117123
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
118124
; CHECK: bb.2.vector.body:
119125
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
120-
; CHECK: liveins: $lr, $q0, $q1, $q2, $q3, $r0, $r1
121-
; CHECK: MVE_VPTv4u32 2, renamable $q1, renamable $q0, 8, implicit-def $vpr
126+
; CHECK: liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r4
127+
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
128+
; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg
129+
; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
130+
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
131+
; CHECK: MVE_VPST 1, implicit $vpr
132+
; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 1, killed renamable $vpr
122133
; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr
123134
; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4)
124135
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4)
125136
; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q3, 0, $noreg, undef renamable $q0
126-
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
137+
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
127138
; CHECK: bb.3.for.cond.cleanup:
128139
; CHECK: $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9
129140
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc

llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,17 +153,25 @@ body: |
153153
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
154154
; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
155155
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
156+
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
157+
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
158+
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
159+
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
160+
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
156161
; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
157-
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
162+
; CHECK: $lr = t2DLS killed renamable $lr
158163
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
159164
; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1
160165
; CHECK: bb.2.vector.body:
161166
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
162-
; CHECK: liveins: $lr, $q1, $r0, $r1
163-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv12, align 4)
164-
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1315, align 4)
165-
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg
166-
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
167+
; CHECK: liveins: $lr, $q1, $r0, $r1, $r2
168+
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
169+
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
170+
; CHECK: MVE_VPST 2, implicit $vpr
171+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4)
172+
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4)
173+
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr
174+
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
167175
; CHECK: bb.3.middle.block:
168176
; CHECK: liveins: $q1
169177
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
@@ -277,18 +285,27 @@ body: |
277285
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
278286
; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
279287
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
288+
; CHECK: renamable $r3, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
289+
; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
290+
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
291+
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
292+
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
293+
; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
280294
; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
281295
; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg
282-
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
296+
; CHECK: $lr = t2DLS killed renamable $lr
283297
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
284298
; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1
285299
; CHECK: bb.2.vector.body:
286300
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
287-
; CHECK: liveins: $lr, $q1, $r0, $r1
288-
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4)
289-
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4)
290-
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg
291-
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
301+
; CHECK: liveins: $lr, $q1, $r0, $r1, $r2
302+
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
303+
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
304+
; CHECK: MVE_VPST 2, implicit $vpr
305+
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4)
306+
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4)
307+
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr
308+
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
292309
; CHECK: bb.3.middle.block:
293310
; CHECK: liveins: $q1
294311
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg

llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,35 +152,47 @@ body: |
152152
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
153153
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
154154
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
155+
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
156+
; CHECK: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
157+
; CHECK: t2IT 10, 8, implicit-def $itstate
158+
; CHECK: renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
159+
; CHECK: renamable $r12 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
160+
; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg
155161
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
162+
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
163+
; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
156164
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
157165
; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
158166
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
167+
; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg
159168
; CHECK: bb.1.do.body.i:
160169
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
161-
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r12
170+
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12
162171
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
163172
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
164173
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
165174
; CHECK: bb.2.arm_mean_f32_mve.exit:
166175
; CHECK: successors: %bb.3(0x80000000)
167-
; CHECK: liveins: $q0, $r0, $r1, $r2
176+
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
168177
; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
169-
; CHECK: $lr = MVE_DLSTP_32 $r1
178+
; CHECK: $lr = t2DLS killed $r4
170179
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0
171180
; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
172181
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
173182
; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
174183
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
175184
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
176-
; CHECK: dead $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
185+
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
177186
; CHECK: bb.3.do.body:
178187
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
179-
; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2
180-
; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4)
181-
; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2
182-
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
183-
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3
188+
; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3
189+
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
190+
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
191+
; CHECK: MVE_VPST 2, implicit $vpr
192+
; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4)
193+
; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2
194+
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr
195+
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
184196
; CHECK: bb.4.do.end:
185197
; CHECK: liveins: $q0, $r1, $r2
186198
; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg

llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,17 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur
231231
; CHECK-NEXT: cmp r2, #0
232232
; CHECK-NEXT: beq.w .LBB3_3
233233
; CHECK-NEXT: @ %bb.1: @ %vector.ph
234+
; CHECK-NEXT: adds r3, r2, #3
234235
; CHECK-NEXT: adr r7, .LCPI3_5
236+
; CHECK-NEXT: bic r3, r3, #3
235237
; CHECK-NEXT: vmov.i32 q0, #0x8000
238+
; CHECK-NEXT: sub.w r12, r3, #4
239+
; CHECK-NEXT: movs r3, #1
236240
; CHECK-NEXT: adr r6, .LCPI3_4
237241
; CHECK-NEXT: adr r5, .LCPI3_3
242+
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
238243
; CHECK-NEXT: adr r4, .LCPI3_2
239-
; CHECK-NEXT: dlstp.32 lr, r2
244+
; CHECK-NEXT: dls lr, lr
240245
; CHECK-NEXT: vstrw.32 q0, [sp, #160] @ 16-byte Spill
241246
; CHECK-NEXT: vldrw.u32 q0, [r7]
242247
; CHECK-NEXT: adr.w r8, .LCPI3_1
@@ -272,14 +277,19 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur
272277
; CHECK-NEXT: .LBB3_2: @ %vector.body
273278
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
274279
; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload
275-
; CHECK-NEXT: vldrb.u32 q4, [r0, q0]
280+
; CHECK-NEXT: vctp.32 r2
281+
; CHECK-NEXT: vpst
282+
; CHECK-NEXT: vldrbt.u32 q4, [r0, q0]
276283
; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload
277-
; CHECK-NEXT: vldrb.u32 q7, [r0, q0]
284+
; CHECK-NEXT: vpst
285+
; CHECK-NEXT: vldrbt.u32 q7, [r0, q0]
278286
; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload
279287
; CHECK-NEXT: vldrw.u32 q5, [sp, #112] @ 16-byte Reload
288+
; CHECK-NEXT: subs r2, #4
280289
; CHECK-NEXT: vmul.i32 q6, q7, q0
281290
; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload
282-
; CHECK-NEXT: vldrb.u32 q1, [r0, q5]
291+
; CHECK-NEXT: vpst
292+
; CHECK-NEXT: vldrbt.u32 q1, [r0, q5]
283293
; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload
284294
; CHECK-NEXT: vmul.i32 q3, q4, q0
285295
; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload
@@ -310,12 +320,14 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur
310320
; CHECK-NEXT: vadd.i32 q1, q1, q0
311321
; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload
312322
; CHECK-NEXT: vshr.u32 q1, q1, #16
313-
; CHECK-NEXT: vstrb.32 q1, [r1, q0]
323+
; CHECK-NEXT: vpst
324+
; CHECK-NEXT: vstrbt.32 q1, [r1, q0]
314325
; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload
315-
; CHECK-NEXT: vstrb.32 q2, [r1, q0]
316-
; CHECK-NEXT: vstrb.32 q6, [r1, q5]
326+
; CHECK-NEXT: vpstt
327+
; CHECK-NEXT: vstrbt.32 q2, [r1, q0]
328+
; CHECK-NEXT: vstrbt.32 q6, [r1, q5]
317329
; CHECK-NEXT: adds r1, #12
318-
; CHECK-NEXT: letp lr, .LBB3_2
330+
; CHECK-NEXT: le lr, .LBB3_2
319331
; CHECK-NEXT: .LBB3_3: @ %for.cond.cleanup
320332
; CHECK-NEXT: add sp, #216
321333
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}

0 commit comments

Comments
 (0)