Skip to content

Commit 3504acc

Browse files
committed
[ARM] Don't mark vctp as having sideeffects
As far as I can tell, it should not be necessary for VCTP to be unpredictable in tail predicated loops. Either it has a valid loop counter as an operand, which will naturally keep it in the right loop, or it doesn't and it won't be converted to a tail predicated loop. Not marking it as having side effects allows it to be scheduled more cleanly for cases where it is not expected to become a tail predicated loop. Differential Revision: https://reviews.llvm.org/D83907
1 parent 2e74b6d commit 3504acc

File tree

3 files changed

+20
-21
lines changed

3 files changed

+20
-21
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5605,7 +5605,6 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
56055605
def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
56065606
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
56075607

5608-
let hasSideEffects = 1 in
56095608
class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
56105609
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
56115610
"$Rn", vpred_n, "", pattern> {

llvm/test/CodeGen/Thumb2/mve-float16regloops.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -796,23 +796,23 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl
796796
; CHECK-NEXT: .pad #16
797797
; CHECK-NEXT: sub sp, #16
798798
; CHECK-NEXT: ldrh r5, [r0]
799-
; CHECK-NEXT: ldr.w r12, [r0, #4]
799+
; CHECK-NEXT: ldr.w r9, [r0, #4]
800800
; CHECK-NEXT: subs r6, r5, #1
801801
; CHECK-NEXT: cmp r6, #3
802802
; CHECK-NEXT: bhi .LBB15_6
803803
; CHECK-NEXT: @ %bb.1: @ %if.then
804804
; CHECK-NEXT: ldr r7, [r0, #8]
805-
; CHECK-NEXT: add.w r4, r12, r6, lsl #1
805+
; CHECK-NEXT: add.w r4, r9, r6, lsl #1
806806
; CHECK-NEXT: lsr.w lr, r3, #2
807807
; CHECK-NEXT: ldrh.w r8, [r7, #6]
808-
; CHECK-NEXT: ldrh.w r9, [r7, #4]
808+
; CHECK-NEXT: ldrh.w r12, [r7, #4]
809809
; CHECK-NEXT: ldrh r6, [r7, #2]
810810
; CHECK-NEXT: ldrh r7, [r7]
811811
; CHECK-NEXT: wls lr, lr, .LBB15_5
812812
; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph
813813
; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
814814
; CHECK-NEXT: bic r5, r3, #3
815-
; CHECK-NEXT: add.w r10, r12, #2
815+
; CHECK-NEXT: add.w r10, r9, #2
816816
; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
817817
; CHECK-NEXT: add.w r5, r2, r5, lsl #1
818818
; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
@@ -828,7 +828,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl
828828
; CHECK-NEXT: vmul.f16 q0, q0, r7
829829
; CHECK-NEXT: vfma.f16 q0, q1, r6
830830
; CHECK-NEXT: vldrw.u32 q1, [r5]
831-
; CHECK-NEXT: vfma.f16 q0, q1, r9
831+
; CHECK-NEXT: vfma.f16 q0, q1, r12
832832
; CHECK-NEXT: vldrw.u32 q1, [r10, #4]
833833
; CHECK-NEXT: add.w r10, r10, #8
834834
; CHECK-NEXT: vfma.f16 q0, q1, r8
@@ -838,7 +838,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl
838838
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
839839
; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
840840
; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
841-
; CHECK-NEXT: add.w r12, r12, r2, lsl #1
841+
; CHECK-NEXT: add.w r9, r9, r2, lsl #1
842842
; CHECK-NEXT: add.w r1, r1, r2, lsl #1
843843
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
844844
; CHECK-NEXT: .LBB15_5: @ %while.end
@@ -847,35 +847,35 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl
847847
; CHECK-NEXT: vctp.16 lr
848848
; CHECK-NEXT: vpst
849849
; CHECK-NEXT: vstrht.16 q0, [r4]
850-
; CHECK-NEXT: vldrw.u32 q0, [r12]
851-
; CHECK-NEXT: add.w r1, r12, #2
850+
; CHECK-NEXT: vldrw.u32 q0, [r9]
851+
; CHECK-NEXT: add.w r1, r9, #2
852852
; CHECK-NEXT: vldrw.u32 q1, [r1]
853-
; CHECK-NEXT: add.w r1, r12, #6
853+
; CHECK-NEXT: add.w r1, r9, #6
854854
; CHECK-NEXT: vmul.f16 q0, q0, r7
855855
; CHECK-NEXT: vfma.f16 q0, q1, r6
856-
; CHECK-NEXT: vldrw.u32 q1, [r12, #4]
857-
; CHECK-NEXT: vfma.f16 q0, q1, r9
856+
; CHECK-NEXT: vldrw.u32 q1, [r9, #4]
857+
; CHECK-NEXT: vfma.f16 q0, q1, r12
858858
; CHECK-NEXT: vldrw.u32 q1, [r1]
859859
; CHECK-NEXT: vfma.f16 q0, q1, r8
860860
; CHECK-NEXT: vpst
861861
; CHECK-NEXT: vstrht.16 q0, [r2]
862-
; CHECK-NEXT: ldr.w r12, [r0, #4]
862+
; CHECK-NEXT: ldr.w r9, [r0, #4]
863863
; CHECK-NEXT: .LBB15_6: @ %if.end
864-
; CHECK-NEXT: add.w r0, r12, r3, lsl #1
864+
; CHECK-NEXT: add.w r0, r9, r3, lsl #1
865865
; CHECK-NEXT: lsr.w lr, r5, #2
866866
; CHECK-NEXT: wls lr, lr, .LBB15_10
867867
; CHECK-NEXT: @ %bb.7: @ %while.body51.preheader
868868
; CHECK-NEXT: bic r2, r5, #3
869869
; CHECK-NEXT: adds r1, r2, r3
870-
; CHECK-NEXT: mov r3, r12
871-
; CHECK-NEXT: add.w r1, r12, r1, lsl #1
870+
; CHECK-NEXT: mov r3, r9
871+
; CHECK-NEXT: add.w r1, r9, r1, lsl #1
872872
; CHECK-NEXT: .LBB15_8: @ %while.body51
873873
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
874874
; CHECK-NEXT: vldrw.u32 q0, [r0], #8
875875
; CHECK-NEXT: vstrb.8 q0, [r3], #8
876876
; CHECK-NEXT: le lr, .LBB15_8
877877
; CHECK-NEXT: @ %bb.9: @ %while.end55.loopexit
878-
; CHECK-NEXT: add.w r12, r12, r2, lsl #1
878+
; CHECK-NEXT: add.w r9, r9, r2, lsl #1
879879
; CHECK-NEXT: mov r0, r1
880880
; CHECK-NEXT: .LBB15_10: @ %while.end55
881881
; CHECK-NEXT: ands r1, r5, #3
@@ -884,7 +884,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl
884884
; CHECK-NEXT: vldrw.u32 q0, [r0]
885885
; CHECK-NEXT: vctp.16 r1
886886
; CHECK-NEXT: vpst
887-
; CHECK-NEXT: vstrht.16 q0, [r12]
887+
; CHECK-NEXT: vstrht.16 q0, [r9]
888888
; CHECK-NEXT: .LBB15_12: @ %if.end61
889889
; CHECK-NEXT: add sp, #16
890890
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}

llvm/test/CodeGen/Thumb2/mve-vctp.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) {
55
; CHECK-LABEL: vctp8:
66
; CHECK: @ %bb.0:
7-
; CHECK-NEXT: vctp.8 r0
87
; CHECK-NEXT: vldrw.u32 q1, [r1]
8+
; CHECK-NEXT: vctp.8 r0
99
; CHECK-NEXT: vmov.i32 q0, #0x0
1010
; CHECK-NEXT: vpsel q0, q1, q0
1111
; CHECK-NEXT: vstrw.32 q0, [r2]
@@ -20,8 +20,8 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) {
2020
define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) {
2121
; CHECK-LABEL: vctp16:
2222
; CHECK: @ %bb.0:
23-
; CHECK-NEXT: vctp.16 r0
2423
; CHECK-NEXT: vldrw.u32 q1, [r1]
24+
; CHECK-NEXT: vctp.16 r0
2525
; CHECK-NEXT: vmov.i32 q0, #0x0
2626
; CHECK-NEXT: vpsel q0, q1, q0
2727
; CHECK-NEXT: vstrw.32 q0, [r2]
@@ -36,8 +36,8 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) {
3636
define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* %out) {
3737
; CHECK-LABEL: vctp32:
3838
; CHECK: @ %bb.0:
39-
; CHECK-NEXT: vctp.32 r0
4039
; CHECK-NEXT: vldrw.u32 q1, [r1]
40+
; CHECK-NEXT: vctp.32 r0
4141
; CHECK-NEXT: vmov.i32 q0, #0x0
4242
; CHECK-NEXT: vpsel q0, q1, q0
4343
; CHECK-NEXT: vstrw.32 q0, [r2]

0 commit comments

Comments
 (0)