Skip to content

Commit adfd12e

Browse files
committed
[ARM] Add patterns for store(fptosisat(..))
As an extension to D107866, this adds store(fptosisat(..)) and store(fptouisat(..)) patterns, similar to the existing fptosi/fptoui store patterns, so that the converted value is stored directly from the FP register with an FP store instead of first being moved into a GPR. Differential Revision: https://reviews.llvm.org/D108378
1 parent 543fd3d commit adfd12e

File tree

4 files changed

+44
-48
lines changed

4 files changed

+44
-48
lines changed

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
16051605

16061606
def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
16071607
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
1608+
def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
1609+
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
16081610
}
16091611

16101612
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
@@ -1627,6 +1629,9 @@ def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)),
16271629
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
16281630
addrmode5:$ptr),
16291631
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
1632+
def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)),
1633+
addrmode5:$ptr),
1634+
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
16301635

16311636
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
16321637
(outs SPR:$Sd), (ins HPR:$Sm),
@@ -1658,6 +1663,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
16581663

16591664
def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
16601665
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
1666+
def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
1667+
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
16611668
}
16621669

16631670
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
@@ -1680,6 +1687,9 @@ def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)),
16801687
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
16811688
addrmode5:$ptr),
16821689
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
1690+
def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)),
1691+
addrmode5:$ptr),
1692+
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
16831693

16841694
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
16851695
(outs SPR:$Sd), (ins HPR:$Sm),

llvm/test/CodeGen/ARM/fptoi-sat-store.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ define void @test_signed_i32_f32(i32* %d, float %f) nounwind {
6363
; VFP: @ %bb.0:
6464
; VFP-NEXT: vmov s0, r1
6565
; VFP-NEXT: vcvt.s32.f32 s0, s0
66-
; VFP-NEXT: vmov r1, s0
67-
; VFP-NEXT: str r1, [r0]
66+
; VFP-NEXT: vstr s0, [r0]
6867
; VFP-NEXT: bx lr
6968
%r = call i32 @llvm.fptosi.sat.i32.f32(float %f)
7069
store i32 %r, i32* %d, align 4
@@ -141,16 +140,14 @@ define void @test_signed_i32_f64(i32* %d, double %f) nounwind {
141140
; VFP2: @ %bb.0:
142141
; VFP2-NEXT: vmov d16, r2, r3
143142
; VFP2-NEXT: vcvt.s32.f64 s0, d16
144-
; VFP2-NEXT: vmov r1, s0
145-
; VFP2-NEXT: str r1, [r0]
143+
; VFP2-NEXT: vstr s0, [r0]
146144
; VFP2-NEXT: bx lr
147145
;
148146
; FP16-LABEL: test_signed_i32_f64:
149147
; FP16: @ %bb.0:
150148
; FP16-NEXT: vmov d0, r2, r3
151149
; FP16-NEXT: vcvt.s32.f64 s0, d0
152-
; FP16-NEXT: vmov r1, s0
153-
; FP16-NEXT: str r1, [r0]
150+
; FP16-NEXT: vstr s0, [r0]
154151
; FP16-NEXT: bx lr
155152
%r = call i32 @llvm.fptosi.sat.i32.f64(double %f)
156153
store i32 %r, i32* %d, align 4
@@ -200,8 +197,7 @@ define void @test_unsigned_i32_f32(i32* %d, float %f) nounwind {
200197
; VFP: @ %bb.0:
201198
; VFP-NEXT: vmov s0, r1
202199
; VFP-NEXT: vcvt.u32.f32 s0, s0
203-
; VFP-NEXT: vmov r1, s0
204-
; VFP-NEXT: str r1, [r0]
200+
; VFP-NEXT: vstr s0, [r0]
205201
; VFP-NEXT: bx lr
206202
%r = call i32 @llvm.fptoui.sat.i32.f32(float %f)
207203
store i32 %r, i32* %d, align 4
@@ -260,16 +256,14 @@ define void @test_unsigned_i32_f64(i32* %d, double %f) nounwind {
260256
; VFP2: @ %bb.0:
261257
; VFP2-NEXT: vmov d16, r2, r3
262258
; VFP2-NEXT: vcvt.u32.f64 s0, d16
263-
; VFP2-NEXT: vmov r1, s0
264-
; VFP2-NEXT: str r1, [r0]
259+
; VFP2-NEXT: vstr s0, [r0]
265260
; VFP2-NEXT: bx lr
266261
;
267262
; FP16-LABEL: test_unsigned_i32_f64:
268263
; FP16: @ %bb.0:
269264
; FP16-NEXT: vmov d0, r2, r3
270265
; FP16-NEXT: vcvt.u32.f64 s0, d0
271-
; FP16-NEXT: vmov r1, s0
272-
; FP16-NEXT: str r1, [r0]
266+
; FP16-NEXT: vstr s0, [r0]
273267
; FP16-NEXT: bx lr
274268
%r = call i32 @llvm.fptoui.sat.i32.f64(double %f)
275269
store i32 %r, i32* %d, align 4

llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,19 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32(<4 x float> %f) {
152152
define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
153153
; CHECK-MVE-LABEL: test_signed_v5f32_v5i32:
154154
; CHECK-MVE: @ %bb.0:
155-
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
156-
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
157155
; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2
156+
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
158157
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s3
159158
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s1
160-
; CHECK-MVE-NEXT: vmov r1, s4
161-
; CHECK-MVE-NEXT: vmov r2, s0
162-
; CHECK-MVE-NEXT: str r1, [r0, #16]
159+
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
163160
; CHECK-MVE-NEXT: vmov r1, s2
161+
; CHECK-MVE-NEXT: vmov r2, s0
164162
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
165163
; CHECK-MVE-NEXT: vmov r1, s6
166164
; CHECK-MVE-NEXT: vmov r2, s8
167165
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
168166
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
167+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
169168
; CHECK-MVE-NEXT: bx lr
170169
;
171170
; CHECK-MVEFP-LABEL: test_signed_v5f32_v5i32:
@@ -183,22 +182,21 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
183182
define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
184183
; CHECK-MVE-LABEL: test_signed_v6f32_v6i32:
185184
; CHECK-MVE: @ %bb.0:
186-
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s5
187-
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
188185
; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2
189186
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
190187
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3
191188
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s1
192-
; CHECK-MVE-NEXT: vmov r1, s6
193-
; CHECK-MVE-NEXT: vmov r2, s4
194-
; CHECK-MVE-NEXT: strd r2, r1, [r0, #16]
189+
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s5
190+
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
195191
; CHECK-MVE-NEXT: vmov r1, s2
196192
; CHECK-MVE-NEXT: vmov r2, s0
197193
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
198194
; CHECK-MVE-NEXT: vmov r1, s8
199195
; CHECK-MVE-NEXT: vmov r2, s10
200196
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
197+
; CHECK-MVE-NEXT: vstr s6, [r0, #20]
201198
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
199+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
202200
; CHECK-MVE-NEXT: bx lr
203201
;
204202
; CHECK-MVEFP-LABEL: test_signed_v6f32_v6i32:
@@ -218,25 +216,23 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
218216
define arm_aapcs_vfpcc <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
219217
; CHECK-MVE-LABEL: test_signed_v7f32_v7i32:
220218
; CHECK-MVE: @ %bb.0:
221-
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s5
222-
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
223-
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6
224219
; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2
225220
; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0
226221
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3
227222
; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s1
228-
; CHECK-MVE-NEXT: vmov r1, s8
229-
; CHECK-MVE-NEXT: vmov r2, s4
230-
; CHECK-MVE-NEXT: vmov r3, s6
231-
; CHECK-MVE-NEXT: strd r2, r1, [r0, #16]
223+
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s5
224+
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4
225+
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6
232226
; CHECK-MVE-NEXT: vmov r1, s2
233227
; CHECK-MVE-NEXT: vmov r2, s0
234-
; CHECK-MVE-NEXT: str r3, [r0, #24]
235228
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
236229
; CHECK-MVE-NEXT: vmov r1, s10
237230
; CHECK-MVE-NEXT: vmov r2, s12
238231
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
232+
; CHECK-MVE-NEXT: vstr s8, [r0, #20]
233+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
239234
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
235+
; CHECK-MVE-NEXT: vstr s6, [r0, #24]
240236
; CHECK-MVE-NEXT: bx lr
241237
;
242238
; CHECK-MVEFP-LABEL: test_signed_v7f32_v7i32:

llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -133,20 +133,19 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
133133
define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
134134
; CHECK-MVE-LABEL: test_unsigned_v5f32_v5i32:
135135
; CHECK-MVE: @ %bb.0:
136-
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
137-
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
138136
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
137+
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
139138
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s3
140139
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s1
141-
; CHECK-MVE-NEXT: vmov r1, s4
142-
; CHECK-MVE-NEXT: vmov r2, s0
143-
; CHECK-MVE-NEXT: str r1, [r0, #16]
140+
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
144141
; CHECK-MVE-NEXT: vmov r1, s2
142+
; CHECK-MVE-NEXT: vmov r2, s0
145143
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
146144
; CHECK-MVE-NEXT: vmov r1, s6
147145
; CHECK-MVE-NEXT: vmov r2, s8
148146
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
149147
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
148+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
150149
; CHECK-MVE-NEXT: bx lr
151150
;
152151
; CHECK-MVEFP-LABEL: test_unsigned_v5f32_v5i32:
@@ -164,22 +163,21 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
164163
define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
165164
; CHECK-MVE-LABEL: test_unsigned_v6f32_v6i32:
166165
; CHECK-MVE: @ %bb.0:
167-
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s5
168-
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
169166
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
170167
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
171168
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3
172169
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s1
173-
; CHECK-MVE-NEXT: vmov r1, s6
174-
; CHECK-MVE-NEXT: vmov r2, s4
175-
; CHECK-MVE-NEXT: strd r2, r1, [r0, #16]
170+
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s5
171+
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
176172
; CHECK-MVE-NEXT: vmov r1, s2
177173
; CHECK-MVE-NEXT: vmov r2, s0
178174
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
179175
; CHECK-MVE-NEXT: vmov r1, s8
180176
; CHECK-MVE-NEXT: vmov r2, s10
181177
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
178+
; CHECK-MVE-NEXT: vstr s6, [r0, #20]
182179
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
180+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
183181
; CHECK-MVE-NEXT: bx lr
184182
;
185183
; CHECK-MVEFP-LABEL: test_unsigned_v6f32_v6i32:
@@ -199,25 +197,23 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
199197
define arm_aapcs_vfpcc <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
200198
; CHECK-MVE-LABEL: test_unsigned_v7f32_v7i32:
201199
; CHECK-MVE: @ %bb.0:
202-
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s5
203-
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
204-
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s6
205200
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
206201
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
207202
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s3
208203
; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s1
209-
; CHECK-MVE-NEXT: vmov r1, s8
210-
; CHECK-MVE-NEXT: vmov r2, s4
211-
; CHECK-MVE-NEXT: vmov r3, s6
212-
; CHECK-MVE-NEXT: strd r2, r1, [r0, #16]
204+
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s5
205+
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
206+
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s6
213207
; CHECK-MVE-NEXT: vmov r1, s2
214208
; CHECK-MVE-NEXT: vmov r2, s0
215-
; CHECK-MVE-NEXT: str r3, [r0, #24]
216209
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1
217210
; CHECK-MVE-NEXT: vmov r1, s10
218211
; CHECK-MVE-NEXT: vmov r2, s12
219212
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1
213+
; CHECK-MVE-NEXT: vstr s8, [r0, #20]
214+
; CHECK-MVE-NEXT: vstr s4, [r0, #16]
220215
; CHECK-MVE-NEXT: vstrw.32 q0, [r0]
216+
; CHECK-MVE-NEXT: vstr s6, [r0, #24]
221217
; CHECK-MVE-NEXT: bx lr
222218
;
223219
; CHECK-MVEFP-LABEL: test_unsigned_v7f32_v7i32:

0 commit comments

Comments
 (0)