Skip to content

Commit 3366a1c

Browse files
committed
[AArch64] Remove redundant instructions in int-to-fp of lowest vector element
1 parent b22adf0 commit 3366a1c

File tree

4 files changed

+191
-14
lines changed

4 files changed

+191
-14
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6079,6 +6079,32 @@ def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
60796079
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
60806080
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
60816081
}
6082+
6083+
// An int -> fp conversion of the value in lane 0 of a SIMD vector should use
6084+
// the FPR-operand cvtf variant to avoid costly FPR <-> GPR register transfers.
6085+
def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
6086+
(SCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;
6087+
6088+
def : Pat<(f32 (uint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
6089+
(UCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;
6090+
6091+
def : Pat<(f64 (sint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
6092+
(SCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;
6093+
6094+
def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
6095+
(UCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;
6096+
6097+
// fp16: an integer extracted from a vector must be at least 32 bits wide to be legal.
6098+
// The extracted value therefore appears as an in-register sign-extension of its low 16 bits.
6099+
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
6100+
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), i16)))),
6101+
(SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
6102+
6103+
// Unsigned case: the 32-bit extracted element is truncated to 16 bits using an AND mask.
6104+
def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract (v8i16 FPR128:$Rn), (i64 0))), (i32 65535))))),
6105+
(UCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
6106+
}
6107+
60826108
// If an integer is about to be converted to a floating point value,
60836109
// just load it on the floating point unit.
60846110
// Here are the patterns for 8 and 16-bits to float.

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,8 +1127,7 @@ define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
11271127
; CHECK-SD-LABEL: test_bitcastv1f64tov8i8:
11281128
; CHECK-SD: // %bb.0:
11291129
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1130-
; CHECK-SD-NEXT: fmov x8, d0
1131-
; CHECK-SD-NEXT: scvtf d0, x8
1130+
; CHECK-SD-NEXT: scvtf d0, d0
11321131
; CHECK-SD-NEXT: neg v0.8b, v0.8b
11331132
; CHECK-SD-NEXT: ret
11341133
;
@@ -1147,8 +1146,7 @@ define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
11471146
; CHECK-SD-LABEL: test_bitcastv1f64tov4i16:
11481147
; CHECK-SD: // %bb.0:
11491148
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1150-
; CHECK-SD-NEXT: fmov x8, d0
1151-
; CHECK-SD-NEXT: scvtf d0, x8
1149+
; CHECK-SD-NEXT: scvtf d0, d0
11521150
; CHECK-SD-NEXT: neg v0.4h, v0.4h
11531151
; CHECK-SD-NEXT: ret
11541152
;
@@ -1167,8 +1165,7 @@ define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
11671165
; CHECK-SD-LABEL: test_bitcastv1f64tov2i32:
11681166
; CHECK-SD: // %bb.0:
11691167
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1170-
; CHECK-SD-NEXT: fmov x8, d0
1171-
; CHECK-SD-NEXT: scvtf d0, x8
1168+
; CHECK-SD-NEXT: scvtf d0, d0
11721169
; CHECK-SD-NEXT: neg v0.2s, v0.2s
11731170
; CHECK-SD-NEXT: ret
11741171
;
@@ -1187,8 +1184,7 @@ define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
11871184
; CHECK-SD-LABEL: test_bitcastv1f64tov1i64:
11881185
; CHECK-SD: // %bb.0:
11891186
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1190-
; CHECK-SD-NEXT: fmov x8, d0
1191-
; CHECK-SD-NEXT: scvtf d0, x8
1187+
; CHECK-SD-NEXT: scvtf d0, d0
11921188
; CHECK-SD-NEXT: neg d0, d0
11931189
; CHECK-SD-NEXT: ret
11941190
;
@@ -1209,8 +1205,7 @@ define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
12091205
; CHECK-LABEL: test_bitcastv1f64tov2f32:
12101206
; CHECK: // %bb.0:
12111207
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1212-
; CHECK-NEXT: fmov x8, d0
1213-
; CHECK-NEXT: scvtf d0, x8
1208+
; CHECK-NEXT: scvtf d0, d0
12141209
; CHECK-NEXT: fneg v0.2s, v0.2s
12151210
; CHECK-NEXT: ret
12161211
%vcvt.i = sitofp <1 x i64> %a to <1 x double>

llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,162 @@ define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
101101
ret <8 x half> %v
102102
}
103103

104+
; An int-to-fp conversion of the element in lane 0 should apply
105+
; cvtf directly to the vector subregister to avoid an FPR -> GPR transfer.
106+
define float @l0_extract_f_v2s(<2 x i32> %u) {
107+
; CHECK-LABEL: l0_extract_f_v2s:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
110+
; CHECK-NEXT: scvtf s0, s0
111+
; CHECK-NEXT: ret
112+
%i = extractelement <2 x i32> %u, i64 0
113+
%f = sitofp i32 %i to float
114+
ret float %f
115+
}
116+
117+
; cvtf should use ssub for the bottom 32 bits of a v2i32.
118+
define float @l0_extract_f_v2u(<2 x i32> %u) {
119+
; CHECK-LABEL: l0_extract_f_v2u:
120+
; CHECK: // %bb.0:
121+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
122+
; CHECK-NEXT: ucvtf s0, s0
123+
; CHECK-NEXT: ret
124+
%i = extractelement <2 x i32> %u, i64 0
125+
%f = uitofp i32 %i to float
126+
ret float %f
127+
}
128+
129+
; The pattern should only apply when the lane index is known to be 0.
130+
define float @ln_extract_f_v2s(<2 x i32> %u, i64 %n) {
131+
; CHECK-LABEL: ln_extract_f_v2s:
132+
; CHECK: // %bb.0:
133+
; CHECK-NEXT: sub sp, sp, #16
134+
; CHECK-NEXT: .cfi_def_cfa_offset 16
135+
; CHECK-NEXT: add x8, sp, #8
136+
; CHECK-NEXT: str d0, [sp, #8]
137+
; CHECK-NEXT: bfi x8, x0, #2, #1
138+
; CHECK-NEXT: ldr s0, [x8]
139+
; CHECK-NEXT: scvtf s0, s0
140+
; CHECK-NEXT: add sp, sp, #16
141+
; CHECK-NEXT: ret
142+
%i = extractelement <2 x i32> %u, i64 %n
143+
%f = sitofp i32 %i to float
144+
ret float %f
145+
}
146+
147+
; cvtf should use ssub for the bottom 32 bits of a v4i32.
148+
define float @l0_extract_f_v4s(<4 x i32> %u) {
149+
; CHECK-LABEL: l0_extract_f_v4s:
150+
; CHECK: // %bb.0:
151+
; CHECK-NEXT: scvtf s0, s0
152+
; CHECK-NEXT: ret
153+
%i = extractelement <4 x i32> %u, i64 0
154+
%f = sitofp i32 %i to float
155+
ret float %f
156+
}
157+
158+
define float @l0_extract_f_v4u(<4 x i32> %u) {
159+
; CHECK-LABEL: l0_extract_f_v4u:
160+
; CHECK: // %bb.0:
161+
; CHECK-NEXT: ucvtf s0, s0
162+
; CHECK-NEXT: ret
163+
%i = extractelement <4 x i32> %u, i64 0
164+
%f = uitofp i32 %i to float
165+
ret float %f
166+
}
167+
168+
define float @ln_extract_f_v4s(<4 x i32> %u, i64 %n) {
169+
; CHECK-LABEL: ln_extract_f_v4s:
170+
; CHECK: // %bb.0:
171+
; CHECK-NEXT: sub sp, sp, #16
172+
; CHECK-NEXT: .cfi_def_cfa_offset 16
173+
; CHECK-NEXT: mov x8, sp
174+
; CHECK-NEXT: str q0, [sp]
175+
; CHECK-NEXT: bfi x8, x0, #2, #2
176+
; CHECK-NEXT: ldr s0, [x8]
177+
; CHECK-NEXT: scvtf s0, s0
178+
; CHECK-NEXT: add sp, sp, #16
179+
; CHECK-NEXT: ret
180+
%i = extractelement <4 x i32> %u, i64 %n
181+
%f = sitofp i32 %i to float
182+
ret float %f
183+
}
184+
185+
; cvtf should use dsub for the bottom 64 bits of a v2i64.
186+
define double @l0_extract_d_v2s(<2 x i64> %u) {
187+
; CHECK-LABEL: l0_extract_d_v2s:
188+
; CHECK: // %bb.0:
189+
; CHECK-NEXT: scvtf d0, d0
190+
; CHECK-NEXT: ret
191+
%i = extractelement <2 x i64> %u, i64 0
192+
%f = sitofp i64 %i to double
193+
ret double %f
194+
}
195+
196+
define double @l0_extract_d_v2u(<2 x i64> %u) {
197+
; CHECK-LABEL: l0_extract_d_v2u:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: ucvtf d0, d0
200+
; CHECK-NEXT: ret
201+
%i = extractelement <2 x i64> %u, i64 0
202+
%f = uitofp i64 %i to double
203+
ret double %f
204+
}
205+
206+
define double @ln_extract_d_v2s(<2 x i64> %u, i64 %n) {
207+
; CHECK-LABEL: ln_extract_d_v2s:
208+
; CHECK: // %bb.0:
209+
; CHECK-NEXT: sub sp, sp, #16
210+
; CHECK-NEXT: .cfi_def_cfa_offset 16
211+
; CHECK-NEXT: mov x8, sp
212+
; CHECK-NEXT: str q0, [sp]
213+
; CHECK-NEXT: bfi x8, x0, #3, #1
214+
; CHECK-NEXT: ldr d0, [x8]
215+
; CHECK-NEXT: scvtf d0, d0
216+
; CHECK-NEXT: add sp, sp, #16
217+
; CHECK-NEXT: ret
218+
%i = extractelement <2 x i64> %u, i64 %n
219+
%f = sitofp i64 %i to double
220+
ret double %f
221+
}
222+
223+
; (fullfp16) cvtf should use hsub for the bottom 16 bits of a v8i16.
224+
define half @l0_extract_h_v8s(<8 x i16> %u) #0 {
225+
; CHECK-LABEL: l0_extract_h_v8s:
226+
; CHECK: // %bb.0:
227+
; CHECK-NEXT: scvtf h0, h0
228+
; CHECK-NEXT: ret
229+
%i = extractelement <8 x i16> %u, i32 0
230+
%f = sitofp i16 %i to half
231+
ret half %f
232+
}
233+
234+
define half @l0_extract_h_v8u(<8 x i16> %u) #0 {
235+
; CHECK-LABEL: l0_extract_h_v8u:
236+
; CHECK: // %bb.0:
237+
; CHECK-NEXT: ucvtf h0, h0
238+
; CHECK-NEXT: ret
239+
%i = extractelement <8 x i16> %u, i32 0
240+
%f = uitofp i16 %i to half
241+
ret half %f
242+
}
243+
244+
define half @ln_extract_h_v8u(<8 x i16> %u, i32 %n) #0 {
245+
; CHECK-LABEL: ln_extract_h_v8u:
246+
; CHECK: // %bb.0:
247+
; CHECK-NEXT: sub sp, sp, #16
248+
; CHECK-NEXT: .cfi_def_cfa_offset 16
249+
; CHECK-NEXT: mov x8, sp
250+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
251+
; CHECK-NEXT: str q0, [sp]
252+
; CHECK-NEXT: bfi x8, x0, #1, #3
253+
; CHECK-NEXT: ldrh w8, [x8]
254+
; CHECK-NEXT: ucvtf h0, w8
255+
; CHECK-NEXT: add sp, sp, #16
256+
; CHECK-NEXT: ret
257+
%i = extractelement <8 x i16> %u, i32 %n
258+
%f = uitofp i16 %i to half
259+
ret half %f
260+
}
261+
104262
attributes #0 = { "target-features"="+fullfp16"}

llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -827,8 +827,7 @@ define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
827827
; CHECK-LABEL: ucvtf_v1i64_v1f64:
828828
; CHECK: // %bb.0:
829829
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
830-
; CHECK-NEXT: fmov x8, d0
831-
; CHECK-NEXT: ucvtf d0, x8
830+
; CHECK-NEXT: ucvtf d0, d0
832831
; CHECK-NEXT: ret
833832
%res = uitofp <1 x i64> %op1 to <1 x double>
834833
ret <1 x double> %res
@@ -1752,8 +1751,7 @@ define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
17521751
; CHECK-LABEL: scvtf_v1i64_v1f64:
17531752
; CHECK: // %bb.0:
17541753
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1755-
; CHECK-NEXT: fmov x8, d0
1756-
; CHECK-NEXT: scvtf d0, x8
1754+
; CHECK-NEXT: scvtf d0, d0
17571755
; CHECK-NEXT: ret
17581756
%res = sitofp <1 x i64> %op1 to <1 x double>
17591757
ret <1 x double> %res

0 commit comments

Comments
 (0)