Skip to content

Commit 981b31c

Browse files
[SVE] Add ISel patterns for "insert undef_nxv#f##, f##, 0"
Differential Revision: https://reviews.llvm.org/D89235
1 parent 2089878 commit 981b31c

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,6 +2171,19 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
21712171
// Selects an i64 scalar held in FPR64 that is inserted at index 0 of an undef
// nxv2i64 as an INSERT_SUBREG into an IMPLICIT_DEF vector through the dsub
// subregister index.
def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
21722172
(INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
21732173

2174+
// Floating-point scalar inserted at index 0 of an undef scalable vector.
// Each pattern below selects the vector_insert as an INSERT_SUBREG of the
// scalar FPR register into an IMPLICIT_DEF vector, using the subregister
// index that matches the scalar register class:
//   f16 / FPR16 -> hsub, f32 / FPR32 -> ssub, f64 / FPR64 -> dsub.

// Half-precision element types (nxv8f16, nxv4f16, nxv2f16).
def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)),
2175+
(INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
2176+
def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)),
2177+
(INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
2178+
def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
2179+
(INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
2180+
// Single-precision element types (nxv4f32, nxv2f32).
def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
2181+
(INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
2182+
def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
2183+
(INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
2184+
// Double-precision element type (nxv2f64).
def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)),
2185+
(INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
2186+
21742187
// Insert scalar into vector[0]
21752188
// Writes a GPR32 scalar into index 0 of an existing nxv16i8 vector by selecting
// CPY_ZPmR_B with $vec as the merged-into operand and (PTRUE_B 1) as predicate.
// NOTE(review): PTRUE_B operand 1 presumably activates only the first lane --
// confirm against the SVE predicate-pattern encoding.
def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
21762189
(CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;

llvm/test/CodeGen/AArch64/sve-insert-element.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,57 @@ define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a,
223223
%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
224224
ret <vscale x 16 x i8> %d
225225
}
226+
227+
; Inserts the half argument %a at index 0 of an undef <vscale x 8 x half>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($h0 redefined as part of $z0) followed by ret.
define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
228+
; CHECK-LABEL: test_insert_into_undef_nxv8f16:
229+
; CHECK: // %bb.0:
230+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
231+
; CHECK-NEXT: ret
232+
%b = insertelement <vscale x 8 x half> undef, half %a, i32 0
233+
ret <vscale x 8 x half> %b
234+
}
235+
236+
; Inserts the half argument %a at index 0 of an undef <vscale x 4 x half>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($h0 redefined as part of $z0) followed by ret.
define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
237+
; CHECK-LABEL: test_insert_into_undef_nxv4f16:
238+
; CHECK: // %bb.0:
239+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
240+
; CHECK-NEXT: ret
241+
%b = insertelement <vscale x 4 x half> undef, half %a, i32 0
242+
ret <vscale x 4 x half> %b
243+
}
244+
245+
; Inserts the half argument %a at index 0 of an undef <vscale x 2 x half>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($h0 redefined as part of $z0) followed by ret.
define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
246+
; CHECK-LABEL: test_insert_into_undef_nxv2f16:
247+
; CHECK: // %bb.0:
248+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
249+
; CHECK-NEXT: ret
250+
%b = insertelement <vscale x 2 x half> undef, half %a, i32 0
251+
ret <vscale x 2 x half> %b
252+
}
253+
254+
; Inserts the float argument %a at index 0 of an undef <vscale x 4 x float>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($s0 redefined as part of $z0) followed by ret.
define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
255+
; CHECK-LABEL: test_insert_into_undef_nxv4f32:
256+
; CHECK: // %bb.0:
257+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
258+
; CHECK-NEXT: ret
259+
%b = insertelement <vscale x 4 x float> undef, float %a, i32 0
260+
ret <vscale x 4 x float> %b
261+
}
262+
263+
; Inserts the float argument %a at index 0 of an undef <vscale x 2 x float>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($s0 redefined as part of $z0) followed by ret.
define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
264+
; CHECK-LABEL: test_insert_into_undef_nxv2f32:
265+
; CHECK: // %bb.0:
266+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
267+
; CHECK-NEXT: ret
268+
%b = insertelement <vscale x 2 x float> undef, float %a, i32 0
269+
ret <vscale x 2 x float> %b
270+
}
271+
272+
; Inserts the double argument %a at index 0 of an undef <vscale x 2 x double>.
; The FileCheck lines below expect no instruction for the insert: only a
; register kill annotation ($d0 redefined as part of $z0) followed by ret.
define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
273+
; CHECK-LABEL: test_insert_into_undef_nxv2f64:
274+
; CHECK: // %bb.0:
275+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
276+
; CHECK-NEXT: ret
277+
%b = insertelement <vscale x 2 x double> undef, double %a, i32 0
278+
ret <vscale x 2 x double> %b
279+
}

0 commit comments

Comments
 (0)