Skip to content

Commit 0583297

Browse files
committed
[AArch64][GlobalISel] Add patterns for FPR i8 G_DUP
This adds the missing patterns for i8 G_DUP from FPR registers; unlike the other FP types (f16, f32, etc.), i8 had no such patterns.
1 parent 55b6c3e commit 0583297

File tree

2 files changed

+82
-35
lines changed

2 files changed

+82
-35
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7156,6 +7156,16 @@ def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
71567156
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
71577157
(i64 0)))>;
71587158

7159+
// Patterns for importing fpr i8 G_DUP under GISel.
// Each pattern matches an AArch64dup whose i8 source operand is in an FPR8
// register. The scalar is inserted into the bsub subregister of an otherwise
// undefined (IMPLICIT_DEF) v16i8 value, and lane 0 of that vector is then
// broadcast with the by-lane DUP instruction: DUPv8i8lane for the v8i8
// result and DUPv16i8lane for the v16i8 result.
7160+
def : Pat<(v8i8 (AArch64dup (i8 FPR8:$Rn))),
7161+
(v8i8 (DUPv8i8lane
7162+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rn, bsub),
7163+
(i64 0)))>;
7164+
def : Pat<(v16i8 (AArch64dup (i8 FPR8:$Rn))),
7165+
(v16i8 (DUPv16i8lane
7166+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rn, bsub),
7167+
(i64 0)))>;
7168+
71597169
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
71607170
(DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
71617171
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),

llvm/test/CodeGen/AArch64/dup.ll

Lines changed: 72 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,6 @@
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
77
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
88
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v2i8
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v3i8
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v4i8
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v8i8
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v16i8
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v32i8
149

1510
define <2 x i8> @dup_v2i8(i8 %a) {
1611
; CHECK-LABEL: dup_v2i8:
@@ -127,14 +122,25 @@ entry:
127122
}
128123

129124
define <3 x i8> @loaddup_str_v3i8(ptr %p) {
130-
; CHECK-LABEL: loaddup_str_v3i8:
131-
; CHECK: // %bb.0: // %entry
132-
; CHECK-NEXT: mov x8, x0
133-
; CHECK-NEXT: ldrb w0, [x0]
134-
; CHECK-NEXT: strb wzr, [x8]
135-
; CHECK-NEXT: mov w1, w0
136-
; CHECK-NEXT: mov w2, w0
137-
; CHECK-NEXT: ret
125+
; CHECK-SD-LABEL: loaddup_str_v3i8:
126+
; CHECK-SD: // %bb.0: // %entry
127+
; CHECK-SD-NEXT: mov x8, x0
128+
; CHECK-SD-NEXT: ldrb w0, [x0]
129+
; CHECK-SD-NEXT: strb wzr, [x8]
130+
; CHECK-SD-NEXT: mov w1, w0
131+
; CHECK-SD-NEXT: mov w2, w0
132+
; CHECK-SD-NEXT: ret
133+
;
134+
; CHECK-GI-LABEL: loaddup_str_v3i8:
135+
; CHECK-GI: // %bb.0: // %entry
136+
; CHECK-GI-NEXT: ldr b0, [x0]
137+
; CHECK-GI-NEXT: mov x8, x0
138+
; CHECK-GI-NEXT: strb wzr, [x8]
139+
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
140+
; CHECK-GI-NEXT: umov w0, v0.b[0]
141+
; CHECK-GI-NEXT: umov w1, v0.b[1]
142+
; CHECK-GI-NEXT: umov w2, v0.b[2]
143+
; CHECK-GI-NEXT: ret
138144
entry:
139145
%a = load i8, ptr %p
140146
%b = insertelement <3 x i8> poison, i8 %a, i64 0
@@ -201,12 +207,21 @@ entry:
201207
}
202208

203209
define <4 x i8> @loaddup_str_v4i8(ptr %p) {
204-
; CHECK-LABEL: loaddup_str_v4i8:
205-
; CHECK: // %bb.0: // %entry
206-
; CHECK-NEXT: ldrb w8, [x0]
207-
; CHECK-NEXT: strb wzr, [x0]
208-
; CHECK-NEXT: dup v0.4h, w8
209-
; CHECK-NEXT: ret
210+
; CHECK-SD-LABEL: loaddup_str_v4i8:
211+
; CHECK-SD: // %bb.0: // %entry
212+
; CHECK-SD-NEXT: ldrb w8, [x0]
213+
; CHECK-SD-NEXT: strb wzr, [x0]
214+
; CHECK-SD-NEXT: dup v0.4h, w8
215+
; CHECK-SD-NEXT: ret
216+
;
217+
; CHECK-GI-LABEL: loaddup_str_v4i8:
218+
; CHECK-GI: // %bb.0: // %entry
219+
; CHECK-GI-NEXT: ldr b0, [x0]
220+
; CHECK-GI-NEXT: strb wzr, [x0]
221+
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
222+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
223+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
224+
; CHECK-GI-NEXT: ret
210225
entry:
211226
%a = load i8, ptr %p
212227
%b = insertelement <4 x i8> poison, i8 %a, i64 0
@@ -250,11 +265,18 @@ entry:
250265
}
251266

252267
define <8 x i8> @loaddup_str_v8i8(ptr %p) {
253-
; CHECK-LABEL: loaddup_str_v8i8:
254-
; CHECK: // %bb.0: // %entry
255-
; CHECK-NEXT: ld1r { v0.8b }, [x0]
256-
; CHECK-NEXT: strb wzr, [x0]
257-
; CHECK-NEXT: ret
268+
; CHECK-SD-LABEL: loaddup_str_v8i8:
269+
; CHECK-SD: // %bb.0: // %entry
270+
; CHECK-SD-NEXT: ld1r { v0.8b }, [x0]
271+
; CHECK-SD-NEXT: strb wzr, [x0]
272+
; CHECK-SD-NEXT: ret
273+
;
274+
; CHECK-GI-LABEL: loaddup_str_v8i8:
275+
; CHECK-GI: // %bb.0: // %entry
276+
; CHECK-GI-NEXT: ldr b0, [x0]
277+
; CHECK-GI-NEXT: strb wzr, [x0]
278+
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
279+
; CHECK-GI-NEXT: ret
258280
entry:
259281
%a = load i8, ptr %p
260282
%b = insertelement <8 x i8> poison, i8 %a, i64 0
@@ -297,11 +319,18 @@ entry:
297319
}
298320

299321
define <16 x i8> @loaddup_str_v16i8(ptr %p) {
300-
; CHECK-LABEL: loaddup_str_v16i8:
301-
; CHECK: // %bb.0: // %entry
302-
; CHECK-NEXT: ld1r { v0.16b }, [x0]
303-
; CHECK-NEXT: strb wzr, [x0]
304-
; CHECK-NEXT: ret
322+
; CHECK-SD-LABEL: loaddup_str_v16i8:
323+
; CHECK-SD: // %bb.0: // %entry
324+
; CHECK-SD-NEXT: ld1r { v0.16b }, [x0]
325+
; CHECK-SD-NEXT: strb wzr, [x0]
326+
; CHECK-SD-NEXT: ret
327+
;
328+
; CHECK-GI-LABEL: loaddup_str_v16i8:
329+
; CHECK-GI: // %bb.0: // %entry
330+
; CHECK-GI-NEXT: ldr b0, [x0]
331+
; CHECK-GI-NEXT: strb wzr, [x0]
332+
; CHECK-GI-NEXT: dup v0.16b, v0.b[0]
333+
; CHECK-GI-NEXT: ret
305334
entry:
306335
%a = load i8, ptr %p
307336
%b = insertelement <16 x i8> poison, i8 %a, i64 0
@@ -353,12 +382,20 @@ entry:
353382
}
354383

355384
define <32 x i8> @loaddup_str_v32i8(ptr %p) {
356-
; CHECK-LABEL: loaddup_str_v32i8:
357-
; CHECK: // %bb.0: // %entry
358-
; CHECK-NEXT: ld1r { v0.16b }, [x0]
359-
; CHECK-NEXT: strb wzr, [x0]
360-
; CHECK-NEXT: mov v1.16b, v0.16b
361-
; CHECK-NEXT: ret
385+
; CHECK-SD-LABEL: loaddup_str_v32i8:
386+
; CHECK-SD: // %bb.0: // %entry
387+
; CHECK-SD-NEXT: ld1r { v0.16b }, [x0]
388+
; CHECK-SD-NEXT: strb wzr, [x0]
389+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
390+
; CHECK-SD-NEXT: ret
391+
;
392+
; CHECK-GI-LABEL: loaddup_str_v32i8:
393+
; CHECK-GI: // %bb.0: // %entry
394+
; CHECK-GI-NEXT: ldr b1, [x0]
395+
; CHECK-GI-NEXT: strb wzr, [x0]
396+
; CHECK-GI-NEXT: dup v0.16b, v1.b[0]
397+
; CHECK-GI-NEXT: dup v1.16b, v1.b[0]
398+
; CHECK-GI-NEXT: ret
362399
entry:
363400
%a = load i8, ptr %p
364401
%b = insertelement <32 x i8> poison, i8 %a, i64 0

0 commit comments

Comments
 (0)