Skip to content

Commit 44e7b8a

Browse files
committed
[AArch64] Tests for implicit zero patterns. NFC
See D149616
1 parent 1a3947d commit 44e7b8a

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

llvm/test/CodeGen/AArch64/implicitly-set-zero-high-64-bits.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,117 @@ entry:
3232
ret <8 x i8> %vtbl11.i
3333
}
3434

35+
; tbl1v8i8: loads an <8 x i16> from %in, shifts each lane right by 4 and
; truncates to <8 x i8>, widens that to <16 x i8> with a zero upper half,
; and uses the result as the table operand of the tbl1 intrinsic with %idx
; as the indices.
; The expected assembly consists of ldr, shrn, tbl and ret only; it contains
; no separate instruction that zeroes the upper half of the table register.
define <8 x i8> @tbl1v8i8(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
36+
; CHECK-LABEL: tbl1v8i8:
37+
; CHECK: // %bb.0: // %entry
38+
; CHECK-NEXT: ldr q1, [x0]
39+
; CHECK-NEXT: shrn v1.8b, v1.8h, #4
40+
; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b
41+
; CHECK-NEXT: ret
42+
entry:
43+
%0 = load <8 x i16>, ptr %in, align 2
44+
%1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
45+
%vshrn_n = trunc <8 x i16> %1 to <8 x i8>
46+
; Mask indices 8-15 select from the second operand, i.e. the zero vector.
%vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
47+
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
48+
ret <8 x i8> %vtbl11.i
49+
}
50+
51+
; addpv4i16: applies the addp intrinsic to %a and %b and widens the
; <4 x i16> result to <8 x i16> whose upper four lanes are zero.
; The expected assembly materialises the zero with movi and inserts it into
; the upper 64-bit lane with a mov after the addp.
define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
52+
; CHECK-LABEL: addpv4i16:
53+
; CHECK: // %bb.0: // %entry
54+
; CHECK-NEXT: movi v2.2d, #0000000000000000
55+
; CHECK-NEXT: addp v0.4h, v0.4h, v1.4h
56+
; CHECK-NEXT: mov v0.d[1], v2.d[0]
57+
; CHECK-NEXT: ret
58+
entry:
59+
%vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
60+
; Mask indices 4-7 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <4 x i16> %vpadd_v2.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
61+
ret <8 x i16> %shuffle.i
62+
}
63+
64+
; addv4i16: adds %b and %a lane-wise and widens the <4 x i16> sum to
; <8 x i16> whose upper four lanes are zero.
; The expected assembly materialises the zero with movi and inserts it into
; the upper 64-bit lane with a mov after the add.
define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
65+
; CHECK-LABEL: addv4i16:
66+
; CHECK: // %bb.0: // %entry
67+
; CHECK-NEXT: movi v2.2d, #0000000000000000
68+
; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
69+
; CHECK-NEXT: mov v0.d[1], v2.d[0]
70+
; CHECK-NEXT: ret
71+
entry:
72+
%add.i = add <4 x i16> %b, %a
73+
; Mask indices 4-7 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <4 x i16> %add.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
74+
ret <8 x i16> %shuffle.i
75+
}
76+
77+
; rshrn: applies the rshrn intrinsic to %a with a shift amount of 3 and
; widens the <8 x i8> result to <16 x i8> whose upper eight lanes are zero.
; The expected assembly is a single rshrn followed by ret; no separate
; zeroing instruction is expected.
; NOTE(review): %b is not referenced in the body.
define <16 x i8> @rshrn(<8 x i16> noundef %a, <4 x i16> noundef %b) {
78+
; CHECK-LABEL: rshrn:
79+
; CHECK: // %bb.0: // %entry
80+
; CHECK-NEXT: rshrn v0.8b, v0.8h, #3
81+
; CHECK-NEXT: ret
82+
entry:
83+
%vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a, i32 3)
84+
; Mask indices 8-15 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <8 x i8> %vrshrn_n1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
85+
ret <16 x i8> %shuffle.i
86+
}
87+
88+
; tbl1: performs a tbl1 table lookup with table %a and indices %b, then
; widens the <8 x i8> result to <16 x i8> whose upper eight lanes are zero.
; The expected assembly materialises the zero with movi and inserts it into
; the upper 64-bit lane with a mov after the tbl.
define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
89+
; CHECK-LABEL: tbl1:
90+
; CHECK: // %bb.0: // %entry
91+
; CHECK-NEXT: movi v2.2d, #0000000000000000
92+
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
93+
; CHECK-NEXT: mov v0.d[1], v2.d[0]
94+
; CHECK-NEXT: ret
95+
entry:
96+
%vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
97+
; Mask indices 8-15 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
98+
ret <16 x i8> %shuffle.i
99+
}
100+
101+
; fadd: adds the scalars %x and %y and builds a <2 x double> with the sum in
; lane 0 and 0.0 in lane 1.
; The expected assembly zeroes v2 with movi, performs the scalar fadd, moves
; the sum into lane 0 of v2, and then copies v2 to v0.
define <2 x double> @fadd(double noundef %x, double noundef %y) {
102+
; CHECK-LABEL: fadd:
103+
; CHECK: // %bb.0: // %entry
104+
; CHECK-NEXT: movi v2.2d, #0000000000000000
105+
; CHECK-NEXT: fadd d0, d0, d1
106+
; CHECK-NEXT: mov v2.d[0], v0.d[0]
107+
; CHECK-NEXT: mov v0.16b, v2.16b
108+
; CHECK-NEXT: ret
109+
entry:
110+
%add = fadd double %x, %y
111+
%vecinit1 = insertelement <2 x double> poison, double %add, i64 0
112+
; Lane 1 is set explicitly to zero rather than left as poison.
%vecinit2 = insertelement <2 x double> %vecinit1, double 0.0, i64 1
113+
ret <2 x double> %vecinit2
114+
}
115+
116+
; bsl: computes the bitwise select (%c & %a) | (~%a & %d), bitcasts the
; <4 x i16> result to <8 x i8>, and widens it to <16 x i8> whose upper
; eight lanes are zero.
; The expected assembly materialises the zero with movi and inserts it into
; the upper 64-bit lane with a mov after the bsl.
; NOTE(review): %b is not referenced in the body.
define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
117+
; CHECK-LABEL: bsl:
118+
; CHECK: // %bb.0: // %entry
119+
; CHECK-NEXT: movi v3.2d, #0000000000000000
120+
; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
121+
; CHECK-NEXT: mov v0.d[1], v3.d[0]
122+
; CHECK-NEXT: ret
123+
entry:
124+
%vbsl3.i = and <4 x i16> %c, %a
125+
; xor with all-ones yields the bitwise complement of %a.
%0 = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
126+
%vbsl4.i = and <4 x i16> %0, %d
127+
%vbsl5.i = or <4 x i16> %vbsl4.i, %vbsl3.i
128+
%1 = bitcast <4 x i16> %vbsl5.i to <8 x i8>
129+
; Mask indices 8-15 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <8 x i8> %1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
130+
ret <16 x i8> %shuffle.i
131+
}
132+
133+
; load: loads an <8 x i8> from %a and widens it to <16 x i8> whose upper
; eight lanes are zero.
; The expected assembly materialises the zero with movi and inserts it into
; the upper 64-bit lane with a mov after the 64-bit ldr.
; NOTE(review): %b is not referenced in the body.
define <16 x i8> @load(ptr %a, <8 x i8> %b) {
134+
; CHECK-LABEL: load:
135+
; CHECK: // %bb.0: // %entry
136+
; CHECK-NEXT: movi v1.2d, #0000000000000000
137+
; CHECK-NEXT: ldr d0, [x0]
138+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
139+
; CHECK-NEXT: ret
140+
entry:
141+
%vtbl11 = load <8 x i8>, ptr %a
142+
; Mask indices 8-15 select from the second operand, i.e. the zero vector.
%shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
143+
ret <16 x i8> %shuffle.i
144+
}
145+
146+
147+
; Declarations for the rshrn and addp intrinsics called by the tests above.
; NOTE(review): the tbl1 intrinsic used above is not declared in this hunk;
; presumably it is declared in the unchanged part of the file - confirm.
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
148+
declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)

llvm/test/CodeGen/AArch64/peephole-insvigpr.mir

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
ret void
3333
}
3434

35+
; Empty IR function named fadd; presumably the IR-level counterpart of the
; machine function `fadd` defined later in this file - confirm.
define void @fadd(double %v, double %p) {
36+
entry:
37+
ret void
38+
}
39+
3540
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
3641

3742
...
@@ -395,4 +400,54 @@ body: |
395400
RET_ReallyLR
396401
397402
...
403+
---
# Machine function `fadd`: a scalar FADDDrr result is placed in the low
# 64 bits (dsub) of a 128-bit register, and INSvi64lane then writes lane 1
# from lane 0 of a register built from MOVIv2d_ns 0.
# The expected output below lists the same instructions, in the same order,
# as the input, i.e. no rewrite of this sequence is expected.
404+
name: fadd
405+
alignment: 4
406+
tracksRegLiveness: true
407+
registers:
408+
- { id: 0, class: fpr64, preferred-register: '' }
409+
- { id: 1, class: fpr64, preferred-register: '' }
410+
- { id: 2, class: fpr64, preferred-register: '' }
411+
- { id: 3, class: fpr128, preferred-register: '' }
412+
- { id: 4, class: fpr64, preferred-register: '' }
413+
- { id: 5, class: fpr128, preferred-register: '' }
414+
- { id: 6, class: fpr128, preferred-register: '' }
415+
- { id: 7, class: fpr128, preferred-register: '' }
416+
- { id: 8, class: fpr128, preferred-register: '' }
417+
- { id: 9, class: fpr128, preferred-register: '' }
418+
liveins:
419+
- { reg: '$d0', virtual-reg: '%0' }
420+
- { reg: '$d1', virtual-reg: '%1' }
421+
body: |
422+
bb.0.entry:
423+
liveins: $d0, $d1
398424
425+
; CHECK-LABEL: name: fadd
426+
; CHECK: liveins: $d0, $d1
427+
; CHECK-NEXT: {{ $}}
428+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d1
429+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
430+
; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[COPY1]], [[COPY]], implicit $fpcr
431+
; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
432+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
433+
; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
434+
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[COPY2]], %subreg.dsub
435+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
436+
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[FADDDrr]], %subreg.dsub
437+
; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, killed [[INSERT_SUBREG]], 0
438+
; CHECK-NEXT: $q0 = COPY [[INSvi64lane]]
439+
; CHECK-NEXT: RET_ReallyLR implicit $q0
440+
; Input sequence: %2 holds the scalar sum, %5 wraps the low half of the
; zero vector %3, and %9 combines them before being returned in $q0.
%1:fpr64 = COPY $d1
441+
%0:fpr64 = COPY $d0
442+
%2:fpr64 = nofpexcept FADDDrr %0, %1, implicit $fpcr
443+
%3:fpr128 = MOVIv2d_ns 0
444+
%4:fpr64 = COPY %3.dsub
445+
%6:fpr128 = IMPLICIT_DEF
446+
%5:fpr128 = INSERT_SUBREG %6, killed %4, %subreg.dsub
447+
%8:fpr128 = IMPLICIT_DEF
448+
%7:fpr128 = INSERT_SUBREG %8, killed %2, %subreg.dsub
449+
%9:fpr128 = INSvi64lane %7, 1, killed %5, 0
450+
$q0 = COPY %9
451+
RET_ReallyLR implicit $q0
452+
453+
...

0 commit comments

Comments
 (0)