Skip to content

Commit 935d753

Browse files
committed
[AArch64][GlobalISel] Add test coverage for ld1r combines. NFC
1 parent 3f67544 commit 935d753

File tree

2 files changed

+159
-57
lines changed

2 files changed

+159
-57
lines changed

llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll

Lines changed: 78 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
2+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
34

45

56
%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
@@ -295,12 +296,18 @@ entry:
295296
define <1 x i64> @testDUP.v1i64(ptr %a, ptr %b) #0 {
296297
; As there is a store operation depending on %1, LD1R pattern can't be selected.
297298
; So LDR and FMOV should be emitted.
298-
; CHECK-LABEL: testDUP.v1i64:
299-
; CHECK: // %bb.0:
300-
; CHECK-NEXT: ldr x8, [x0]
301-
; CHECK-NEXT: fmov d0, x8
302-
; CHECK-NEXT: str x8, [x1]
303-
; CHECK-NEXT: ret
299+
; CHECK-GI-LABEL: testDUP.v1i64:
300+
; CHECK-GI: // %bb.0:
301+
; CHECK-GI-NEXT: ldr x8, [x0]
302+
; CHECK-GI-NEXT: fmov d0, x8
303+
; CHECK-GI-NEXT: str x8, [x1]
304+
; CHECK-GI-NEXT: ret
305+
;
306+
; CHECK-SD-LABEL: testDUP.v1i64:
307+
; CHECK-SD: // %bb.0:
308+
; CHECK-SD-NEXT: ldr d0, [x0]
309+
; CHECK-SD-NEXT: str d0, [x1]
310+
; CHECK-SD-NEXT: ret
304311
%1 = load i64, ptr %a, align 8
305312
store i64 %1, ptr %b, align 8
306313
%vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
@@ -322,10 +329,16 @@ define <1 x double> @testDUP.v1f64(ptr %a, ptr %b) #0 {
322329
}
323330

324331
define <16 x i8> @test_vld1q_lane_s8(ptr %a, <16 x i8> %b) {
325-
; CHECK-LABEL: test_vld1q_lane_s8:
326-
; CHECK: // %bb.0: // %entry
327-
; CHECK-NEXT: ld1 { v0.b }[15], [x0]
328-
; CHECK-NEXT: ret
332+
; CHECK-GI-LABEL: test_vld1q_lane_s8:
333+
; CHECK-GI: // %bb.0: // %entry
334+
; CHECK-GI-NEXT: ld1 { v0.b }[15], [x0]
335+
; CHECK-GI-NEXT: ret
336+
;
337+
; CHECK-SD-LABEL: test_vld1q_lane_s8:
338+
; CHECK-SD: // %bb.0: // %entry
339+
; CHECK-SD-NEXT: ldr b1, [x0]
340+
; CHECK-SD-NEXT: mov v0.b[15], v1.b[0]
341+
; CHECK-SD-NEXT: ret
329342
entry:
330343
%0 = load i8, ptr %a, align 1
331344
%vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
@@ -388,12 +401,20 @@ entry:
388401
}
389402

390403
define <8 x i8> @test_vld1_lane_s8(ptr %a, <8 x i8> %b) {
391-
; CHECK-LABEL: test_vld1_lane_s8:
392-
; CHECK: // %bb.0: // %entry
393-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
394-
; CHECK-NEXT: ld1 { v0.b }[7], [x0]
395-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
396-
; CHECK-NEXT: ret
404+
; CHECK-GI-LABEL: test_vld1_lane_s8:
405+
; CHECK-GI: // %bb.0: // %entry
406+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
407+
; CHECK-GI-NEXT: ld1 { v0.b }[7], [x0]
408+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
409+
; CHECK-GI-NEXT: ret
410+
;
411+
; CHECK-SD-LABEL: test_vld1_lane_s8:
412+
; CHECK-SD: // %bb.0: // %entry
413+
; CHECK-SD-NEXT: ldr b1, [x0]
414+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
415+
; CHECK-SD-NEXT: mov v0.b[7], v1.b[0]
416+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
417+
; CHECK-SD-NEXT: ret
397418
entry:
398419
%0 = load i8, ptr %a, align 1
399420
%vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
@@ -607,11 +628,16 @@ entry:
607628
}
608629

609630
define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) {
610-
; CHECK-LABEL: test_vst1_lane0_s16:
611-
; CHECK: // %bb.0: // %entry
612-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
613-
; CHECK-NEXT: str h0, [x0]
614-
; CHECK-NEXT: ret
631+
; CHECK-GI-LABEL: test_vst1_lane0_s16:
632+
; CHECK-GI: // %bb.0: // %entry
633+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
634+
; CHECK-GI-NEXT: str h0, [x0]
635+
; CHECK-GI-NEXT: ret
636+
;
637+
; CHECK-SD-LABEL: test_vst1_lane0_s16:
638+
; CHECK-SD: // %bb.0: // %entry
639+
; CHECK-SD-NEXT: str h0, [x0]
640+
; CHECK-SD-NEXT: ret
615641
entry:
616642
%0 = extractelement <4 x i16> %b, i32 0
617643
store i16 %0, ptr %a, align 2
@@ -631,23 +657,33 @@ entry:
631657
}
632658

633659
define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) {
634-
; CHECK-LABEL: test_vst1_lane0_s32:
635-
; CHECK: // %bb.0: // %entry
636-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
637-
; CHECK-NEXT: str s0, [x0]
638-
; CHECK-NEXT: ret
660+
; CHECK-GI-LABEL: test_vst1_lane0_s32:
661+
; CHECK-GI: // %bb.0: // %entry
662+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
663+
; CHECK-GI-NEXT: str s0, [x0]
664+
; CHECK-GI-NEXT: ret
665+
;
666+
; CHECK-SD-LABEL: test_vst1_lane0_s32:
667+
; CHECK-SD: // %bb.0: // %entry
668+
; CHECK-SD-NEXT: str s0, [x0]
669+
; CHECK-SD-NEXT: ret
639670
entry:
640671
%0 = extractelement <2 x i32> %b, i32 0
641672
store i32 %0, ptr %a, align 4
642673
ret void
643674
}
644675

645676
define void @test_vst1_lane_s64(ptr %a, <1 x i64> %b) {
646-
; CHECK-LABEL: test_vst1_lane_s64:
647-
; CHECK: // %bb.0: // %entry
648-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
649-
; CHECK-NEXT: str d0, [x0]
650-
; CHECK-NEXT: ret
677+
; CHECK-GI-LABEL: test_vst1_lane_s64:
678+
; CHECK-GI: // %bb.0: // %entry
679+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
680+
; CHECK-GI-NEXT: str d0, [x0]
681+
; CHECK-GI-NEXT: ret
682+
;
683+
; CHECK-SD-LABEL: test_vst1_lane_s64:
684+
; CHECK-SD: // %bb.0: // %entry
685+
; CHECK-SD-NEXT: str d0, [x0]
686+
; CHECK-SD-NEXT: ret
651687
entry:
652688
%0 = extractelement <1 x i64> %b, i32 0
653689
store i64 %0, ptr %a, align 8
@@ -667,11 +703,16 @@ entry:
667703
}
668704

669705
define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) {
670-
; CHECK-LABEL: test_vst1_lane0_f32:
671-
; CHECK: // %bb.0: // %entry
672-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
673-
; CHECK-NEXT: str s0, [x0]
674-
; CHECK-NEXT: ret
706+
; CHECK-GI-LABEL: test_vst1_lane0_f32:
707+
; CHECK-GI: // %bb.0: // %entry
708+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
709+
; CHECK-GI-NEXT: str s0, [x0]
710+
; CHECK-GI-NEXT: ret
711+
;
712+
; CHECK-SD-LABEL: test_vst1_lane0_f32:
713+
; CHECK-SD: // %bb.0: // %entry
714+
; CHECK-SD-NEXT: str s0, [x0]
715+
; CHECK-SD-NEXT: ret
675716
entry:
676717
%0 = extractelement <2 x float> %b, i32 0
677718
store float %0, ptr %a, align 4

llvm/test/CodeGen/AArch64/neon-vector-splat.ll

Lines changed: 81 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s
2+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for shuffle8
36

47
define <2 x i32> @shuffle(ptr %P) {
5-
; CHECK-LABEL: shuffle:
6-
; CHECK: // %bb.0:
7-
; CHECK-NEXT: ld1r { v0.2s }, [x0]
8-
; CHECK-NEXT: ret
8+
; CHECK-SD-LABEL: shuffle:
9+
; CHECK-SD: // %bb.0:
10+
; CHECK-SD-NEXT: ld1r { v0.2s }, [x0]
11+
; CHECK-SD-NEXT: ret
12+
;
13+
; CHECK-GI-LABEL: shuffle:
14+
; CHECK-GI: // %bb.0:
15+
; CHECK-GI-NEXT: ldr d0, [x0]
16+
; CHECK-GI-NEXT: dup v0.2s, v0.s[0]
17+
; CHECK-GI-NEXT: ret
918
%lv2i32 = load <2 x i32>, ptr %P
1019
%B = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer
1120
ret <2 x i32> %B
@@ -22,13 +31,21 @@ define <4 x i32> @shuffle2(ptr %P) {
2231
}
2332

2433
define <4 x i32> @shuffle2_multiuse(ptr %P) {
25-
; CHECK-LABEL: shuffle2_multiuse:
26-
; CHECK: // %bb.0:
27-
; CHECK-NEXT: ldr q0, [x0]
28-
; CHECK-NEXT: dup v1.4s, v0.s[0]
29-
; CHECK-NEXT: dup v0.4s, v0.s[1]
30-
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
31-
; CHECK-NEXT: ret
34+
; CHECK-SD-LABEL: shuffle2_multiuse:
35+
; CHECK-SD: // %bb.0:
36+
; CHECK-SD-NEXT: ldr q0, [x0]
37+
; CHECK-SD-NEXT: dup v1.4s, v0.s[0]
38+
; CHECK-SD-NEXT: dup v0.4s, v0.s[1]
39+
; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
40+
; CHECK-SD-NEXT: ret
41+
;
42+
; CHECK-GI-LABEL: shuffle2_multiuse:
43+
; CHECK-GI: // %bb.0:
44+
; CHECK-GI-NEXT: ldr q0, [x0]
45+
; CHECK-GI-NEXT: ld1r { v1.4s }, [x0]
46+
; CHECK-GI-NEXT: dup v0.4s, v0.s[1]
47+
; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
48+
; CHECK-GI-NEXT: ret
3249
%lv2i32 = load <4 x i32>, ptr %P
3350
%B = shufflevector <4 x i32> %lv2i32, <4 x i32> undef, <4 x i32> zeroinitializer
3451
%C = shufflevector <4 x i32> %lv2i32, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +54,16 @@ define <4 x i32> @shuffle2_multiuse(ptr %P) {
3754
}
3855

3956
define <4 x i16> @shuffle3(ptr %P) {
40-
; CHECK-LABEL: shuffle3:
41-
; CHECK: // %bb.0:
42-
; CHECK-NEXT: ld1r { v0.4h }, [x0]
43-
; CHECK-NEXT: ret
57+
; CHECK-SD-LABEL: shuffle3:
58+
; CHECK-SD: // %bb.0:
59+
; CHECK-SD-NEXT: ld1r { v0.4h }, [x0]
60+
; CHECK-SD-NEXT: ret
61+
;
62+
; CHECK-GI-LABEL: shuffle3:
63+
; CHECK-GI: // %bb.0:
64+
; CHECK-GI-NEXT: ldr d0, [x0]
65+
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
66+
; CHECK-GI-NEXT: ret
4467
%lv4i16 = load <4 x i16>, ptr %P
4568
%sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer
4669
ret <4 x i16> %sv4i16
@@ -57,10 +80,16 @@ define <8 x i16> @shuffle4(ptr %P) {
5780
}
5881

5982
define <8 x i8> @shuffle5(ptr %P) {
60-
; CHECK-LABEL: shuffle5:
61-
; CHECK: // %bb.0:
62-
; CHECK-NEXT: ld1r { v0.8b }, [x0]
63-
; CHECK-NEXT: ret
83+
; CHECK-SD-LABEL: shuffle5:
84+
; CHECK-SD: // %bb.0:
85+
; CHECK-SD-NEXT: ld1r { v0.8b }, [x0]
86+
; CHECK-SD-NEXT: ret
87+
;
88+
; CHECK-GI-LABEL: shuffle5:
89+
; CHECK-GI: // %bb.0:
90+
; CHECK-GI-NEXT: ldr d0, [x0]
91+
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
92+
; CHECK-GI-NEXT: ret
6493
%lv8i8 = load <8 x i8>, ptr %P
6594
%sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer
6695
ret <8 x i8> %sv8i8
@@ -95,3 +124,35 @@ define <2 x ptr> @shuffle8(ptr %P) {
95124
%sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer
96125
ret <2 x ptr> %sv2ptr
97126
}
127+
128+
define <4 x i32> @multiblock_aliasing(ptr %P, i1 %c) {
129+
; CHECK-SD-LABEL: multiblock_aliasing:
130+
; CHECK-SD: // %bb.0: // %entry
131+
; CHECK-SD-NEXT: ldr q0, [x0]
132+
; CHECK-SD-NEXT: tbz w1, #0, .LBB9_2
133+
; CHECK-SD-NEXT: // %bb.1: // %then
134+
; CHECK-SD-NEXT: stp xzr, xzr, [x0]
135+
; CHECK-SD-NEXT: .LBB9_2: // %else
136+
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
137+
; CHECK-SD-NEXT: ret
138+
;
139+
; CHECK-GI-LABEL: multiblock_aliasing:
140+
; CHECK-GI: // %bb.0: // %entry
141+
; CHECK-GI-NEXT: tbz w1, #0, .LBB9_2
142+
; CHECK-GI-NEXT: // %bb.1: // %then
143+
; CHECK-GI-NEXT: stp xzr, xzr, [x0]
144+
; CHECK-GI-NEXT: .LBB9_2: // %else
145+
; CHECK-GI-NEXT: ld1r { v0.4s }, [x0]
146+
; CHECK-GI-NEXT: ret
147+
entry:
148+
%lv2ptr = load <4 x i32>, ptr %P
149+
br i1 %c, label %then, label %else
150+
151+
then:
152+
store <4 x i32> zeroinitializer, ptr %P
153+
br label %else
154+
155+
else:
156+
%sv2ptr = shufflevector <4 x i32> %lv2ptr, <4 x i32> undef, <4 x i32> zeroinitializer
157+
ret <4 x i32> %sv2ptr
158+
}

0 commit comments

Comments
 (0)