|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
1 | 2 | ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
|
2 | 3 |
|
3 |
| -define <8 x i8> @vuzpi8(ptr %A, ptr %B) nounwind { |
4 |
| -;CHECK-LABEL: vuzpi8: |
5 |
| -;CHECK: uzp1.8b |
6 |
| -;CHECK: uzp2.8b |
7 |
| -;CHECK-NEXT: add.8b |
8 |
| - %tmp1 = load <8 x i8>, ptr %A |
9 |
| - %tmp2 = load <8 x i8>, ptr %B |
10 |
| - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
11 |
| - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
12 |
| - %tmp5 = add <8 x i8> %tmp3, %tmp4 |
13 |
| - ret <8 x i8> %tmp5 |
| 4 | +define <8 x i8> @vuzpi8(<8 x i8> %A, <8 x i8> %B) nounwind { |
| 5 | +; CHECK-LABEL: vuzpi8: |
| 6 | +; CHECK: // %bb.0: |
| 7 | +; CHECK-NEXT: uzp1.8b v2, v0, v1 |
| 8 | +; CHECK-NEXT: uzp2.8b v0, v0, v1 |
| 9 | +; CHECK-NEXT: add.8b v0, v2, v0 |
| 10 | +; CHECK-NEXT: ret |
| 11 | + %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 12 | + %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 13 | + %tmp5 = add <8 x i8> %tmp3, %tmp4 |
| 14 | + ret <8 x i8> %tmp5 |
14 | 15 | }
|
15 | 16 |
|
16 |
| -define <4 x i16> @vuzpi16(ptr %A, ptr %B) nounwind { |
17 |
| -;CHECK-LABEL: vuzpi16: |
18 |
| -;CHECK: uzp1.4h |
19 |
| -;CHECK: uzp2.4h |
20 |
| -;CHECK-NEXT: add.4h |
21 |
| - %tmp1 = load <4 x i16>, ptr %A |
22 |
| - %tmp2 = load <4 x i16>, ptr %B |
23 |
| - %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
24 |
| - %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
25 |
| - %tmp5 = add <4 x i16> %tmp3, %tmp4 |
26 |
| - ret <4 x i16> %tmp5 |
| 17 | +define <4 x i16> @vuzpi16(<4 x i16> %A, <4 x i16> %B) nounwind { |
| 18 | +; CHECK-LABEL: vuzpi16: |
| 19 | +; CHECK: // %bb.0: |
| 20 | +; CHECK-NEXT: uzp1.4h v2, v0, v1 |
| 21 | +; CHECK-NEXT: uzp2.4h v0, v0, v1 |
| 22 | +; CHECK-NEXT: add.4h v0, v2, v0 |
| 23 | +; CHECK-NEXT: ret |
| 24 | + %tmp3 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| 25 | + %tmp4 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| 26 | + %tmp5 = add <4 x i16> %tmp3, %tmp4 |
| 27 | + ret <4 x i16> %tmp5 |
27 | 28 | }
|
28 | 29 |
|
29 |
| -define <16 x i8> @vuzpQi8(ptr %A, ptr %B) nounwind { |
30 |
| -;CHECK-LABEL: vuzpQi8: |
31 |
| -;CHECK: uzp1.16b |
32 |
| -;CHECK: uzp2.16b |
33 |
| -;CHECK-NEXT: add.16b |
34 |
| - %tmp1 = load <16 x i8>, ptr %A |
35 |
| - %tmp2 = load <16 x i8>, ptr %B |
36 |
| - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
37 |
| - %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
38 |
| - %tmp5 = add <16 x i8> %tmp3, %tmp4 |
39 |
| - ret <16 x i8> %tmp5 |
| 30 | +define <16 x i8> @vuzpQi8(<16 x i8> %A, <16 x i8> %B) nounwind { |
| 31 | +; CHECK-LABEL: vuzpQi8: |
| 32 | +; CHECK: // %bb.0: |
| 33 | +; CHECK-NEXT: uzp1.16b v2, v0, v1 |
| 34 | +; CHECK-NEXT: uzp2.16b v0, v0, v1 |
| 35 | +; CHECK-NEXT: add.16b v0, v2, v0 |
| 36 | +; CHECK-NEXT: ret |
| 37 | + %tmp3 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> |
| 38 | + %tmp4 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> |
| 39 | + %tmp5 = add <16 x i8> %tmp3, %tmp4 |
| 40 | + ret <16 x i8> %tmp5 |
40 | 41 | }
|
41 | 42 |
|
42 |
| -define <8 x i16> @vuzpQi16(ptr %A, ptr %B) nounwind { |
43 |
| -;CHECK-LABEL: vuzpQi16: |
44 |
| -;CHECK: uzp1.8h |
45 |
| -;CHECK: uzp2.8h |
46 |
| -;CHECK-NEXT: add.8h |
47 |
| - %tmp1 = load <8 x i16>, ptr %A |
48 |
| - %tmp2 = load <8 x i16>, ptr %B |
49 |
| - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
50 |
| - %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
51 |
| - %tmp5 = add <8 x i16> %tmp3, %tmp4 |
52 |
| - ret <8 x i16> %tmp5 |
| 43 | +define <8 x i16> @vuzpQi16(<8 x i16> %A, <8 x i16> %B) nounwind { |
| 44 | +; CHECK-LABEL: vuzpQi16: |
| 45 | +; CHECK: // %bb.0: |
| 46 | +; CHECK-NEXT: uzp1.8h v2, v0, v1 |
| 47 | +; CHECK-NEXT: uzp2.8h v0, v0, v1 |
| 48 | +; CHECK-NEXT: add.8h v0, v2, v0 |
| 49 | +; CHECK-NEXT: ret |
| 50 | + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 51 | + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 52 | + %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| 53 | + ret <8 x i16> %tmp5 |
53 | 54 | }
|
54 | 55 |
|
55 |
| -define <4 x i32> @vuzpQi32(ptr %A, ptr %B) nounwind { |
56 |
| -;CHECK-LABEL: vuzpQi32: |
57 |
| -;CHECK: uzp1.4s |
58 |
| -;CHECK: uzp2.4s |
59 |
| -;CHECK-NEXT: add.4s |
60 |
| - %tmp1 = load <4 x i32>, ptr %A |
61 |
| - %tmp2 = load <4 x i32>, ptr %B |
62 |
| - %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
63 |
| - %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
64 |
| - %tmp5 = add <4 x i32> %tmp3, %tmp4 |
65 |
| - ret <4 x i32> %tmp5 |
| 56 | +define <4 x i32> @vuzpQi32(<4 x i32> %A, <4 x i32> %B) nounwind { |
| 57 | +; CHECK-LABEL: vuzpQi32: |
| 58 | +; CHECK: // %bb.0: |
| 59 | +; CHECK-NEXT: uzp1.4s v2, v0, v1 |
| 60 | +; CHECK-NEXT: uzp2.4s v0, v0, v1 |
| 61 | +; CHECK-NEXT: add.4s v0, v2, v0 |
| 62 | +; CHECK-NEXT: ret |
| 63 | + %tmp3 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| 64 | + %tmp4 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| 65 | + %tmp5 = add <4 x i32> %tmp3, %tmp4 |
| 66 | + ret <4 x i32> %tmp5 |
66 | 67 | }
|
67 | 68 |
|
68 |
| -define <4 x float> @vuzpQf(ptr %A, ptr %B) nounwind { |
69 |
| -;CHECK-LABEL: vuzpQf: |
70 |
| -;CHECK: uzp1.4s |
71 |
| -;CHECK: uzp2.4s |
72 |
| -;CHECK-NEXT: fadd.4s |
73 |
| - %tmp1 = load <4 x float>, ptr %A |
74 |
| - %tmp2 = load <4 x float>, ptr %B |
75 |
| - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
76 |
| - %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
77 |
| - %tmp5 = fadd <4 x float> %tmp3, %tmp4 |
78 |
| - ret <4 x float> %tmp5 |
| 69 | +define <4 x float> @vuzpQf(<4 x float> %A, <4 x float> %B) nounwind { |
| 70 | +; CHECK-LABEL: vuzpQf: |
| 71 | +; CHECK: // %bb.0: |
| 72 | +; CHECK-NEXT: uzp1.4s v2, v0, v1 |
| 73 | +; CHECK-NEXT: uzp2.4s v0, v0, v1 |
| 74 | +; CHECK-NEXT: fadd.4s v0, v2, v0 |
| 75 | +; CHECK-NEXT: ret |
| 76 | + %tmp3 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 6> |
| 77 | + %tmp4 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 1, i32 3, i32 5, i32 7> |
| 78 | + %tmp5 = fadd <4 x float> %tmp3, %tmp4 |
| 79 | + ret <4 x float> %tmp5 |
79 | 80 | }
|
80 | 81 |
|
81 | 82 | ; Undef shuffle indices should not prevent matching to UZP1/UZP2:
|
82 | 83 |
|
83 |
| -define <8 x i8> @vuzpi8_undef(ptr %A, ptr %B) nounwind { |
84 |
| -;CHECK-LABEL: vuzpi8_undef: |
85 |
| -;CHECK: uzp1.8b |
86 |
| -;CHECK: uzp2.8b |
87 |
| -;CHECK-NEXT: add.8b |
88 |
| - %tmp1 = load <8 x i8>, ptr %A |
89 |
| - %tmp2 = load <8 x i8>, ptr %B |
90 |
| - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> |
91 |
| - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> |
92 |
| - %tmp5 = add <8 x i8> %tmp3, %tmp4 |
93 |
| - ret <8 x i8> %tmp5 |
| 84 | +define <8 x i8> @vuzpi8_undef(<8 x i8> %A, <8 x i8> %B) nounwind { |
| 85 | +; CHECK-LABEL: vuzpi8_undef: |
| 86 | +; CHECK: // %bb.0: |
| 87 | +; CHECK-NEXT: uzp1.8b v2, v0, v1 |
| 88 | +; CHECK-NEXT: uzp2.8b v0, v0, v1 |
| 89 | +; CHECK-NEXT: add.8b v0, v2, v0 |
| 90 | +; CHECK-NEXT: ret |
| 91 | + %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> |
| 92 | + %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> |
| 93 | + %tmp5 = add <8 x i8> %tmp3, %tmp4 |
| 94 | + ret <8 x i8> %tmp5 |
94 | 95 | }
|
95 | 96 |
|
96 |
| -define <8 x i16> @vuzpQi16_undef(ptr %A, ptr %B) nounwind { |
97 |
| -;CHECK-LABEL: vuzpQi16_undef: |
98 |
| -;CHECK: uzp1.8h |
99 |
| -;CHECK: uzp2.8h |
100 |
| -;CHECK-NEXT: add.8h |
101 |
| - %tmp1 = load <8 x i16>, ptr %A |
102 |
| - %tmp2 = load <8 x i16>, ptr %B |
103 |
| - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> |
104 |
| - %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> |
105 |
| - %tmp5 = add <8 x i16> %tmp3, %tmp4 |
106 |
| - ret <8 x i16> %tmp5 |
| 97 | +define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind { |
| 98 | +; CHECK-LABEL: vuzpQi16_undef1: |
| 99 | +; CHECK: // %bb.0: |
| 100 | +; CHECK-NEXT: uzp1.8h v2, v0, v1 |
| 101 | +; CHECK-NEXT: uzp2.8h v0, v0, v1 |
| 102 | +; CHECK-NEXT: add.8h v0, v2, v0 |
| 103 | +; CHECK-NEXT: ret |
| 104 | + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> |
| 105 | + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> |
| 106 | + %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| 107 | + ret <8 x i16> %tmp5 |
| 108 | +} |
| 109 | + |
| 110 | +define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { |
| 111 | +; CHECK-LABEL: vuzpQi16_undef0: |
| 112 | +; CHECK: // %bb.0: |
| 113 | +; CHECK-NEXT: adrp x8, .LCPI8_0 |
| 114 | +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| 115 | +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] |
| 116 | +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| 117 | +; CHECK-NEXT: uzp2.8h v3, v0, v1 |
| 118 | +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 |
| 119 | +; CHECK-NEXT: add.8h v0, v0, v3 |
| 120 | +; CHECK-NEXT: ret |
| 121 | + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 122 | + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 123 | + %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| 124 | + ret <8 x i16> %tmp5 |
| 125 | +} |
| 126 | + |
| 127 | +define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { |
| 128 | +; CHECK-LABEL: vuzpQi16_undef01: |
| 129 | +; CHECK: // %bb.0: |
| 130 | +; CHECK-NEXT: adrp x8, .LCPI9_0 |
| 131 | +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| 132 | +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] |
| 133 | +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| 134 | +; CHECK-NEXT: uzp2.8h v3, v0, v1 |
| 135 | +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 |
| 136 | +; CHECK-NEXT: add.8h v0, v0, v3 |
| 137 | +; CHECK-NEXT: ret |
| 138 | + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 139 | + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 140 | + %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| 141 | + ret <8 x i16> %tmp5 |
| 142 | +} |
| 143 | + |
| 144 | +define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind { |
| 145 | +; CHECK-LABEL: vuzpQi16_undef012: |
| 146 | +; CHECK: // %bb.0: |
| 147 | +; CHECK-NEXT: adrp x8, .LCPI10_0 |
| 148 | +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| 149 | +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] |
| 150 | +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| 151 | +; CHECK-NEXT: uzp2.8h v3, v0, v1 |
| 152 | +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 |
| 153 | +; CHECK-NEXT: add.8h v0, v0, v3 |
| 154 | +; CHECK-NEXT: ret |
| 155 | + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 156 | + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 157 | + %tmp5 = add <8 x i16> %tmp3, %tmp4 |
| 158 | + ret <8 x i16> %tmp5 |
107 | 159 | }
|
0 commit comments