|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s |
| 3 | + |
; smull: the entry block splats lane 3 of %x into %a. The loop %l1 loads a
; <4 x i16> from %y, multiplies it by the splat with
; llvm.aarch64.neon.smull.v4i32 and adds the product to the accumulator phi %q.
; %p starts at 0 and the back edge is taken only while %p == 0, so the loop
; body executes twice.
; The autogenerated assertions expect the dup to be emitted before the loop
; label and the multiply plus add to be selected as a single smlal.
| 4 | +define <4 x i32> @smull(<4 x i16> %x, <4 x i16> *%y) { |
| 5 | +; CHECK-LABEL: smull: |
| 6 | +; CHECK: // %bb.0: // %entry |
| 7 | +; CHECK-NEXT: fmov d1, d0 |
| 8 | +; CHECK-NEXT: mov w8, #1 |
| 9 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 10 | +; CHECK-NEXT: dup v1.4h, v1.h[3] |
| 11 | +; CHECK-NEXT: .LBB0_1: // %l1 |
| 12 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 13 | +; CHECK-NEXT: ldr d2, [x0] |
| 14 | +; CHECK-NEXT: subs w8, w8, #1 |
| 15 | +; CHECK-NEXT: smlal v0.4s, v2.4h, v1.4h |
| 16 | +; CHECK-NEXT: b.eq .LBB0_1 |
| 17 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 18 | +; CHECK-NEXT: ret |
| 19 | +entry: |
| 20 | + %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 21 | + br label %l1 |
| 22 | + |
| 23 | +l1: |
| 24 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 25 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 26 | + %l = load <4 x i16>, <4 x i16> *%y |
| 27 | + %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %l, <4 x i16> %a) |
| 28 | + %c = add nsw <4 x i32> %q, %b |
| 29 | + %pa = add i32 %p, 1 |
| 30 | + %c1 = icmp eq i32 %p, 0 |
| 31 | + br i1 %c1, label %l1, label %l2 |
| 32 | + |
| 33 | +l2: |
| 34 | + ret <4 x i32> %c |
| 35 | +} |
| 36 | + |
; umull: unsigned counterpart of the smull case. Lane 3 of %x is splatted in
; the entry block; the loop %l1 loads a <4 x i16> from %y, multiplies it by the
; splat with llvm.aarch64.neon.umull.v4i32 and adds the product to the
; accumulator phi %q. The loop body executes twice (back edge taken only while
; %p == 0).
; The autogenerated assertions expect the dup before the loop label and the
; multiply plus add selected as a single umlal.
| 37 | +define <4 x i32> @umull(<4 x i16> %x, <4 x i16> *%y) { |
| 38 | +; CHECK-LABEL: umull: |
| 39 | +; CHECK: // %bb.0: // %entry |
| 40 | +; CHECK-NEXT: fmov d1, d0 |
| 41 | +; CHECK-NEXT: mov w8, #1 |
| 42 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 43 | +; CHECK-NEXT: dup v1.4h, v1.h[3] |
| 44 | +; CHECK-NEXT: .LBB1_1: // %l1 |
| 45 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 46 | +; CHECK-NEXT: ldr d2, [x0] |
| 47 | +; CHECK-NEXT: subs w8, w8, #1 |
| 48 | +; CHECK-NEXT: umlal v0.4s, v2.4h, v1.4h |
| 49 | +; CHECK-NEXT: b.eq .LBB1_1 |
| 50 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 51 | +; CHECK-NEXT: ret |
| 52 | +entry: |
| 53 | + %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 54 | + br label %l1 |
| 55 | + |
| 56 | +l1: |
| 57 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 58 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 59 | + %l = load <4 x i16>, <4 x i16> *%y |
| 60 | + %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %l, <4 x i16> %a) |
| 61 | + %c = add nsw <4 x i32> %q, %b |
| 62 | + %pa = add i32 %p, 1 |
| 63 | + %c1 = icmp eq i32 %p, 0 |
| 64 | + br i1 %c1, label %l1, label %l2 |
| 65 | + |
| 66 | +l2: |
| 67 | + ret <4 x i32> %c |
| 68 | +} |
| 69 | + |
; sqadd: lane 3 of the <4 x i32> argument %x is splatted in the entry block.
; The loop %l1 loads a <4 x i32> from %y, calls
; llvm.aarch64.neon.sqrdmulh.v4i32 on the load and the splat, and folds the
; result into the accumulator phi %q with llvm.aarch64.neon.sqadd.v4i32.
; The loop body executes twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label, followed
; inside the loop by a vector-operand sqrdmulh and a separate sqadd.
| 70 | +define <4 x i32> @sqadd(<4 x i32> %x, <4 x i32> *%y) { |
| 71 | +; CHECK-LABEL: sqadd: |
| 72 | +; CHECK: // %bb.0: // %entry |
| 73 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 74 | +; CHECK-NEXT: mov w8, #1 |
| 75 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 76 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 77 | +; CHECK-NEXT: .LBB2_1: // %l1 |
| 78 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 79 | +; CHECK-NEXT: ldr q2, [x0] |
| 80 | +; CHECK-NEXT: subs w8, w8, #1 |
| 81 | +; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v1.4s |
| 82 | +; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s |
| 83 | +; CHECK-NEXT: b.eq .LBB2_1 |
| 84 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 85 | +; CHECK-NEXT: ret |
| 86 | +entry: |
| 87 | + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 88 | + br label %l1 |
| 89 | + |
| 90 | +l1: |
| 91 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 92 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 93 | + %l = load <4 x i32>, <4 x i32> *%y |
| 94 | + %b = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %l, <4 x i32> %a) |
| 95 | + %c = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %q, <4 x i32> %b) |
| 96 | + %pa = add i32 %p, 1 |
| 97 | + %c1 = icmp eq i32 %p, 0 |
| 98 | + br i1 %c1, label %l1, label %l2 |
| 99 | + |
| 100 | +l2: |
| 101 | + ret <4 x i32> %c |
| 102 | +} |
| 103 | + |
; sqsub: same shape as the sqadd case, but the sqrdmulh result is folded into
; the accumulator phi %q with llvm.aarch64.neon.sqsub.v4i32. Lane 3 of %x is
; splatted in the entry block and the loop body executes twice (back edge
; taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label, followed
; inside the loop by a vector-operand sqrdmulh and a separate sqsub.
| 104 | +define <4 x i32> @sqsub(<4 x i32> %x, <4 x i32> *%y) { |
| 105 | +; CHECK-LABEL: sqsub: |
| 106 | +; CHECK: // %bb.0: // %entry |
| 107 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 108 | +; CHECK-NEXT: mov w8, #1 |
| 109 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 110 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 111 | +; CHECK-NEXT: .LBB3_1: // %l1 |
| 112 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 113 | +; CHECK-NEXT: ldr q2, [x0] |
| 114 | +; CHECK-NEXT: subs w8, w8, #1 |
| 115 | +; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v1.4s |
| 116 | +; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s |
| 117 | +; CHECK-NEXT: b.eq .LBB3_1 |
| 118 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 119 | +; CHECK-NEXT: ret |
| 120 | +entry: |
| 121 | + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 122 | + br label %l1 |
| 123 | + |
| 124 | +l1: |
| 125 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 126 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 127 | + %l = load <4 x i32>, <4 x i32> *%y |
| 128 | + %b = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %l, <4 x i32> %a) |
| 129 | + %c = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %q, <4 x i32> %b) |
| 130 | + %pa = add i32 %p, 1 |
| 131 | + %c1 = icmp eq i32 %p, 0 |
| 132 | + br i1 %c1, label %l1, label %l2 |
| 133 | + |
| 134 | +l2: |
| 135 | + ret <4 x i32> %c |
| 136 | +} |
| 137 | + |
; sqdmulh: lane 3 of the <4 x i32> argument %x is splatted in the entry block.
; The loop %l1 loads a <4 x i32> from %y, calls
; llvm.aarch64.neon.sqdmulh.v4i32 on the load and the splat, and accumulates
; the result into the phi %q with a plain vector add. The loop body executes
; twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label, followed
; inside the loop by a vector-operand sqdmulh and a separate add.
| 138 | +define <4 x i32> @sqdmulh(<4 x i32> %x, <4 x i32> *%y) { |
| 139 | +; CHECK-LABEL: sqdmulh: |
| 140 | +; CHECK: // %bb.0: // %entry |
| 141 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 142 | +; CHECK-NEXT: mov w8, #1 |
| 143 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 144 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 145 | +; CHECK-NEXT: .LBB4_1: // %l1 |
| 146 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 147 | +; CHECK-NEXT: ldr q2, [x0] |
| 148 | +; CHECK-NEXT: subs w8, w8, #1 |
| 149 | +; CHECK-NEXT: sqdmulh v2.4s, v2.4s, v1.4s |
| 150 | +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s |
| 151 | +; CHECK-NEXT: b.eq .LBB4_1 |
| 152 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 153 | +; CHECK-NEXT: ret |
| 154 | +entry: |
| 155 | + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 156 | + br label %l1 |
| 157 | + |
| 158 | +l1: |
| 159 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 160 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 161 | + %l = load <4 x i32>, <4 x i32> *%y |
| 162 | + %b = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %l, <4 x i32> %a) |
| 163 | + %c = add nsw <4 x i32> %q, %b |
| 164 | + %pa = add i32 %p, 1 |
| 165 | + %c1 = icmp eq i32 %p, 0 |
| 166 | + br i1 %c1, label %l1, label %l2 |
| 167 | + |
| 168 | +l2: |
| 169 | + ret <4 x i32> %c |
| 170 | +} |
| 171 | + |
; sqdmull: lane 3 of the <4 x i16> argument %x is splatted in the entry block.
; The loop %l1 loads a <4 x i16> from %y, calls
; llvm.aarch64.neon.sqdmull.v4i32 on the load and the splat, and accumulates
; the widened result into the phi %q with a plain vector add. The loop body
; executes twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label, followed
; inside the loop by a vector-operand sqdmull and a separate add.
| 172 | +define <4 x i32> @sqdmull(<4 x i16> %x, <4 x i16> *%y) { |
| 173 | +; CHECK-LABEL: sqdmull: |
| 174 | +; CHECK: // %bb.0: // %entry |
| 175 | +; CHECK-NEXT: fmov d1, d0 |
| 176 | +; CHECK-NEXT: mov w8, #1 |
| 177 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 178 | +; CHECK-NEXT: dup v1.4h, v1.h[3] |
| 179 | +; CHECK-NEXT: .LBB5_1: // %l1 |
| 180 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 181 | +; CHECK-NEXT: ldr d2, [x0] |
| 182 | +; CHECK-NEXT: subs w8, w8, #1 |
| 183 | +; CHECK-NEXT: sqdmull v2.4s, v2.4h, v1.4h |
| 184 | +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s |
| 185 | +; CHECK-NEXT: b.eq .LBB5_1 |
| 186 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 187 | +; CHECK-NEXT: ret |
| 188 | +entry: |
| 189 | + %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 190 | + br label %l1 |
| 191 | + |
| 192 | +l1: |
| 193 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 194 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 195 | + %l = load <4 x i16>, <4 x i16> *%y |
| 196 | + %b = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %l, <4 x i16> %a) |
| 197 | + %c = add nsw <4 x i32> %q, %b |
| 198 | + %pa = add i32 %p, 1 |
| 199 | + %c1 = icmp eq i32 %p, 0 |
| 200 | + br i1 %c1, label %l1, label %l2 |
| 201 | + |
| 202 | +l2: |
| 203 | + ret <4 x i32> %c |
| 204 | +} |
| 205 | + |
; mlal: despite the name, this case uses no widening intrinsic. The loop %l1
; loads a <4 x i32> from %y, multiplies it by the lane-3 splat of %x with a
; plain IR mul, and adds the product to the accumulator phi %q with a plain
; IR add. The loop body executes twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label and the
; mul plus add selected as a single vector-operand mla.
| 206 | +define <4 x i32> @mlal(<4 x i32> %x, <4 x i32> *%y) { |
| 207 | +; CHECK-LABEL: mlal: |
| 208 | +; CHECK: // %bb.0: // %entry |
| 209 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 210 | +; CHECK-NEXT: mov w8, #1 |
| 211 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 212 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 213 | +; CHECK-NEXT: .LBB6_1: // %l1 |
| 214 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 215 | +; CHECK-NEXT: ldr q2, [x0] |
| 216 | +; CHECK-NEXT: subs w8, w8, #1 |
| 217 | +; CHECK-NEXT: mla v0.4s, v2.4s, v1.4s |
| 218 | +; CHECK-NEXT: b.eq .LBB6_1 |
| 219 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 220 | +; CHECK-NEXT: ret |
| 221 | +entry: |
| 222 | + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 223 | + br label %l1 |
| 224 | + |
| 225 | +l1: |
| 226 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 227 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 228 | + %l = load <4 x i32>, <4 x i32> *%y |
| 229 | + %b = mul <4 x i32> %l, %a |
| 230 | + %c = add <4 x i32> %q, %b |
| 231 | + %pa = add i32 %p, 1 |
| 232 | + %c1 = icmp eq i32 %p, 0 |
| 233 | + br i1 %c1, label %l1, label %l2 |
| 234 | + |
| 235 | +l2: |
| 236 | + ret <4 x i32> %c |
| 237 | +} |
| 238 | + |
; fmul: floating-point case without fast-math flags. The loop %l1 loads a
; <4 x float> from %y, multiplies it by the lane-3 splat of %x with fmul, and
; adds the product to the accumulator phi %q with fadd. The loop body executes
; twice (back edge taken only while %p == 0).
; Unlike the integer cases in this file, the autogenerated assertions expect
; no dup at all: the multiply is selected in its by-element form reading
; v1.s[3] directly, and the fadd stays a separate instruction.
| 239 | +define <4 x float> @fmul(<4 x float> %x, <4 x float> *%y) { |
| 240 | +; CHECK-LABEL: fmul: |
| 241 | +; CHECK: // %bb.0: // %entry |
| 242 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 243 | +; CHECK-NEXT: mov w8, #1 |
| 244 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 245 | +; CHECK-NEXT: .LBB7_1: // %l1 |
| 246 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 247 | +; CHECK-NEXT: ldr q2, [x0] |
| 248 | +; CHECK-NEXT: subs w8, w8, #1 |
| 249 | +; CHECK-NEXT: fmul v2.4s, v2.4s, v1.s[3] |
| 250 | +; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s |
| 251 | +; CHECK-NEXT: b.eq .LBB7_1 |
| 252 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 253 | +; CHECK-NEXT: ret |
| 254 | +entry: |
| 255 | + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 256 | + br label %l1 |
| 257 | + |
| 258 | +l1: |
| 259 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 260 | + %q = phi <4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 261 | + %l = load <4 x float>, <4 x float> *%y |
| 262 | + %b = fmul <4 x float> %l, %a |
| 263 | + %c = fadd <4 x float> %b, %q |
| 264 | + %pa = add i32 %p, 1 |
| 265 | + %c1 = icmp eq i32 %p, 0 |
| 266 | + br i1 %c1, label %l1, label %l2 |
| 267 | + |
| 268 | +l2: |
| 269 | + ret <4 x float> %c |
| 270 | +} |
| 271 | + |
; fmuladd: same IR shape as the fmul case, but both the fmul and the fadd
; carry the `fast` flag (no llvm.fmuladd intrinsic is used despite the name).
; The loop %l1 loads a <4 x float> from %y, multiplies it by the lane-3 splat
; of %x and adds the product to the accumulator phi %q. The loop body executes
; twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the dup before the loop label and the
; fmul plus fadd selected as a single vector-operand fmla.
| 272 | +define <4 x float> @fmuladd(<4 x float> %x, <4 x float> *%y) { |
| 273 | +; CHECK-LABEL: fmuladd: |
| 274 | +; CHECK: // %bb.0: // %entry |
| 275 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 276 | +; CHECK-NEXT: mov w8, #1 |
| 277 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 278 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 279 | +; CHECK-NEXT: .LBB8_1: // %l1 |
| 280 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 281 | +; CHECK-NEXT: ldr q2, [x0] |
| 282 | +; CHECK-NEXT: subs w8, w8, #1 |
| 283 | +; CHECK-NEXT: fmla v0.4s, v1.4s, v2.4s |
| 284 | +; CHECK-NEXT: b.eq .LBB8_1 |
| 285 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 286 | +; CHECK-NEXT: ret |
| 287 | +entry: |
| 288 | + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 289 | + br label %l1 |
| 290 | + |
| 291 | +l1: |
| 292 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 293 | + %q = phi <4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 294 | + %l = load <4 x float>, <4 x float> *%y |
| 295 | + %b = fmul fast <4 x float> %l, %a |
| 296 | + %c = fadd fast <4 x float> %b, %q |
| 297 | + %pa = add i32 %p, 1 |
| 298 | + %c1 = icmp eq i32 %p, 0 |
| 299 | + br i1 %c1, label %l1, label %l2 |
| 300 | + |
| 301 | +l2: |
| 302 | + ret <4 x float> %c |
| 303 | +} |
| 304 | + |
; fma: the loop %l1 loads a <4 x float> from %y and calls llvm.fma.v4f32 with
; the load and the accumulator phi %q as the two multiplicands and the lane-3
; splat of %x (%a) as the addend. The splat is therefore the value being added
; to, not a multiply operand. The loop body executes twice (back edge taken
; only while %p == 0).
; The autogenerated assertions expect the dup before the loop label. Inside
; the loop they expect two register copies (previous result into v2, splat
; into v0) ahead of a vector-operand fmla that accumulates onto the splat copy.
| 305 | +define <4 x float> @fma(<4 x float> %x, <4 x float> *%y) { |
| 306 | +; CHECK-LABEL: fma: |
| 307 | +; CHECK: // %bb.0: // %entry |
| 308 | +; CHECK-NEXT: mov v1.16b, v0.16b |
| 309 | +; CHECK-NEXT: mov w8, #1 |
| 310 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 311 | +; CHECK-NEXT: dup v1.4s, v1.s[3] |
| 312 | +; CHECK-NEXT: .LBB9_1: // %l1 |
| 313 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 314 | +; CHECK-NEXT: mov v2.16b, v0.16b |
| 315 | +; CHECK-NEXT: ldr q3, [x0] |
| 316 | +; CHECK-NEXT: mov v0.16b, v1.16b |
| 317 | +; CHECK-NEXT: subs w8, w8, #1 |
| 318 | +; CHECK-NEXT: fmla v0.4s, v2.4s, v3.4s |
| 319 | +; CHECK-NEXT: b.eq .LBB9_1 |
| 320 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 321 | +; CHECK-NEXT: ret |
| 322 | +entry: |
| 323 | + %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
| 324 | + br label %l1 |
| 325 | + |
| 326 | +l1: |
| 327 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 328 | + %q = phi <4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 329 | + %l = load <4 x float>, <4 x float> *%y |
| 330 | + %c = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %l, <4 x float> %q, <4 x float> %a) |
| 331 | + %pa = add i32 %p, 1 |
| 332 | + %c1 = icmp eq i32 %p, 0 |
| 333 | + br i1 %c1, label %l1, label %l2 |
| 334 | + |
| 335 | +l2: |
| 336 | + ret <4 x float> %c |
| 337 | +} |
| 338 | + |
; smull_nonsplat: negative counterpart of the smull case. The shuffle mask is
; <3, 2, 3, 3>, so %a is not a splat of a single lane. The loop %l1 is
; otherwise identical: load a <4 x i16> from %y, multiply by %a with
; llvm.aarch64.neon.smull.v4i32, add to the accumulator phi %q, and execute
; the body twice (back edge taken only while %p == 0).
; The autogenerated assertions expect the shuffle to be built before the loop
; label from a dup and two ext instructions, with a vector-operand smlal
; inside the loop.
| 339 | +define <4 x i32> @smull_nonsplat(<4 x i16> %x, <4 x i16> *%y) { |
| 340 | +; CHECK-LABEL: smull_nonsplat: |
| 341 | +; CHECK: // %bb.0: // %entry |
| 342 | +; CHECK-NEXT: fmov d1, d0 |
| 343 | +; CHECK-NEXT: mov w8, #1 |
| 344 | +; CHECK-NEXT: movi v0.2d, #0000000000000000 |
| 345 | +; CHECK-NEXT: dup v2.4h, v1.h[3] |
| 346 | +; CHECK-NEXT: ext v2.8b, v1.8b, v2.8b, #4 |
| 347 | +; CHECK-NEXT: ext v1.8b, v1.8b, v2.8b, #6 |
| 348 | +; CHECK-NEXT: .LBB10_1: // %l1 |
| 349 | +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| 350 | +; CHECK-NEXT: ldr d2, [x0] |
| 351 | +; CHECK-NEXT: subs w8, w8, #1 |
| 352 | +; CHECK-NEXT: smlal v0.4s, v2.4h, v1.4h |
| 353 | +; CHECK-NEXT: b.eq .LBB10_1 |
| 354 | +; CHECK-NEXT: // %bb.2: // %l2 |
| 355 | +; CHECK-NEXT: ret |
| 356 | +entry: |
| 357 | + %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 3> |
| 358 | + br label %l1 |
| 359 | + |
| 360 | +l1: |
| 361 | + %p = phi i32 [ 0, %entry ], [ %pa, %l1 ] |
| 362 | + %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ] |
| 363 | + %l = load <4 x i16>, <4 x i16> *%y |
| 364 | + %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %l, <4 x i16> %a) |
| 365 | + %c = add nsw <4 x i32> %q, %b |
| 366 | + %pa = add i32 %p, 1 |
| 367 | + %c1 = icmp eq i32 %p, 0 |
| 368 | + br i1 %c1, label %l1, label %l2 |
| 369 | + |
| 370 | +l2: |
| 371 | + ret <4 x i32> %c |
| 372 | +} |
| 373 | + |
| 374 | +declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) |
| 375 | +declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) |
| 376 | +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) |
| 377 | +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) |
| 378 | +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) |
| 379 | +declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) |
| 380 | +declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) |
| 381 | +declare <4 x float> @llvm.fma.v4f32(<4 x float> %l, <4 x float> %a, <4 x float> %q) |
0 commit comments