|
3 | 3 | ; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
|
4 | 4 | ; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
|
5 | 5 | ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
|
6 |
| -; RUN: llc -mtriple=aarch64 -mattr=+sme -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME |
| 6 | +; RUN: llc -mtriple=aarch64 -mattr=+sve,+sme,+i8mm -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME |
7 | 7 |
|
8 | 8 | define <vscale x 4 x i32> @udot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
|
9 | 9 | ; CHECK-LABEL: udot:
|
@@ -106,23 +106,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
|
106 | 106 | ;
|
107 | 107 | ; CHECK-NEWLOWERING-LABEL: usdot:
|
108 | 108 | ; CHECK-NEWLOWERING: // %bb.0: // %entry
|
109 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b |
110 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b |
111 |
| -; CHECK-NEWLOWERING-NEXT: ptrue p0.s |
112 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b |
113 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b |
114 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h |
115 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h |
116 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h |
117 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h |
118 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s |
119 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z1.h |
120 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z2.h |
121 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h |
122 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h |
123 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s |
124 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s |
125 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s |
| 109 | +; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b |
126 | 110 | ; CHECK-NEWLOWERING-NEXT: ret
|
127 | 111 | entry:
|
128 | 112 | %a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
|
@@ -161,23 +145,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
|
161 | 145 | ;
|
162 | 146 | ; CHECK-NEWLOWERING-LABEL: sudot:
|
163 | 147 | ; CHECK-NEWLOWERING: // %bb.0: // %entry
|
164 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b |
165 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b |
166 |
| -; CHECK-NEWLOWERING-NEXT: ptrue p0.s |
167 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b |
168 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b |
169 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h |
170 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h |
171 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h |
172 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h |
173 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s |
174 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z1.h |
175 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z2.h |
176 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h |
177 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h |
178 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s |
179 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s |
180 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s |
| 148 | +; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b |
181 | 149 | ; CHECK-NEWLOWERING-NEXT: ret
|
182 | 150 | entry:
|
183 | 151 | %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
|
@@ -329,46 +297,31 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
|
329 | 297 | ; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z2.d, z3.d
|
330 | 298 | ; CHECK-NOI8MM-NEXT: ret
|
331 | 299 | ;
|
332 |
| -; CHECK-NEWLOWERING-LABEL: usdot_8to64: |
333 |
| -; CHECK-NEWLOWERING: // %bb.0: // %entry |
334 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b |
335 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b |
336 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z5.h, z3.b |
337 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z3.b |
338 |
| -; CHECK-NEWLOWERING-NEXT: ptrue p0.d |
339 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h |
340 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h |
341 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h |
342 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h |
343 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h |
344 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h |
345 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h |
346 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h |
347 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s |
348 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s |
349 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s |
350 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s |
351 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s |
352 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s |
353 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s |
354 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s |
355 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
356 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z4.s |
357 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z5.s |
358 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
359 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z2.s |
360 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z3.s |
361 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s |
362 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s |
363 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s |
364 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s |
365 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d |
366 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d |
367 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
368 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
369 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d |
370 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d |
371 |
| -; CHECK-NEWLOWERING-NEXT: ret |
| 300 | +; CHECK-NEWLOWERING-SVE-LABEL: usdot_8to64: |
| 301 | +; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry |
| 302 | +; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000 |
| 303 | +; CHECK-NEWLOWERING-SVE-NEXT: usdot z4.s, z2.b, z3.b |
| 304 | +; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z4.s |
| 305 | +; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z4.s |
| 306 | +; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d |
| 307 | +; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d |
| 308 | +; CHECK-NEWLOWERING-SVE-NEXT: ret |
| 309 | +; |
| 310 | +; CHECK-NEWLOWERING-SVE2-LABEL: usdot_8to64: |
| 311 | +; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry |
| 312 | +; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000 |
| 313 | +; CHECK-NEWLOWERING-SVE2-NEXT: usdot z4.s, z2.b, z3.b |
| 314 | +; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s |
| 315 | +; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s |
| 316 | +; CHECK-NEWLOWERING-SVE2-NEXT: ret |
| 317 | +; |
| 318 | +; CHECK-NEWLOWERING-SME-LABEL: usdot_8to64: |
| 319 | +; CHECK-NEWLOWERING-SME: // %bb.0: // %entry |
| 320 | +; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0 |
| 321 | +; CHECK-NEWLOWERING-SME-NEXT: usdot z4.s, z2.b, z3.b |
| 322 | +; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s |
| 323 | +; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s |
| 324 | +; CHECK-NEWLOWERING-SME-NEXT: ret |
372 | 325 | entry:
|
373 | 326 | %a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
|
374 | 327 | %b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
|
@@ -430,46 +383,31 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
|
430 | 383 | ; CHECK-NOI8MM-NEXT: mla z0.d, p0/m, z2.d, z3.d
|
431 | 384 | ; CHECK-NOI8MM-NEXT: ret
|
432 | 385 | ;
|
433 |
| -; CHECK-NEWLOWERING-LABEL: sudot_8to64: |
434 |
| -; CHECK-NEWLOWERING: // %bb.0: // %entry |
435 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b |
436 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b |
437 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z5.h, z3.b |
438 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z3.b |
439 |
| -; CHECK-NEWLOWERING-NEXT: ptrue p0.d |
440 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h |
441 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h |
442 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h |
443 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h |
444 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h |
445 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h |
446 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h |
447 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h |
448 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s |
449 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s |
450 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s |
451 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s |
452 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s |
453 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s |
454 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s |
455 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s |
456 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
457 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z4.s |
458 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z5.s |
459 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
460 |
| -; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z2.s |
461 |
| -; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z3.s |
462 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s |
463 |
| -; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s |
464 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s |
465 |
| -; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s |
466 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d |
467 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d |
468 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d |
469 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d |
470 |
| -; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d |
471 |
| -; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d |
472 |
| -; CHECK-NEWLOWERING-NEXT: ret |
| 386 | +; CHECK-NEWLOWERING-SVE-LABEL: sudot_8to64: |
| 387 | +; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry |
| 388 | +; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000 |
| 389 | +; CHECK-NEWLOWERING-SVE-NEXT: usdot z4.s, z3.b, z2.b |
| 390 | +; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z4.s |
| 391 | +; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z4.s |
| 392 | +; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d |
| 393 | +; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d |
| 394 | +; CHECK-NEWLOWERING-SVE-NEXT: ret |
| 395 | +; |
| 396 | +; CHECK-NEWLOWERING-SVE2-LABEL: sudot_8to64: |
| 397 | +; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry |
| 398 | +; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000 |
| 399 | +; CHECK-NEWLOWERING-SVE2-NEXT: usdot z4.s, z3.b, z2.b |
| 400 | +; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s |
| 401 | +; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s |
| 402 | +; CHECK-NEWLOWERING-SVE2-NEXT: ret |
| 403 | +; |
| 404 | +; CHECK-NEWLOWERING-SME-LABEL: sudot_8to64: |
| 405 | +; CHECK-NEWLOWERING-SME: // %bb.0: // %entry |
| 406 | +; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0 |
| 407 | +; CHECK-NEWLOWERING-SME-NEXT: usdot z4.s, z3.b, z2.b |
| 408 | +; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s |
| 409 | +; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s |
| 410 | +; CHECK-NEWLOWERING-SME-NEXT: ret |
473 | 411 | entry:
|
474 | 412 | %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
|
475 | 413 | %b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
|
|
0 commit comments