|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
| 2 | +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-UNALIGNED %s |
| 3 | +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-NOUNALIGNED %s |
2 | 4 | ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
|
3 | 5 | ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
|
4 | 6 | ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
|
|
7 | 9 | ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
|
8 | 10 |
|
9 | 11 | define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
|
| 12 | +; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align1: |
| 13 | +; GFX12-UNALIGNED: ; %bb.0: |
| 14 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 15 | +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 16 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 17 | +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] |
| 18 | +; |
| 19 | +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: |
| 20 | +; GFX12-NOUNALIGNED: ; %bb.0: |
| 21 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 22 | +; GFX12-NOUNALIGNED-NEXT: s_clause 0xb |
| 23 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v2, v[0:1], off |
| 24 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v3, v[0:1], off offset:1 |
| 25 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v4, v[0:1], off offset:2 |
| 26 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v5, v[0:1], off offset:3 |
| 27 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v6, v[0:1], off offset:4 |
| 28 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v7, v[0:1], off offset:5 |
| 29 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v8, v[0:1], off offset:6 |
| 30 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v9, v[0:1], off offset:7 |
| 31 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8 |
| 32 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9 |
| 33 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11 |
| 34 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10 |
| 35 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) |
| 36 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 |
| 37 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) |
| 38 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4 |
| 39 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) |
| 40 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v5 |
| 41 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) |
| 42 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6 |
| 43 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) |
| 44 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v8 |
| 45 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) |
| 46 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v9 |
| 47 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) |
| 48 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10 |
| 49 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) |
| 50 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12 |
| 51 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 52 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0 |
| 53 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1 |
| 54 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4 |
| 55 | +; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) |
| 56 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7 |
| 57 | +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] |
| 58 | +; |
10 | 59 | ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
|
11 | 60 | ; GFX9-UNALIGNED: ; %bb.0:
|
12 | 61 | ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -166,6 +215,31 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
|
166 | 215 | }
|
167 | 216 |
|
168 | 217 | define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
|
| 218 | +; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align2: |
| 219 | +; GFX12-UNALIGNED: ; %bb.0: |
| 220 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 221 | +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 222 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 223 | +; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] |
| 224 | +; |
| 225 | +; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: |
| 226 | +; GFX12-NOUNALIGNED: ; %bb.0: |
| 227 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 228 | +; GFX12-NOUNALIGNED-NEXT: s_clause 0x5 |
| 229 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v2, v[0:1], off |
| 230 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v3, v[0:1], off offset:2 |
| 231 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v4, v[0:1], off offset:4 |
| 232 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v5, v[0:1], off offset:6 |
| 233 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v6, v[0:1], off offset:8 |
| 234 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v7, v[0:1], off offset:10 |
| 235 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) |
| 236 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 |
| 237 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) |
| 238 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4 |
| 239 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 240 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6 |
| 241 | +; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] |
| 242 | +; |
169 | 243 | ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
|
170 | 244 | ; GFX9-UNALIGNED: ; %bb.0:
|
171 | 245 | ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -256,6 +330,13 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
|
256 | 330 | }
|
257 | 331 |
|
258 | 332 | define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
|
| 333 | +; GFX12-LABEL: v_load_constant_v3i32_align4: |
| 334 | +; GFX12: ; %bb.0: |
| 335 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 336 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 337 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 338 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 339 | +; |
259 | 340 | ; GFX9-LABEL: v_load_constant_v3i32_align4:
|
260 | 341 | ; GFX9: ; %bb.0:
|
261 | 342 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -291,6 +372,13 @@ define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
|
291 | 372 | }
|
292 | 373 |
|
293 | 374 | define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
|
| 375 | +; GFX12-LABEL: v_load_constant_i96_align8: |
| 376 | +; GFX12: ; %bb.0: |
| 377 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 378 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 379 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 380 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 381 | +; |
294 | 382 | ; GFX9-LABEL: v_load_constant_i96_align8:
|
295 | 383 | ; GFX9: ; %bb.0:
|
296 | 384 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -326,6 +414,13 @@ define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
|
326 | 414 | }
|
327 | 415 |
|
328 | 416 | define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
|
| 417 | +; GFX12-LABEL: v_load_constant_v3i32_align8: |
| 418 | +; GFX12: ; %bb.0: |
| 419 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 420 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 421 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 422 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 423 | +; |
329 | 424 | ; GFX9-LABEL: v_load_constant_v3i32_align8:
|
330 | 425 | ; GFX9: ; %bb.0:
|
331 | 426 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -361,6 +456,13 @@ define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
|
361 | 456 | }
|
362 | 457 |
|
363 | 458 | define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
|
| 459 | +; GFX12-LABEL: v_load_constant_v6i16_align8: |
| 460 | +; GFX12: ; %bb.0: |
| 461 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 462 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 463 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 464 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 465 | +; |
364 | 466 | ; GFX9-LABEL: v_load_constant_v6i16_align8:
|
365 | 467 | ; GFX9: ; %bb.0:
|
366 | 468 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -405,6 +507,25 @@ define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
|
405 | 507 | }
|
406 | 508 |
|
407 | 509 | define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
|
| 510 | +; GFX12-LABEL: v_load_constant_v12i8_align8: |
| 511 | +; GFX12: ; %bb.0: |
| 512 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 513 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 514 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 515 | +; GFX12-NEXT: v_lshrrev_b32_e32 v13, 8, v0 |
| 516 | +; GFX12-NEXT: v_lshrrev_b32_e32 v12, 16, v0 |
| 517 | +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 24, v0 |
| 518 | +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 8, v1 |
| 519 | +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v1 |
| 520 | +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 24, v1 |
| 521 | +; GFX12-NEXT: v_lshrrev_b32_e32 v9, 8, v2 |
| 522 | +; GFX12-NEXT: v_lshrrev_b32_e32 v10, 16, v2 |
| 523 | +; GFX12-NEXT: v_lshrrev_b32_e32 v11, 24, v2 |
| 524 | +; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13 |
| 525 | +; GFX12-NEXT: v_mov_b32_e32 v8, v2 |
| 526 | +; GFX12-NEXT: v_mov_b32_e32 v2, v12 |
| 527 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 528 | +; |
408 | 529 | ; GFX9-LABEL: v_load_constant_v12i8_align8:
|
409 | 530 | ; GFX9: ; %bb.0:
|
410 | 531 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -475,6 +596,13 @@ define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
|
475 | 596 | }
|
476 | 597 |
|
477 | 598 | define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
|
| 599 | +; GFX12-LABEL: v_load_constant_v3i32_align16: |
| 600 | +; GFX12: ; %bb.0: |
| 601 | +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 602 | +; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off |
| 603 | +; GFX12-NEXT: s_waitcnt vmcnt(0) |
| 604 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 605 | +; |
478 | 606 | ; GFX9-LABEL: v_load_constant_v3i32_align16:
|
479 | 607 | ; GFX9: ; %bb.0:
|
480 | 608 | ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
@@ -506,6 +634,60 @@ define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
|
506 | 634 | }
|
507 | 635 |
|
508 | 636 | define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) {
|
| 637 | +; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align1: |
| 638 | +; GFX12-UNALIGNED: ; %bb.0: |
| 639 | +; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 |
| 640 | +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] |
| 641 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 642 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 |
| 643 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 |
| 644 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 |
| 645 | +; GFX12-UNALIGNED-NEXT: ; return to shader part epilog |
| 646 | +; |
| 647 | +; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: |
| 648 | +; GFX12-NOUNALIGNED: ; %bb.0: |
| 649 | +; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0 |
| 650 | +; GFX12-NOUNALIGNED-NEXT: s_clause 0xb |
| 651 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v1, v0, s[0:1] |
| 652 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v2, v0, s[0:1] offset:1 |
| 653 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v3, v0, s[0:1] offset:2 |
| 654 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v4, v0, s[0:1] offset:3 |
| 655 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v5, v0, s[0:1] offset:4 |
| 656 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v6, v0, s[0:1] offset:5 |
| 657 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v7, v0, s[0:1] offset:6 |
| 658 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v8, v0, s[0:1] offset:7 |
| 659 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v9, v0, s[0:1] offset:8 |
| 660 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v10, v0, s[0:1] offset:9 |
| 661 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v11, v0, s[0:1] offset:11 |
| 662 | +; GFX12-NOUNALIGNED-NEXT: global_load_u8 v0, v0, s[0:1] offset:10 |
| 663 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) |
| 664 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v2, 8, v1 |
| 665 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) |
| 666 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| 667 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) |
| 668 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v4 |
| 669 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) |
| 670 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v6, 8, v5 |
| 671 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) |
| 672 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v7 |
| 673 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) |
| 674 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v8 |
| 675 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v1, v2, v3, v1 |
| 676 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) |
| 677 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v10, 8, v9 |
| 678 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) |
| 679 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v11 |
| 680 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 681 | +; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| 682 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v5, v6, v4 |
| 683 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v1 |
| 684 | +; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) |
| 685 | +; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v0, v8, v0, v7 |
| 686 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v2 |
| 687 | +; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| 688 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v0 |
| 689 | +; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog |
| 690 | +; |
509 | 691 | ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
|
510 | 692 | ; GFX9-UNALIGNED: ; %bb.0:
|
511 | 693 | ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
|
@@ -674,6 +856,38 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
|
674 | 856 | }
|
675 | 857 |
|
676 | 858 | define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) {
|
| 859 | +; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align2: |
| 860 | +; GFX12-UNALIGNED: ; %bb.0: |
| 861 | +; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 |
| 862 | +; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] |
| 863 | +; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 864 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 |
| 865 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 |
| 866 | +; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 |
| 867 | +; GFX12-UNALIGNED-NEXT: ; return to shader part epilog |
| 868 | +; |
| 869 | +; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: |
| 870 | +; GFX12-NOUNALIGNED: ; %bb.0: |
| 871 | +; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0 |
| 872 | +; GFX12-NOUNALIGNED-NEXT: s_clause 0x5 |
| 873 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v1, v0, s[0:1] |
| 874 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v2, v0, s[0:1] offset:2 |
| 875 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v3, v0, s[0:1] offset:4 |
| 876 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v4, v0, s[0:1] offset:6 |
| 877 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v5, v0, s[0:1] offset:8 |
| 878 | +; GFX12-NOUNALIGNED-NEXT: global_load_u16 v0, v0, s[0:1] offset:10 |
| 879 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) |
| 880 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v2, 16, v1 |
| 881 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) |
| 882 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v4, 16, v3 |
| 883 | +; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) |
| 884 | +; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v0, 16, v5 |
| 885 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v1 |
| 886 | +; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) |
| 887 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v2 |
| 888 | +; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v0 |
| 889 | +; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog |
| 890 | +; |
677 | 891 | ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
|
678 | 892 | ; GFX9-UNALIGNED: ; %bb.0:
|
679 | 893 | ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
|
@@ -773,6 +987,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
|
773 | 987 | }
|
774 | 988 |
|
775 | 989 | define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) {
|
| 990 | +; GFX12-LABEL: s_load_constant_v3i32_align4: |
| 991 | +; GFX12: ; %bb.0: |
| 992 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 993 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 994 | +; GFX12-NEXT: ; return to shader part epilog |
| 995 | +; |
776 | 996 | ; GFX9-LABEL: s_load_constant_v3i32_align4:
|
777 | 997 | ; GFX9: ; %bb.0:
|
778 | 998 | ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
@@ -804,6 +1024,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg
|
804 | 1024 | }
|
805 | 1025 |
|
806 | 1026 | define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
|
| 1027 | +; GFX12-LABEL: s_load_constant_i96_align8: |
| 1028 | +; GFX12: ; %bb.0: |
| 1029 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 1030 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 1031 | +; GFX12-NEXT: ; return to shader part epilog |
| 1032 | +; |
807 | 1033 | ; GFX9-LABEL: s_load_constant_i96_align8:
|
808 | 1034 | ; GFX9: ; %bb.0:
|
809 | 1035 | ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
@@ -835,6 +1061,12 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
|
835 | 1061 | }
|
836 | 1062 |
|
837 | 1063 | define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) {
|
| 1064 | +; GFX12-LABEL: s_load_constant_v3i32_align8: |
| 1065 | +; GFX12: ; %bb.0: |
| 1066 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 1067 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 1068 | +; GFX12-NEXT: ; return to shader part epilog |
| 1069 | +; |
838 | 1070 | ; GFX9-LABEL: s_load_constant_v3i32_align8:
|
839 | 1071 | ; GFX9: ; %bb.0:
|
840 | 1072 | ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
@@ -866,6 +1098,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg
|
866 | 1098 | }
|
867 | 1099 |
|
868 | 1100 | define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) {
|
| 1101 | +; GFX12-LABEL: s_load_constant_v6i16_align8: |
| 1102 | +; GFX12: ; %bb.0: |
| 1103 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 1104 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 1105 | +; GFX12-NEXT: ; return to shader part epilog |
| 1106 | +; |
869 | 1107 | ; GFX9-LABEL: s_load_constant_v6i16_align8:
|
870 | 1108 | ; GFX9: ; %bb.0:
|
871 | 1109 | ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
@@ -898,6 +1136,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg
|
898 | 1136 | }
|
899 | 1137 |
|
900 | 1138 | define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) {
|
| 1139 | +; GFX12-LABEL: s_load_constant_v12i8_align8: |
| 1140 | +; GFX12: ; %bb.0: |
| 1141 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 1142 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 1143 | +; GFX12-NEXT: s_lshr_b32 s13, s0, 8 |
| 1144 | +; GFX12-NEXT: s_lshr_b32 s12, s0, 16 |
| 1145 | +; GFX12-NEXT: s_lshr_b32 s3, s0, 24 |
| 1146 | +; GFX12-NEXT: s_lshr_b32 s5, s1, 8 |
| 1147 | +; GFX12-NEXT: s_lshr_b32 s6, s1, 16 |
| 1148 | +; GFX12-NEXT: s_lshr_b32 s7, s1, 24 |
| 1149 | +; GFX12-NEXT: s_lshr_b32 s9, s2, 8 |
| 1150 | +; GFX12-NEXT: s_lshr_b32 s10, s2, 16 |
| 1151 | +; GFX12-NEXT: s_lshr_b32 s11, s2, 24 |
| 1152 | +; GFX12-NEXT: s_mov_b32 s4, s1 |
| 1153 | +; GFX12-NEXT: s_mov_b32 s8, s2 |
| 1154 | +; GFX12-NEXT: s_mov_b32 s1, s13 |
| 1155 | +; GFX12-NEXT: s_mov_b32 s2, s12 |
| 1156 | +; GFX12-NEXT: ; return to shader part epilog |
| 1157 | +; |
901 | 1158 | ; GFX9-LABEL: s_load_constant_v12i8_align8:
|
902 | 1159 | ; GFX9: ; %bb.0:
|
903 | 1160 | ; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
|
@@ -956,6 +1213,12 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg
|
956 | 1213 | }
|
957 | 1214 |
|
958 | 1215 | define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) {
|
| 1216 | +; GFX12-LABEL: s_load_constant_v3i32_align16: |
| 1217 | +; GFX12: ; %bb.0: |
| 1218 | +; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 |
| 1219 | +; GFX12-NEXT: s_waitcnt lgkmcnt(0) |
| 1220 | +; GFX12-NEXT: ; return to shader part epilog |
| 1221 | +; |
959 | 1222 | ; GCN-LABEL: s_load_constant_v3i32_align16:
|
960 | 1223 | ; GCN: ; %bb.0:
|
961 | 1224 | ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
|
0 commit comments