Skip to content

Commit a07f133

Browse files
committed
test updates
1 parent 18b7bfe commit a07f133

File tree

2 files changed: +271 -24 lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll

Lines changed: 263 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-UNALIGNED %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-NOUNALIGNED %s
24
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
35
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
46
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
@@ -7,6 +9,53 @@
79
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
810

911
define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
12+
; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
13+
; GFX12-UNALIGNED: ; %bb.0:
14+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15+
; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
16+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
17+
; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
18+
;
19+
; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
20+
; GFX12-NOUNALIGNED: ; %bb.0:
21+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22+
; GFX12-NOUNALIGNED-NEXT: s_clause 0xb
23+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v2, v[0:1], off
24+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v3, v[0:1], off offset:1
25+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v4, v[0:1], off offset:2
26+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v5, v[0:1], off offset:3
27+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v6, v[0:1], off offset:4
28+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v7, v[0:1], off offset:5
29+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v8, v[0:1], off offset:6
30+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v9, v[0:1], off offset:7
31+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8
32+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9
33+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11
34+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10
35+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
36+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2
37+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
38+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4
39+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
40+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v5
41+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
42+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6
43+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
44+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v8
45+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
46+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v9
47+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
48+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10
49+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
50+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12
51+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
52+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0
53+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1
54+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4
55+
; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3)
56+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v8, v9, v7
57+
; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
58+
;
1059
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
1160
; GFX9-UNALIGNED: ; %bb.0:
1261
; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -166,6 +215,31 @@ define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
166215
}
167216

168217
define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
218+
; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
219+
; GFX12-UNALIGNED: ; %bb.0:
220+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221+
; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off
222+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
223+
; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
224+
;
225+
; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
226+
; GFX12-NOUNALIGNED: ; %bb.0:
227+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228+
; GFX12-NOUNALIGNED-NEXT: s_clause 0x5
229+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v2, v[0:1], off
230+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v3, v[0:1], off offset:2
231+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v4, v[0:1], off offset:4
232+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v5, v[0:1], off offset:6
233+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v6, v[0:1], off offset:8
234+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v7, v[0:1], off offset:10
235+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
236+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2
237+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
238+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4
239+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
240+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6
241+
; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
242+
;
169243
; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
170244
; GFX9-UNALIGNED: ; %bb.0:
171245
; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -256,6 +330,13 @@ define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
256330
}
257331

258332
define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
333+
; GFX12-LABEL: v_load_constant_v3i32_align4:
334+
; GFX12: ; %bb.0:
335+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
337+
; GFX12-NEXT: s_waitcnt vmcnt(0)
338+
; GFX12-NEXT: s_setpc_b64 s[30:31]
339+
;
259340
; GFX9-LABEL: v_load_constant_v3i32_align4:
260341
; GFX9: ; %bb.0:
261342
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -291,6 +372,13 @@ define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
291372
}
292373

293374
define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
375+
; GFX12-LABEL: v_load_constant_i96_align8:
376+
; GFX12: ; %bb.0:
377+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
379+
; GFX12-NEXT: s_waitcnt vmcnt(0)
380+
; GFX12-NEXT: s_setpc_b64 s[30:31]
381+
;
294382
; GFX9-LABEL: v_load_constant_i96_align8:
295383
; GFX9: ; %bb.0:
296384
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -326,6 +414,13 @@ define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
326414
}
327415

328416
define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
417+
; GFX12-LABEL: v_load_constant_v3i32_align8:
418+
; GFX12: ; %bb.0:
419+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
421+
; GFX12-NEXT: s_waitcnt vmcnt(0)
422+
; GFX12-NEXT: s_setpc_b64 s[30:31]
423+
;
329424
; GFX9-LABEL: v_load_constant_v3i32_align8:
330425
; GFX9: ; %bb.0:
331426
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -361,6 +456,13 @@ define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
361456
}
362457

363458
define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
459+
; GFX12-LABEL: v_load_constant_v6i16_align8:
460+
; GFX12: ; %bb.0:
461+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
463+
; GFX12-NEXT: s_waitcnt vmcnt(0)
464+
; GFX12-NEXT: s_setpc_b64 s[30:31]
465+
;
364466
; GFX9-LABEL: v_load_constant_v6i16_align8:
365467
; GFX9: ; %bb.0:
366468
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -405,6 +507,25 @@ define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
405507
}
406508

407509
define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
510+
; GFX12-LABEL: v_load_constant_v12i8_align8:
511+
; GFX12: ; %bb.0:
512+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
514+
; GFX12-NEXT: s_waitcnt vmcnt(0)
515+
; GFX12-NEXT: v_lshrrev_b32_e32 v13, 8, v0
516+
; GFX12-NEXT: v_lshrrev_b32_e32 v12, 16, v0
517+
; GFX12-NEXT: v_lshrrev_b32_e32 v3, 24, v0
518+
; GFX12-NEXT: v_lshrrev_b32_e32 v5, 8, v1
519+
; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v1
520+
; GFX12-NEXT: v_lshrrev_b32_e32 v7, 24, v1
521+
; GFX12-NEXT: v_lshrrev_b32_e32 v9, 8, v2
522+
; GFX12-NEXT: v_lshrrev_b32_e32 v10, 16, v2
523+
; GFX12-NEXT: v_lshrrev_b32_e32 v11, 24, v2
524+
; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13
525+
; GFX12-NEXT: v_mov_b32_e32 v8, v2
526+
; GFX12-NEXT: v_mov_b32_e32 v2, v12
527+
; GFX12-NEXT: s_setpc_b64 s[30:31]
528+
;
408529
; GFX9-LABEL: v_load_constant_v12i8_align8:
409530
; GFX9: ; %bb.0:
410531
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -475,6 +596,13 @@ define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
475596
}
476597

477598
define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
599+
; GFX12-LABEL: v_load_constant_v3i32_align16:
600+
; GFX12: ; %bb.0:
601+
; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602+
; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off
603+
; GFX12-NEXT: s_waitcnt vmcnt(0)
604+
; GFX12-NEXT: s_setpc_b64 s[30:31]
605+
;
478606
; GFX9-LABEL: v_load_constant_v3i32_align16:
479607
; GFX9: ; %bb.0:
480608
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -506,6 +634,60 @@ define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
506634
}
507635

508636
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) {
637+
; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
638+
; GFX12-UNALIGNED: ; %bb.0:
639+
; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
640+
; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1]
641+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
642+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
643+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
644+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
645+
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
646+
;
647+
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
648+
; GFX12-NOUNALIGNED: ; %bb.0:
649+
; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0
650+
; GFX12-NOUNALIGNED-NEXT: s_clause 0xb
651+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v1, v0, s[0:1]
652+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v2, v0, s[0:1] offset:1
653+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v3, v0, s[0:1] offset:2
654+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v4, v0, s[0:1] offset:3
655+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v5, v0, s[0:1] offset:4
656+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v6, v0, s[0:1] offset:5
657+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v7, v0, s[0:1] offset:6
658+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v8, v0, s[0:1] offset:7
659+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v9, v0, s[0:1] offset:8
660+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v10, v0, s[0:1] offset:9
661+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v11, v0, s[0:1] offset:11
662+
; GFX12-NOUNALIGNED-NEXT: global_load_u8 v0, v0, s[0:1] offset:10
663+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
664+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v2, 8, v1
665+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
666+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v3
667+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
668+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v4
669+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
670+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v6, 8, v5
671+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
672+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v7
673+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
674+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v8
675+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v1, v2, v3, v1
676+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
677+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v10, 8, v9
678+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
679+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v11
680+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
681+
; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0
682+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v2, v5, v6, v4
683+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v1
684+
; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
685+
; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v0, v8, v0, v7
686+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v2
687+
; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_2)
688+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v0
689+
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
690+
;
509691
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
510692
; GFX9-UNALIGNED: ; %bb.0:
511693
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
@@ -674,6 +856,38 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg
674856
}
675857

676858
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) {
859+
; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
860+
; GFX12-UNALIGNED: ; %bb.0:
861+
; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
862+
; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1]
863+
; GFX12-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
864+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
865+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
866+
; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
867+
; GFX12-UNALIGNED-NEXT: ; return to shader part epilog
868+
;
869+
; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
870+
; GFX12-NOUNALIGNED: ; %bb.0:
871+
; GFX12-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0
872+
; GFX12-NOUNALIGNED-NEXT: s_clause 0x5
873+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v1, v0, s[0:1]
874+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v2, v0, s[0:1] offset:2
875+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v3, v0, s[0:1] offset:4
876+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v4, v0, s[0:1] offset:6
877+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v5, v0, s[0:1] offset:8
878+
; GFX12-NOUNALIGNED-NEXT: global_load_u16 v0, v0, s[0:1] offset:10
879+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
880+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v2, 16, v1
881+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
882+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v4, 16, v3
883+
; GFX12-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
884+
; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v0, 16, v5
885+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v1
886+
; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
887+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v2
888+
; GFX12-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v0
889+
; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog
890+
;
677891
; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
678892
; GFX9-UNALIGNED: ; %bb.0:
679893
; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
@@ -773,6 +987,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg
773987
}
774988

775989
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) {
990+
; GFX12-LABEL: s_load_constant_v3i32_align4:
991+
; GFX12: ; %bb.0:
992+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
993+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
994+
; GFX12-NEXT: ; return to shader part epilog
995+
;
776996
; GFX9-LABEL: s_load_constant_v3i32_align4:
777997
; GFX9: ; %bb.0:
778998
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
@@ -804,6 +1024,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg
8041024
}
8051025

8061026
define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
1027+
; GFX12-LABEL: s_load_constant_i96_align8:
1028+
; GFX12: ; %bb.0:
1029+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1030+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
1031+
; GFX12-NEXT: ; return to shader part epilog
1032+
;
8071033
; GFX9-LABEL: s_load_constant_i96_align8:
8081034
; GFX9: ; %bb.0:
8091035
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
@@ -835,6 +1061,12 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
8351061
}
8361062

8371063
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) {
1064+
; GFX12-LABEL: s_load_constant_v3i32_align8:
1065+
; GFX12: ; %bb.0:
1066+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1067+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
1068+
; GFX12-NEXT: ; return to shader part epilog
1069+
;
8381070
; GFX9-LABEL: s_load_constant_v3i32_align8:
8391071
; GFX9: ; %bb.0:
8401072
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
@@ -866,6 +1098,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg
8661098
}
8671099

8681100
define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) {
1101+
; GFX12-LABEL: s_load_constant_v6i16_align8:
1102+
; GFX12: ; %bb.0:
1103+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1104+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
1105+
; GFX12-NEXT: ; return to shader part epilog
1106+
;
8691107
; GFX9-LABEL: s_load_constant_v6i16_align8:
8701108
; GFX9: ; %bb.0:
8711109
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
@@ -898,6 +1136,25 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg
8981136
}
8991137

9001138
define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) {
1139+
; GFX12-LABEL: s_load_constant_v12i8_align8:
1140+
; GFX12: ; %bb.0:
1141+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1142+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
1143+
; GFX12-NEXT: s_lshr_b32 s13, s0, 8
1144+
; GFX12-NEXT: s_lshr_b32 s12, s0, 16
1145+
; GFX12-NEXT: s_lshr_b32 s3, s0, 24
1146+
; GFX12-NEXT: s_lshr_b32 s5, s1, 8
1147+
; GFX12-NEXT: s_lshr_b32 s6, s1, 16
1148+
; GFX12-NEXT: s_lshr_b32 s7, s1, 24
1149+
; GFX12-NEXT: s_lshr_b32 s9, s2, 8
1150+
; GFX12-NEXT: s_lshr_b32 s10, s2, 16
1151+
; GFX12-NEXT: s_lshr_b32 s11, s2, 24
1152+
; GFX12-NEXT: s_mov_b32 s4, s1
1153+
; GFX12-NEXT: s_mov_b32 s8, s2
1154+
; GFX12-NEXT: s_mov_b32 s1, s13
1155+
; GFX12-NEXT: s_mov_b32 s2, s12
1156+
; GFX12-NEXT: ; return to shader part epilog
1157+
;
9011158
; GFX9-LABEL: s_load_constant_v12i8_align8:
9021159
; GFX9: ; %bb.0:
9031160
; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
@@ -956,6 +1213,12 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg
9561213
}
9571214

9581215
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) {
1216+
; GFX12-LABEL: s_load_constant_v3i32_align16:
1217+
; GFX12: ; %bb.0:
1218+
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
1219+
; GFX12-NEXT: s_waitcnt lgkmcnt(0)
1220+
; GFX12-NEXT: ; return to shader part epilog
1221+
;
9591222
; GCN-LABEL: s_load_constant_v3i32_align16:
9601223
; GCN: ; %bb.0:
9611224
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0

0 commit comments

Comments
 (0)