|
4 | 4 | ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
|
5 | 5 | ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
|
6 | 6 | ; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
|
| 7 | +; RUN: llc < %s -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s |
7 | 8 |
|
8 | 9 | define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
|
9 | 10 | ; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
|
@@ -33,6 +34,13 @@ define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
|
33 | 34 | ; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
|
34 | 35 | ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
|
35 | 36 | ; GFX11-PACKED-NEXT: ; return to shader part epilog
|
| 37 | +; |
| 38 | +; GFX12-PACKED-LABEL: tbuffer_load_d16_x: |
| 39 | +; GFX12-PACKED: ; %bb.0: ; %main_body |
| 40 | +; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 41 | +; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen |
| 42 | +; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 43 | +; GFX12-PACKED-NEXT: ; return to shader part epilog |
36 | 44 | main_body:
|
37 | 45 | %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
|
38 | 46 | ret half %data
|
@@ -70,6 +78,14 @@ define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
|
70 | 78 | ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
|
71 | 79 | ; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
|
72 | 80 | ; GFX11-PACKED-NEXT: ; return to shader part epilog
|
| 81 | +; |
| 82 | +; GFX12-PACKED-LABEL: tbuffer_load_d16_xy: |
| 83 | +; GFX12-PACKED: ; %bb.0: ; %main_body |
| 84 | +; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 85 | +; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen |
| 86 | +; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 87 | +; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 88 | +; GFX12-PACKED-NEXT: ; return to shader part epilog |
73 | 89 | main_body:
|
74 | 90 | %data = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
|
75 | 91 | %elt = extractelement <2 x half> %data, i32 1
|
@@ -108,6 +124,14 @@ define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
|
108 | 124 | ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
|
109 | 125 | ; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
|
110 | 126 | ; GFX11-PACKED-NEXT: ; return to shader part epilog
|
| 127 | +; |
| 128 | +; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz: |
| 129 | +; GFX12-PACKED: ; %bb.0: ; %main_body |
| 130 | +; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 131 | +; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen |
| 132 | +; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 133 | +; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1 |
| 134 | +; GFX12-PACKED-NEXT: ; return to shader part epilog |
111 | 135 | main_body:
|
112 | 136 | %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
|
113 | 137 | %elt = extractelement <3 x half> %data, i32 2
|
@@ -146,6 +170,14 @@ define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
|
146 | 170 | ; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
|
147 | 171 | ; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
|
148 | 172 | ; GFX11-PACKED-NEXT: ; return to shader part epilog
|
| 173 | +; |
| 174 | +; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw: |
| 175 | +; GFX12-PACKED: ; %bb.0: ; %main_body |
| 176 | +; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 177 | +; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen |
| 178 | +; GFX12-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 179 | +; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 180 | +; GFX12-PACKED-NEXT: ; return to shader part epilog |
149 | 181 | main_body:
|
150 | 182 | %data = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
|
151 | 183 | %elt = extractelement <4 x half> %data, i32 3
|
|
0 commit comments