Skip to content

Commit 0f1175c

Browse files
authored
[AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 3 (#131212)
This is an NFC (no functional change) patch. It enables true16 mode for more CodeGen tests.
1 parent b1fe7da commit 0f1175c

8 files changed

+466
-206
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll

Lines changed: 218 additions & 100 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
44
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10-PACKED %s
6-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED %s
6+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
7+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
78

89
define amdgpu_kernel void @tbuffer_store_d16_x(ptr addrspace(8) %rsrc, half %data) {
910
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
@@ -34,15 +35,25 @@ define amdgpu_kernel void @tbuffer_store_d16_x(ptr addrspace(8) %rsrc, half %dat
3435
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
3536
; GFX10-PACKED-NEXT: s_endpgm
3637
;
37-
; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
38-
; GFX11-PACKED: ; %bb.0: ; %main_body
39-
; GFX11-PACKED-NEXT: s_clause 0x1
40-
; GFX11-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
41-
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
42-
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
43-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
44-
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
45-
; GFX11-PACKED-NEXT: s_endpgm
38+
; GFX11-PACKED-TRUE16-LABEL: tbuffer_store_d16_x:
39+
; GFX11-PACKED-TRUE16: ; %bb.0: ; %main_body
40+
; GFX11-PACKED-TRUE16-NEXT: s_clause 0x1
41+
; GFX11-PACKED-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
42+
; GFX11-PACKED-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
43+
; GFX11-PACKED-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
44+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, s6
45+
; GFX11-PACKED-TRUE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
46+
; GFX11-PACKED-TRUE16-NEXT: s_endpgm
47+
;
48+
; GFX11-PACKED-FAKE16-LABEL: tbuffer_store_d16_x:
49+
; GFX11-PACKED-FAKE16: ; %bb.0: ; %main_body
50+
; GFX11-PACKED-FAKE16-NEXT: s_clause 0x1
51+
; GFX11-PACKED-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x34
52+
; GFX11-PACKED-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
53+
; GFX11-PACKED-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
54+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, s6
55+
; GFX11-PACKED-FAKE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
56+
; GFX11-PACKED-FAKE16-NEXT: s_endpgm
4657
main_body:
4758
call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %data, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
4859
ret void

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll

Lines changed: 31 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,10 @@
33
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
44
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
6+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
7+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
8+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED,GFX12-PACKED-TRUE16 %s
9+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED,GFX12-PACKED-FAKE16 %s
810

911
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
1012
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
@@ -104,19 +106,33 @@ define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
104106
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
105107
; GFX10-PACKED-NEXT: ; return to shader part epilog
106108
;
107-
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
108-
; GFX11-PACKED: ; %bb.0: ; %main_body
109-
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
110-
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
111-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
112-
; GFX11-PACKED-NEXT: ; return to shader part epilog
113-
;
114-
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
115-
; GFX12-PACKED: ; %bb.0: ; %main_body
116-
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
117-
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
118-
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
119-
; GFX12-PACKED-NEXT: ; return to shader part epilog
109+
; GFX11-PACKED-TRUE16-LABEL: tbuffer_load_d16_xyz:
110+
; GFX11-PACKED-TRUE16: ; %bb.0: ; %main_body
111+
; GFX11-PACKED-TRUE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
112+
; GFX11-PACKED-TRUE16-NEXT: s_waitcnt vmcnt(0)
113+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
114+
; GFX11-PACKED-TRUE16-NEXT: ; return to shader part epilog
115+
;
116+
; GFX11-PACKED-FAKE16-LABEL: tbuffer_load_d16_xyz:
117+
; GFX11-PACKED-FAKE16: ; %bb.0: ; %main_body
118+
; GFX11-PACKED-FAKE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
119+
; GFX11-PACKED-FAKE16-NEXT: s_waitcnt vmcnt(0)
120+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, v1
121+
; GFX11-PACKED-FAKE16-NEXT: ; return to shader part epilog
122+
;
123+
; GFX12-PACKED-TRUE16-LABEL: tbuffer_load_d16_xyz:
124+
; GFX12-PACKED-TRUE16: ; %bb.0: ; %main_body
125+
; GFX12-PACKED-TRUE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
126+
; GFX12-PACKED-TRUE16-NEXT: s_wait_loadcnt 0x0
127+
; GFX12-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
128+
; GFX12-PACKED-TRUE16-NEXT: ; return to shader part epilog
129+
;
130+
; GFX12-PACKED-FAKE16-LABEL: tbuffer_load_d16_xyz:
131+
; GFX12-PACKED-FAKE16: ; %bb.0: ; %main_body
132+
; GFX12-PACKED-FAKE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
133+
; GFX12-PACKED-FAKE16-NEXT: s_wait_loadcnt 0x0
134+
; GFX12-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, v1
135+
; GFX12-PACKED-FAKE16-NEXT: ; return to shader part epilog
120136
main_body:
121137
%data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
122138
%elt = extractelement <3 x half> %data, i32 2

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll

Lines changed: 57 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,12 @@
33
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
44
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10-PACKED %s
6-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED %s
7-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG %s
8-
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL %s
6+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
7+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
8+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG,GFX12-PACKED-SDAG-TRUE16 %s
9+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG,GFX12-PACKED-SDAG-FAKE16 %s
10+
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL,GFX12-PACKED-GISEL-TRUE16 %s
11+
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL,GFX12-PACKED-GISEL-FAKE16 %s
912

1013
define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) {
1114
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
@@ -36,25 +39,55 @@ define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) {
3639
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED]
3740
; GFX10-PACKED-NEXT: s_endpgm
3841
;
39-
; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
40-
; GFX11-PACKED: ; %bb.0: ; %main_body
41-
; GFX11-PACKED-NEXT: s_clause 0x1
42-
; GFX11-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
43-
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
44-
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
45-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
46-
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
47-
; GFX11-PACKED-NEXT: s_endpgm
42+
; GFX11-PACKED-TRUE16-LABEL: tbuffer_store_d16_x:
43+
; GFX11-PACKED-TRUE16: ; %bb.0: ; %main_body
44+
; GFX11-PACKED-TRUE16-NEXT: s_clause 0x1
45+
; GFX11-PACKED-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
46+
; GFX11-PACKED-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
47+
; GFX11-PACKED-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
48+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, s6
49+
; GFX11-PACKED-TRUE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
50+
; GFX11-PACKED-TRUE16-NEXT: s_endpgm
4851
;
49-
; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
50-
; GFX12-PACKED: ; %bb.0: ; %main_body
51-
; GFX12-PACKED-NEXT: s_clause 0x1
52-
; GFX12-PACKED-NEXT: s_load_b32 s6, s[4:5], 0x34
53-
; GFX12-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
54-
; GFX12-PACKED-NEXT: s_wait_kmcnt 0x0
55-
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, s6
56-
; GFX12-PACKED-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
57-
; GFX12-PACKED-NEXT: s_endpgm
52+
; GFX11-PACKED-FAKE16-LABEL: tbuffer_store_d16_x:
53+
; GFX11-PACKED-FAKE16: ; %bb.0: ; %main_body
54+
; GFX11-PACKED-FAKE16-NEXT: s_clause 0x1
55+
; GFX11-PACKED-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x34
56+
; GFX11-PACKED-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
57+
; GFX11-PACKED-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
58+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, s6
59+
; GFX11-PACKED-FAKE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
60+
; GFX11-PACKED-FAKE16-NEXT: s_endpgm
61+
;
62+
; GFX12-PACKED-SDAG-TRUE16-LABEL: tbuffer_store_d16_x:
63+
; GFX12-PACKED-SDAG-TRUE16: ; %bb.0: ; %main_body
64+
; GFX12-PACKED-SDAG-TRUE16-NEXT: s_clause 0x1
65+
; GFX12-PACKED-SDAG-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
66+
; GFX12-PACKED-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
67+
; GFX12-PACKED-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
68+
; GFX12-PACKED-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, s6
69+
; GFX12-PACKED-SDAG-TRUE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
70+
; GFX12-PACKED-SDAG-TRUE16-NEXT: s_endpgm
71+
;
72+
; GFX12-PACKED-SDAG-FAKE16-LABEL: tbuffer_store_d16_x:
73+
; GFX12-PACKED-SDAG-FAKE16: ; %bb.0: ; %main_body
74+
; GFX12-PACKED-SDAG-FAKE16-NEXT: s_clause 0x1
75+
; GFX12-PACKED-SDAG-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x34
76+
; GFX12-PACKED-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
77+
; GFX12-PACKED-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
78+
; GFX12-PACKED-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s6
79+
; GFX12-PACKED-SDAG-FAKE16-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
80+
; GFX12-PACKED-SDAG-FAKE16-NEXT: s_endpgm
81+
;
82+
; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_x:
83+
; GFX12-PACKED-GISEL: ; %bb.0: ; %main_body
84+
; GFX12-PACKED-GISEL-NEXT: s_clause 0x1
85+
; GFX12-PACKED-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x34
86+
; GFX12-PACKED-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
87+
; GFX12-PACKED-GISEL-NEXT: s_wait_kmcnt 0x0
88+
; GFX12-PACKED-GISEL-NEXT: v_mov_b32_e32 v0, s6
89+
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
90+
; GFX12-PACKED-GISEL-NEXT: s_endpgm
5891
main_body:
5992
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
6093
ret void
@@ -264,3 +297,6 @@ declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32,
264297
declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
265298
declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
266299
declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
300+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
301+
; GFX12-PACKED-GISEL-FAKE16: {{.*}}
302+
; GFX12-PACKED-GISEL-TRUE16: {{.*}}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
44
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6-
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
6+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
7+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
78

89
define amdgpu_ps half @tbuffer_load_d16_x(ptr addrspace(8) inreg %rsrc) {
910
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
@@ -101,13 +102,21 @@ define amdgpu_ps half @tbuffer_load_d16_xyz(ptr addrspace(8) inreg %rsrc) {
101102
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
102103
; GFX10-PACKED-NEXT: ; return to shader part epilog
103104
;
104-
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
105-
; GFX11-PACKED: ; %bb.0: ; %main_body
106-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
107-
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
108-
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
109-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
110-
; GFX11-PACKED-NEXT: ; return to shader part epilog
105+
; GFX11-PACKED-TRUE16-LABEL: tbuffer_load_d16_xyz:
106+
; GFX11-PACKED-TRUE16: ; %bb.0: ; %main_body
107+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b32_e32 v0, 0
108+
; GFX11-PACKED-TRUE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
109+
; GFX11-PACKED-TRUE16-NEXT: s_waitcnt vmcnt(0)
110+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
111+
; GFX11-PACKED-TRUE16-NEXT: ; return to shader part epilog
112+
;
113+
; GFX11-PACKED-FAKE16-LABEL: tbuffer_load_d16_xyz:
114+
; GFX11-PACKED-FAKE16: ; %bb.0: ; %main_body
115+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, 0
116+
; GFX11-PACKED-FAKE16-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
117+
; GFX11-PACKED-FAKE16-NEXT: s_waitcnt vmcnt(0)
118+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, v1
119+
; GFX11-PACKED-FAKE16-NEXT: ; return to shader part epilog
111120
main_body:
112121
%data = call <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
113122
%elt = extractelement <3 x half> %data, i32 2

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
44
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
55
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-PACKED %s
6-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-TRUE16 %s
7+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED,GFX11-PACKED-FAKE16 %s
78

89
define amdgpu_kernel void @tbuffer_store_d16_x(ptr addrspace(8) %rsrc, half %data, i32 %vindex) {
910
; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
@@ -37,16 +38,27 @@ define amdgpu_kernel void @tbuffer_store_d16_x(ptr addrspace(8) %rsrc, half %dat
3738
; GFX10-PACKED-NEXT: tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
3839
; GFX10-PACKED-NEXT: s_endpgm
3940
;
40-
; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
41-
; GFX11-PACKED: ; %bb.0: ; %main_body
42-
; GFX11-PACKED-NEXT: s_clause 0x1
43-
; GFX11-PACKED-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
44-
; GFX11-PACKED-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
45-
; GFX11-PACKED-NEXT: s_waitcnt lgkmcnt(0)
46-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, s6
47-
; GFX11-PACKED-NEXT: v_mov_b32_e32 v1, s7
48-
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
49-
; GFX11-PACKED-NEXT: s_endpgm
41+
; GFX11-PACKED-TRUE16-LABEL: tbuffer_store_d16_x:
42+
; GFX11-PACKED-TRUE16: ; %bb.0: ; %main_body
43+
; GFX11-PACKED-TRUE16-NEXT: s_clause 0x1
44+
; GFX11-PACKED-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
45+
; GFX11-PACKED-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
46+
; GFX11-PACKED-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
47+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b16_e32 v0.l, s6
48+
; GFX11-PACKED-TRUE16-NEXT: v_mov_b32_e32 v1, s7
49+
; GFX11-PACKED-TRUE16-NEXT: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
50+
; GFX11-PACKED-TRUE16-NEXT: s_endpgm
51+
;
52+
; GFX11-PACKED-FAKE16-LABEL: tbuffer_store_d16_x:
53+
; GFX11-PACKED-FAKE16: ; %bb.0: ; %main_body
54+
; GFX11-PACKED-FAKE16-NEXT: s_clause 0x1
55+
; GFX11-PACKED-FAKE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
56+
; GFX11-PACKED-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
57+
; GFX11-PACKED-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
58+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v0, s6
59+
; GFX11-PACKED-FAKE16-NEXT: v_mov_b32_e32 v1, s7
60+
; GFX11-PACKED-FAKE16-NEXT: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
61+
; GFX11-PACKED-FAKE16-NEXT: s_endpgm
5062
main_body:
5163
call void @llvm.amdgcn.struct.ptr.tbuffer.store.f16(half %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
5264
ret void

0 commit comments

Comments
 (0)