Skip to content

Commit 89f8267

Browse files
authored
[AMDGPU][True16][CodeGen] update more GFX11Plus codegen tests with true16 mode (#138600)
This is an NFC patch. It duplicates the GFX11Plus RUN lines in more gfx11/gfx12 tests, running each once with "-mattr=+real-true16" and once with "-mattr=-real-true16", and then regenerates the test checks with the update script.
1 parent 2f752cf commit 89f8267

27 files changed

+79020
-43756
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll

Lines changed: 2124 additions & 1573 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll

Lines changed: 2300 additions & 1717 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll

Lines changed: 2480 additions & 1863 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll

Lines changed: 2656 additions & 2007 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll

Lines changed: 2836 additions & 2153 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll

Lines changed: 3012 additions & 2297 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll

Lines changed: 3192 additions & 2443 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll

Lines changed: 2391 additions & 1190 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll

Lines changed: 2280 additions & 1125 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll

Lines changed: 2280 additions & 1125 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll

Lines changed: 4768 additions & 2353 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll

Lines changed: 5362 additions & 2643 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll

Lines changed: 5362 additions & 2643 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll

Lines changed: 5222 additions & 2577 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll

Lines changed: 741 additions & 338 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX11 %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX11 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX11 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX11 %s
44

55
---
66
name: test_fmamk_reg_imm_f16

llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll

Lines changed: 4950 additions & 2446 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll

Lines changed: 5314 additions & 2604 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll

Lines changed: 5314 additions & 2604 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll

Lines changed: 5196 additions & 2560 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waitcnt.out.order.ll

Lines changed: 128 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150 %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150,GFX1150-TRUE16 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150,GFX1150-FAKE16 %s
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
7+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
58

69
define amdgpu_ps <3 x float> @gather_sample(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <8 x i32> inreg %rsrc2, <4 x i32> inreg %samp2, float %s, float %t) {
710
; GFX11-LABEL: gather_sample:
@@ -80,35 +83,69 @@ define amdgpu_ps <3 x float> @sample_gather(<8 x i32> inreg %rsrc, <4 x i32> inr
8083
}
8184

8285
define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <8 x i32> inreg %rsrc2, i16 %s.16, i16 %t.16, i16 %fragid) {
83-
; GFX11-LABEL: sample_load:
84-
; GFX11: ; %bb.0:
85-
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
86-
; GFX11-NEXT: v_mov_b32_e32 v4, 0
87-
; GFX11-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
88-
; GFX11-NEXT: s_waitcnt vmcnt(0)
89-
; GFX11-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
90-
; GFX11-NEXT: s_waitcnt vmcnt(0)
91-
; GFX11-NEXT: ; return to shader part epilog
86+
; GFX11-TRUE16-LABEL: sample_load:
87+
; GFX11-TRUE16: ; %bb.0:
88+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
89+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
90+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
91+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
92+
; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
93+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
94+
; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
95+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
96+
; GFX11-TRUE16-NEXT: ; return to shader part epilog
9297
;
93-
; GFX1150-LABEL: sample_load:
94-
; GFX1150: ; %bb.0:
95-
; GFX1150-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
96-
; GFX1150-NEXT: v_mov_b32_e32 v4, 0
97-
; GFX1150-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
98-
; GFX1150-NEXT: s_waitcnt vmcnt(0)
99-
; GFX1150-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
100-
; GFX1150-NEXT: s_waitcnt vmcnt(0)
101-
; GFX1150-NEXT: ; return to shader part epilog
98+
; GFX11-FAKE16-LABEL: sample_load:
99+
; GFX11-FAKE16: ; %bb.0:
100+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
101+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, 0
102+
; GFX11-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
103+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
104+
; GFX11-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
105+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
106+
; GFX11-FAKE16-NEXT: ; return to shader part epilog
102107
;
103-
; GFX12-LABEL: sample_load:
104-
; GFX12: ; %bb.0:
105-
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
106-
; GFX12-NEXT: v_mov_b32_e32 v4, 0
107-
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
108-
; GFX12-NEXT: s_wait_samplecnt 0x0
109-
; GFX12-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
110-
; GFX12-NEXT: s_wait_samplecnt 0x0
111-
; GFX12-NEXT: ; return to shader part epilog
108+
; GFX1150-TRUE16-LABEL: sample_load:
109+
; GFX1150-TRUE16: ; %bb.0:
110+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
111+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
112+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
113+
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
114+
; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
115+
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
116+
; GFX1150-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
117+
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
118+
; GFX1150-TRUE16-NEXT: ; return to shader part epilog
119+
;
120+
; GFX1150-FAKE16-LABEL: sample_load:
121+
; GFX1150-FAKE16: ; %bb.0:
122+
; GFX1150-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
123+
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, 0
124+
; GFX1150-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
125+
; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
126+
; GFX1150-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
127+
; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
128+
; GFX1150-FAKE16-NEXT: ; return to shader part epilog
129+
;
130+
; GFX12-TRUE16-LABEL: sample_load:
131+
; GFX12-TRUE16: ; %bb.0:
132+
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
133+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
134+
; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
135+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
136+
; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
137+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
138+
; GFX12-TRUE16-NEXT: ; return to shader part epilog
139+
;
140+
; GFX12-FAKE16-LABEL: sample_load:
141+
; GFX12-FAKE16: ; %bb.0:
142+
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
143+
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, 0
144+
; GFX12-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
145+
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
146+
; GFX12-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
147+
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
148+
; GFX12-FAKE16-NEXT: ; return to shader part epilog
112149

113150
%w = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
114151
%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i16 %s.16, i16 %t.16, i16 %fragid, <8 x i32> %rsrc2, i32 0, i32 0)
@@ -122,35 +159,69 @@ define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg
122159
}
123160

124161
define amdgpu_ps <3 x float> @load_sample(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <8 x i32> inreg %rsrc2, i16 %s.16, i16 %t.16, i16 %fragid) {
125-
; GFX11-LABEL: load_sample:
126-
; GFX11: ; %bb.0:
127-
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
128-
; GFX11-NEXT: v_mov_b32_e32 v4, 0
129-
; GFX11-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
130-
; GFX11-NEXT: s_waitcnt vmcnt(0)
131-
; GFX11-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
132-
; GFX11-NEXT: s_waitcnt vmcnt(0)
133-
; GFX11-NEXT: ; return to shader part epilog
162+
; GFX11-TRUE16-LABEL: load_sample:
163+
; GFX11-TRUE16: ; %bb.0:
164+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
165+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
166+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
167+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
168+
; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
169+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
170+
; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
171+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
172+
; GFX11-TRUE16-NEXT: ; return to shader part epilog
134173
;
135-
; GFX1150-LABEL: load_sample:
136-
; GFX1150: ; %bb.0:
137-
; GFX1150-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
138-
; GFX1150-NEXT: v_mov_b32_e32 v4, 0
139-
; GFX1150-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
140-
; GFX1150-NEXT: s_waitcnt vmcnt(0)
141-
; GFX1150-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
142-
; GFX1150-NEXT: s_waitcnt vmcnt(0)
143-
; GFX1150-NEXT: ; return to shader part epilog
174+
; GFX11-FAKE16-LABEL: load_sample:
175+
; GFX11-FAKE16: ; %bb.0:
176+
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
177+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, 0
178+
; GFX11-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
179+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
180+
; GFX11-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
181+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
182+
; GFX11-FAKE16-NEXT: ; return to shader part epilog
144183
;
145-
; GFX12-LABEL: load_sample:
146-
; GFX12: ; %bb.0:
147-
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
148-
; GFX12-NEXT: v_mov_b32_e32 v4, 0
149-
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
150-
; GFX12-NEXT: s_wait_samplecnt 0x0
151-
; GFX12-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
152-
; GFX12-NEXT: s_wait_samplecnt 0x0
153-
; GFX12-NEXT: ; return to shader part epilog
184+
; GFX1150-TRUE16-LABEL: load_sample:
185+
; GFX1150-TRUE16: ; %bb.0:
186+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
187+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
188+
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
189+
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
190+
; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
191+
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
192+
; GFX1150-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
193+
; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
194+
; GFX1150-TRUE16-NEXT: ; return to shader part epilog
195+
;
196+
; GFX1150-FAKE16-LABEL: load_sample:
197+
; GFX1150-FAKE16: ; %bb.0:
198+
; GFX1150-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
199+
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, 0
200+
; GFX1150-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
201+
; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
202+
; GFX1150-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
203+
; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
204+
; GFX1150-FAKE16-NEXT: ; return to shader part epilog
205+
;
206+
; GFX12-TRUE16-LABEL: load_sample:
207+
; GFX12-TRUE16: ; %bb.0:
208+
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
209+
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
210+
; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
211+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
212+
; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
213+
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
214+
; GFX12-TRUE16-NEXT: ; return to shader part epilog
215+
;
216+
; GFX12-FAKE16-LABEL: load_sample:
217+
; GFX12-FAKE16: ; %bb.0:
218+
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
219+
; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, 0
220+
; GFX12-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
221+
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
222+
; GFX12-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
223+
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
224+
; GFX12-FAKE16-NEXT: ; return to shader part epilog
154225

155226
%v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i16 %s.16, i16 %t.16, i16 %fragid, <8 x i32> %rsrc2, i32 0, i32 0)
156227
%w = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)

0 commit comments

Comments
 (0)