Skip to content

[AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 3 #131212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

broxigarchen
Copy link
Contributor

@broxigarchen broxigarchen commented Mar 13, 2025

This is a NFC patch.

Enable true16 mode for more CodeGen tests

@broxigarchen broxigarchen changed the title enable true16 for more test [AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 3 Mar 13, 2025
@broxigarchen broxigarchen marked this pull request as ready for review March 13, 2025 20:47
@llvmbot
Copy link
Member

llvmbot commented Mar 13, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

This is a NFC patch.

Enable true16 mode for more CodeGen tests


Patch is 55.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131212.diff

8 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll (+218-100)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll (+21-10)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll (+31-15)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll (+57-21)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll (+17-8)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll (+23-11)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll (+35-17)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll (+64-24)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
index 449c4ebec889d..b6a8a1c2dea2d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
 
 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; GFX10-LABEL: sample_d_1d:
@@ -35,21 +37,37 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_d_2d:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_2d:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_d_2d:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_d_2d:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_2d:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_2d:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -66,21 +84,37 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_d_3d:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_3d:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_d_3d:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_d_3d:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_3d:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_3d:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v3, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -118,21 +152,37 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_c_d_2d:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
-; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_2d:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.h, v2.l
+; GFX11-TRUE16-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_c_d_2d:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
-; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_c_d_2d:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_2d:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_2d:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -170,21 +220,37 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_d_cl_2d:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_d_cl_2d:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_d_cl_2d:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_d_cl_2d:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_d_cl_2d:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_d_cl_2d:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -224,21 +290,37 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32>
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_c_d_cl_2d:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
-; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_cl_2d:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.h, v2.l
+; GFX11-TRUE16-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_c_d_cl_2d:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
-; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[5:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_c_d_cl_2d:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_cl_2d:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v3.h, v4.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v1.h, v2.l
+; GFX12-TRUE16-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[5:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_cl_2d:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v[5:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -257,21 +339,39 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32>
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_c_d_o_2darray_V1:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_o_2darray_V1:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v4.h, v5.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_c_d_o_2darray_V1:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX12-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_c_d_o_2darray_V1:
+; GFX11-FAKE16:       ; %bb.0: ; %main_body
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: sample_c_d_o_2darray_V1:
+; GFX12-TRUE16:       ; %bb.0: ; %main_body
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v9, v5
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v5.l, v4.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v5.h, v9.l
+; GFX12-TRUE16-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: sample_c_d_o_2darray_V1:
+; GFX12-FAKE16:       ; %bb.0: ; %main_body
+; GFX12-FAKE16-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
+; GFX12-FAKE16-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
+; GFX12-FAKE16-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
@@ -290,21 +390,39 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: sample_c_d_o_2darray_V2:
-; GFX11:       ; %bb.0: ; %main_body
-; GFX11-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11-TRUE16-LABEL: sample_c_d_o_2darray_V2:
+; GFX11-TRUE16:       ; %bb.0: ; %main_body
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v4.h, v5.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v2.h, v3.l
+; GFX11-TRUE16-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
 ;
-; GFX12-LABEL: sample_c_d_o_2darray_V2:
-; GFX12:       ; %bb.0: ; %main_body
-; GFX12-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
-; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
-; GFX12-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    ; return to shader part epilog
+; GFX11-FAKE16-LABEL: sample_c_d_o_2darray...
[truncated]

@broxigarchen broxigarchen marked this pull request as draft March 14, 2025 02:43
@broxigarchen broxigarchen marked this pull request as ready for review March 14, 2025 02:44
@broxigarchen broxigarchen merged commit 0f1175c into llvm:main Mar 14, 2025
15 of 17 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants