Skip to content

AMDGPU: Add more freeze codegen tests #131843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 19, 2025

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Mar 18, 2025

No description provided.

Copy link
Contributor Author

arsenm commented Mar 18, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm requested a review from frederik-h March 18, 2025 16:17
@arsenm arsenm marked this pull request as ready for review March 18, 2025 16:17
@llvmbot
Copy link
Member

llvmbot commented Mar 18, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 88.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131843.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/freeze.ll (+1963)
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 22427ee344d91..42d6e57585345 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1854,3 +1854,1966 @@ define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
   store i256 %freeze, ptr addrspace(1) %ptrb, align 4
   ret void
 }
+
+define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load i16, ptr addrspace(1) %ptra
+  %freeze = freeze i16 %a
+  store i16 %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <2 x i16>, ptr addrspace(1) %ptra
+  %freeze = freeze <2 x i16> %a
+  store <2 x i16> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3i16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3i16:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT:    global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3i16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3i16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    s_clause 0x2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT:    global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <3 x i16>, ptr addrspace(1) %ptra
+  %freeze = freeze <3 x i16> %a
+  store <3 x i16> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <4 x i16>, ptr addrspace(1) %ptra
+  %freeze = freeze <4 x i16> %a
+  store <4 x i16> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <8 x i16>, ptr addrspace(1) %ptra
+  %freeze = freeze <8 x i16> %a
+  store <8 x i16> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16i16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    s_clause 0x1
+; GFX10-SDAG-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16i16:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16i16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT:    global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT:    global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16i16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT:    global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <16 x i16>, ptr addrspace(1) %ptra
+  %freeze = freeze <16 x i16> %a
+  store <16 x i16> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load half, ptr addrspace(1) %ptra
+  %freeze = freeze half %a
+  store half %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <2 x half>, ptr addrspace(1) %ptra
+  %freeze = freeze <2 x half> %a
+  store <2 x half> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3f16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3f16:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT:    global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3f16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3f16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    s_clause 0x2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT:    global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <3 x half>, ptr addrspace(1) %ptra
+  %freeze = freeze <3 x half> %a
+  store <3 x half> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <4 x half>, ptr addrspace(1) %ptra
+  %freeze = freeze <4 x half> %a
+  store <4 x half> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <8 x half>, ptr addrspace(1) %ptra
+  %freeze = freeze <8 x half> %a
+  store <8 x half> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16f16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    s_clause 0x1
+; GFX10-SDAG-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16f16:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16f16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT:    global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT:    global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16f16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT:    global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT:    global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <16 x half>, ptr addrspace(1) %ptra
+  %freeze = freeze <16 x half> %a
+  store <16 x half> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_bf16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_bf16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load bfloat, ptr addrspace(1) %ptra
+  %freeze = freeze bfloat %a
+  store bfloat %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2bf16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dword v0, v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2bf16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <2 x bfloat>, ptr addrspace(1) %ptra
+  %freeze = freeze <2 x bfloat> %a
+  store <2 x bfloat> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3bf16:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3bf16:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT:    global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT:    global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3bf16:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3bf16:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT:    s_clause 0x2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT:    global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT:    global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %a = load <3 x bfloat>, ptr addrspace(1) %ptra
+  %freeze = freeze <3 x bfloat> %a
+  store <3 x bfloat> %freeze, ptr addrspace(1) %ptrb
+  ret void
+}
+
+define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4bf16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4bf16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %a = ...
[truncated]

Copy link
Contributor

@frederik-h frederik-h left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good. This would have caught the missing case handling in the half promotion ...

@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-more-freeze-codegen-tests branch from ded45cf to 7a456ba Compare March 19, 2025 00:51
Copy link
Contributor Author

arsenm commented Mar 19, 2025

Merge activity

  • Mar 18, 11:16 PM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Mar 18, 11:18 PM EDT: A user merged this pull request with Graphite.

@arsenm arsenm merged commit 5ac680c into main Mar 19, 2025
8 of 10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-more-freeze-codegen-tests branch March 19, 2025 03:18
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants