-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add more freeze codegen tests #131843
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add more freeze codegen tests #131843
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesPatch is 88.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131843.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 22427ee344d91..42d6e57585345 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -1854,3 +1854,1966 @@ define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
store i256 %freeze, ptr addrspace(1) %ptrb, align 4
ret void
}
+
+define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load i16, ptr addrspace(1) %ptra
+ %freeze = freeze i16 %a
+ store i16 %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x i16> %a
+ store <2 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3i16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3i16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x i16> %a
+ store <3 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <4 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <4 x i16> %a
+ store <4 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <8 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <8 x i16> %a
+ store <8 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16i16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16i16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <16 x i16>, ptr addrspace(1) %ptra
+ %freeze = freeze <16 x i16> %a
+ store <16 x i16> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load half, ptr addrspace(1) %ptra
+ %freeze = freeze half %a
+ store half %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x half> %a
+ store <2 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x half> %a
+ store <3 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <4 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <4 x half> %a
+ store <4 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v8f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v8f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <8 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <8 x half> %a
+ store <8 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <16 x half>, ptr addrspace(1) %ptra
+ %freeze = freeze <16 x half> %a
+ store <16 x half> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_ushort v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_u16 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load bfloat, ptr addrspace(1) %ptra
+ %freeze = freeze bfloat %a
+ store bfloat %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v2bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v2bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v0, v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <2 x bfloat>, ptr addrspace(1) %ptra
+ %freeze = freeze <2 x bfloat> %a
+ store <2 x bfloat> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v3bf16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v3bf16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v0, off
+; GFX10-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2
+; GFX10-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v3bf16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v3bf16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-GISEL-NEXT: global_store_d16_hi_b16 v[2:3], v0, off offset:2
+; GFX11-GISEL-NEXT: global_store_b16 v[2:3], v1, off offset:4
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <3 x bfloat>, ptr addrspace(1) %ptra
+ %freeze = freeze <3 x bfloat> %a
+ store <3 x bfloat> %freeze, ptr addrspace(1) %ptrb
+ ret void
+}
+
+define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v4bf16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v4bf16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good. This would have caught the missing case handling in the half promotion ...
ded45cf
to
7a456ba
Compare
No description provided.