-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][GFX12] Add support for new block ls instructions #96273
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][GFX12] Add support for new block ls instructions #96273
Conversation
Add MC layer support for new instructions: GLOBAL_LOAD_BLOCK, GLOBAL_STORE_BLOCK, SCRATCH_LOAD_BLOCK, SCRATCH_STORE_BLOCK.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Mariusz Sikora (mariusz-sikora-at-amd) Changes: Add MC layer support for new instructions: GLOBAL_LOAD_BLOCK, GLOBAL_STORE_BLOCK, SCRATCH_LOAD_BLOCK, SCRATCH_STORE_BLOCK. Full diff: https://github.com/llvm/llvm-project/pull/96273.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 3d2a74adab965..8ecbd62903a24 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -966,6 +966,15 @@ defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_s
defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
let SubtargetPredicate = isGFX12Plus in {
+ let Uses = [EXEC, M0] in {
+ defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>;
+ defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>;
+ }
+ let Uses = [EXEC, FLAT_SCR, M0] in {
+ defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>;
+ defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>;
+ }
+
let WaveSizePredicate = isWave32 in {
let Mnemonic = "global_load_tr_b128" in
defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>;
@@ -2658,6 +2667,8 @@ defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_s
defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">;
defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">;
defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">;
+defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>;
+defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>;
defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">;
defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">;
@@ -2728,3 +2739,6 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_
defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_BYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">;
+
+defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>;
+defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 80c623514bda1..c64b3a7c356f2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2230,7 +2230,8 @@ class getLdStRegisterOperand<RegisterClass RC> {
!eq(RC.Size, 64) : AVLdSt_64,
!eq(RC.Size, 96) : AVLdSt_96,
!eq(RC.Size, 128) : AVLdSt_128,
- !eq(RC.Size, 160) : AVLdSt_160);
+ !eq(RC.Size, 160) : AVLdSt_160,
+ !eq(RC.Size, 1024) : AVLdSt_1024);
}
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index a8efe2b2ba35e..f1d9aec163635 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1344,6 +1344,7 @@ def AVLdSt_64 : AVLdStOperand<AV_64, "OPW64">;
def AVLdSt_96 : AVLdStOperand<AV_96, "OPW96">;
def AVLdSt_128 : AVLdStOperand<AV_128, "OPW128">;
def AVLdSt_160 : AVLdStOperand<AV_160, "OPW160">;
+def AVLdSt_1024 : AVLdStOperand<AV_1024, "OPW1024">;
//===----------------------------------------------------------------------===//
// ACSrc_* Operands with an AGPR or an inline constant
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
index 575bc1a8255a6..30bfaff8f17aa 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vflat.s
@@ -1896,6 +1896,27 @@ global_load_u8 v1, v[0:1], off offset:64
global_load_u8 v1, v[3:4], off
// GFX12: encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
+global_load_block v[9:40], v0, s[0:1] offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v0, s[0:1] offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v5, s[2:3]
+// GFX12: encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[0:1], off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+global_load_block v[9:40], v[0:1], off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+
+global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+
global_store_addtid_b32 v2, off offset:-64
// GFX12: encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
@@ -2058,6 +2079,27 @@ global_store_d16_hi_b8 v[0:1], v2, off offset:64
global_store_d16_hi_b8 v[3:4], v1, off
// GFX12: encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
+global_store_block v0, v[2:33], s[0:1] offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v0, v[2:33], s[0:1] offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v1, v[3:34], s[2:3]
+// GFX12: encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[0:1], v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+global_store_block v[0:1], v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+
+global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+
global_inv
// GFX12: encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
@@ -2490,6 +2532,36 @@ scratch_load_u8 v1, v0, s0 offset:64
scratch_load_u8 v1, v2, s1
// GFX12: encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+scratch_load_block v[3:34], off, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], off, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], off, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, off offset:-64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, off offset:64
+// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v0, s0 offset:-64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+
+scratch_load_block v[3:34], v0, s0 offset:64
+// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+
+scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE
+// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+
scratch_store_b128 off, v[2:5], off offset:-64
// GFX12: encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
@@ -2732,3 +2804,33 @@ scratch_store_d16_hi_b8 v0, v2, s0 offset:64
scratch_store_d16_hi_b8 v1, v2, s3
// GFX12: encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block off, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block off, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block off, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], off offset:-64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], off offset:64
+// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v0, v[2:33], s0 offset:-64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+
+scratch_store_block v0, v[2:33], s0 offset:64
+// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+
+scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE
+// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
index e0b658b1fda3b..7953e0eb67c1b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt
@@ -1155,6 +1155,27 @@
# GFX12: global_load_u8 v1, v[3:4], off ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: global_load_block v[9:40], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
+
+# GFX12: global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
+0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00
+
# GFX12: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
@@ -1257,6 +1278,27 @@
# GFX12: global_store_d16_hi_b8 v[3:4], v1, off ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v1, v[3:34], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: global_store_block v[0:1], v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
+0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00
+
# GFX12: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
@@ -1518,6 +1560,36 @@
# GFX12: scratch_load_u8 v1, v2, s1 ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+# GFX12: scratch_load_block v[3:34], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_load_block v[3:34], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
+0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
+0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00
+
# GFX12: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
@@ -1658,3 +1730,33 @@
# GFX12: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
+
+# GFX12: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
+0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
+
+# GFX12: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
+0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Add MC layer support for new instructions: GLOBAL_LOAD_BLOCK, GLOBAL_STORE_BLOCK, SCRATCH_LOAD_BLOCK, SCRATCH_STORE_BLOCK. Co-authored-by: Piotr Sobczak <[email protected]>
Add MC layer support for new instructions:
GLOBAL_LOAD_BLOCK
GLOBAL_STORE_BLOCK
SCRATCH_LOAD_BLOCK
SCRATCH_STORE_BLOCK