Skip to content

Commit fbf0ca6

Browse files
[AMDGPU][GFX12] Add support for new block load/store instructions (#96273)
Add MC layer support for new instructions: GLOBAL_LOAD_BLOCK, GLOBAL_STORE_BLOCK, SCRATCH_LOAD_BLOCK, SCRATCH_STORE_BLOCK. Co-authored-by: Piotr Sobczak <[email protected]>
1 parent f82a595 commit fbf0ca6

File tree

5 files changed

+221
-1
lines changed

5 files changed

+221
-1
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -966,6 +966,15 @@ defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_s
966966
defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
967967

968968
let SubtargetPredicate = isGFX12Plus in {
969+
let Uses = [EXEC, M0] in {
970+
defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>;
971+
defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>;
972+
}
973+
let Uses = [EXEC, FLAT_SCR, M0] in {
974+
defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>;
975+
defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>;
976+
}
977+
969978
let WaveSizePredicate = isWave32 in {
970979
let Mnemonic = "global_load_tr_b128" in
971980
defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>;
@@ -2658,6 +2667,8 @@ defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_s
26582667
defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">;
26592668
defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">;
26602669
defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">;
2670+
defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>;
2671+
defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>;
26612672

26622673
defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">;
26632674
defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">;
@@ -2728,3 +2739,6 @@ defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_
27282739
defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">;
27292740
defm SCRATCH_STORE_BYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">;
27302741
defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">;
2742+
2743+
defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>;
2744+
defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2230,7 +2230,8 @@ class getLdStRegisterOperand<RegisterClass RC> {
22302230
!eq(RC.Size, 64) : AVLdSt_64,
22312231
!eq(RC.Size, 96) : AVLdSt_96,
22322232
!eq(RC.Size, 128) : AVLdSt_128,
2233-
!eq(RC.Size, 160) : AVLdSt_160);
2233+
!eq(RC.Size, 160) : AVLdSt_160,
2234+
!eq(RC.Size, 1024) : AVLdSt_1024);
22342235
}
22352236

22362237
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,7 @@ def AVLdSt_64 : AVLdStOperand<AV_64, "OPW64">;
13441344
def AVLdSt_96 : AVLdStOperand<AV_96, "OPW96">;
13451345
def AVLdSt_128 : AVLdStOperand<AV_128, "OPW128">;
13461346
def AVLdSt_160 : AVLdStOperand<AV_160, "OPW160">;
1347+
def AVLdSt_1024 : AVLdStOperand<AV_1024, "OPW1024">;
13471348

13481349
//===----------------------------------------------------------------------===//
13491350
// ACSrc_* Operands with an AGPR or an inline constant

llvm/test/MC/AMDGPU/gfx12_asm_vflat.s

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,6 +1896,27 @@ global_load_u8 v1, v[0:1], off offset:64
18961896
global_load_u8 v1, v[3:4], off
18971897
// GFX12: encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
18981898

1899+
global_load_block v[9:40], v0, s[0:1] offset:-64
1900+
// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1901+
1902+
global_load_block v[9:40], v0, s[0:1] offset:64
1903+
// GFX12: encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1904+
1905+
global_load_block v[9:40], v5, s[2:3]
1906+
// GFX12: encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
1907+
1908+
global_load_block v[9:40], v[0:1], off offset:-64
1909+
// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1910+
1911+
global_load_block v[9:40], v[0:1], off offset:64
1912+
// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1913+
1914+
global_load_block v[9:40], v[5:6], off
1915+
// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
1916+
1917+
global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE
1918+
// GFX12: encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
1919+
18991920
global_store_addtid_b32 v2, off offset:-64
19001921
// GFX12: encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
19011922

@@ -2058,6 +2079,27 @@ global_store_d16_hi_b8 v[0:1], v2, off offset:64
20582079
global_store_d16_hi_b8 v[3:4], v1, off
20592080
// GFX12: encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
20602081

2082+
global_store_block v0, v[2:33], s[0:1] offset:-64
2083+
// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
2084+
2085+
global_store_block v0, v[2:33], s[0:1] offset:64
2086+
// GFX12: encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
2087+
2088+
global_store_block v1, v[3:34], s[2:3]
2089+
// GFX12: encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
2090+
2091+
global_store_block v[0:1], v[2:33], off offset:-64
2092+
// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
2093+
2094+
global_store_block v[0:1], v[2:33], off offset:64
2095+
// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
2096+
2097+
global_store_block v[1:2], v[3:34], off
2098+
// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
2099+
2100+
global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE
2101+
// GFX12: encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
2102+
20612103
global_inv
20622104
// GFX12: encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
20632105

@@ -2490,6 +2532,36 @@ scratch_load_u8 v1, v0, s0 offset:64
24902532
scratch_load_u8 v1, v2, s1
24912533
// GFX12: encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
24922534

2535+
scratch_load_block v[3:34], off, off offset:-64
2536+
// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
2537+
2538+
scratch_load_block v[3:34], off, off offset:64
2539+
// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
2540+
2541+
scratch_load_block v[3:34], off, s0 offset:-64
2542+
// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
2543+
2544+
scratch_load_block v[3:34], off, s0 offset:64
2545+
// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
2546+
2547+
scratch_load_block v[3:34], v0, off offset:-64
2548+
// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
2549+
2550+
scratch_load_block v[3:34], v0, off offset:64
2551+
// GFX12: encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
2552+
2553+
scratch_load_block v[3:34], v0, s0 offset:-64
2554+
// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
2555+
2556+
scratch_load_block v[3:34], v0, s0 offset:64
2557+
// GFX12: encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
2558+
2559+
scratch_load_block v[3:34], v2, s1
2560+
// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
2561+
2562+
scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE
2563+
// GFX12: encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
2564+
24932565
scratch_store_b128 off, v[2:5], off offset:-64
24942566
// GFX12: encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
24952567

@@ -2732,3 +2804,33 @@ scratch_store_d16_hi_b8 v0, v2, s0 offset:64
27322804

27332805
scratch_store_d16_hi_b8 v1, v2, s3
27342806
// GFX12: encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
2807+
2808+
scratch_store_block off, v[2:33], off offset:-64
2809+
// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
2810+
2811+
scratch_store_block off, v[2:33], off offset:64
2812+
// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
2813+
2814+
scratch_store_block off, v[2:33], s0 offset:-64
2815+
// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
2816+
2817+
scratch_store_block off, v[2:33], s0 offset:64
2818+
// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
2819+
2820+
scratch_store_block v0, v[2:33], off offset:-64
2821+
// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
2822+
2823+
scratch_store_block v0, v[2:33], off offset:64
2824+
// GFX12: encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
2825+
2826+
scratch_store_block v0, v[2:33], s0 offset:-64
2827+
// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
2828+
2829+
scratch_store_block v0, v[2:33], s0 offset:64
2830+
// GFX12: encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
2831+
2832+
scratch_store_block v1, v[2:33], s3
2833+
// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
2834+
2835+
scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE
2836+
// GFX12: encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]

llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vflat.txt

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,27 @@
11551155
# GFX12: global_load_u8 v1, v[3:4], off ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
11561156
0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00
11571157

1158+
# GFX12: global_load_block v[9:40], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1159+
0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
1160+
1161+
# GFX12: global_load_block v[9:40], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1162+
0x00,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
1163+
1164+
# GFX12: global_load_block v[9:40], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
1165+
0x02,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
1166+
1167+
# GFX12: global_load_block v[9:40], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1168+
0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
1169+
1170+
# GFX12: global_load_block v[9:40], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1171+
0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x00,0x40,0x00,0x00
1172+
1173+
# GFX12: global_load_block v[9:40], v[5:6], off ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00]
1174+
0x7c,0xc0,0x14,0xee,0x09,0x00,0x00,0x00,0x05,0x00,0x00,0x00
1175+
1176+
# GFX12: global_load_block v[9:40], v[5:6], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00]
1177+
0x7c,0xc0,0x14,0xee,0x09,0x00,0x24,0x00,0x05,0x00,0x00,0x00
1178+
11581179
# GFX12: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
11591180
0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
11601181

@@ -1257,6 +1278,27 @@
12571278
# GFX12: global_store_d16_hi_b8 v[3:4], v1, off ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00]
12581279
0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00
12591280

1281+
# GFX12: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
1282+
0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
1283+
1284+
# GFX12: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
1285+
0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
1286+
1287+
# GFX12: global_store_block v1, v[3:34], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
1288+
0x02,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
1289+
1290+
# GFX12: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
1291+
0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
1292+
1293+
# GFX12: global_store_block v[0:1], v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
1294+
0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
1295+
1296+
# GFX12: global_store_block v[1:2], v[3:34], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00]
1297+
0x7c,0x00,0x15,0xee,0x00,0x00,0x80,0x01,0x01,0x00,0x00,0x00
1298+
1299+
# GFX12: global_store_block v[1:2], v[3:34], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00]
1300+
0x7c,0x00,0x15,0xee,0x00,0x00,0xa4,0x01,0x01,0x00,0x00,0x00
1301+
12601302
# GFX12: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
12611303
0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
12621304

@@ -1518,6 +1560,36 @@
15181560
# GFX12: scratch_load_u8 v1, v2, s1 ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
15191561
0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00
15201562

1563+
# GFX12: scratch_load_block v[3:34], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1564+
0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
1565+
1566+
# GFX12: scratch_load_block v[3:34], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1567+
0x7c,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
1568+
1569+
# GFX12: scratch_load_block v[3:34], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
1570+
0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0xc0,0xff,0xff
1571+
1572+
# GFX12: scratch_load_block v[3:34], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00]
1573+
0x00,0xc0,0x14,0xed,0x03,0x00,0x00,0x00,0x00,0x40,0x00,0x00
1574+
1575+
# GFX12: scratch_load_block v[3:34], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
1576+
0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
1577+
1578+
# GFX12: scratch_load_block v[3:34], v0, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
1579+
0x7c,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
1580+
1581+
# GFX12: scratch_load_block v[3:34], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff]
1582+
0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0xc0,0xff,0xff
1583+
1584+
# GFX12: scratch_load_block v[3:34], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00]
1585+
0x00,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x00,0x40,0x00,0x00
1586+
1587+
# GFX12: scratch_load_block v[3:34], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
1588+
0x01,0xc0,0x14,0xed,0x03,0x00,0x02,0x00,0x02,0x00,0x00,0x00
1589+
1590+
# GFX12: scratch_load_block v[3:34], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00]
1591+
0x01,0xc0,0x14,0xed,0x03,0x00,0x26,0x00,0x02,0x00,0x00,0x00
1592+
15211593
# GFX12: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
15221594
0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
15231595

@@ -1658,3 +1730,33 @@
16581730

16591731
# GFX12: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
16601732
0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
1733+
1734+
# GFX12: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
1735+
0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
1736+
1737+
# GFX12: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
1738+
0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
1739+
1740+
# GFX12: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
1741+
0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff
1742+
1743+
# GFX12: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
1744+
0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00
1745+
1746+
# GFX12: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
1747+
0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
1748+
1749+
# GFX12: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
1750+
0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
1751+
1752+
# GFX12: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff]
1753+
0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff
1754+
1755+
# GFX12: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00]
1756+
0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00
1757+
1758+
# GFX12: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00]
1759+
0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00
1760+
1761+
# GFX12: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00]
1762+
0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00

0 commit comments

Comments
 (0)