Skip to content

Commit d99b146

Browse files
authored
AMDGPU: Implement tensor_save and tensor_stop for gfx1250 (#146590)
MC layer only.
1 parent bcbc440 commit d99b146

File tree

3 files changed

+64
-0
lines changed

3 files changed

+64
-0
lines changed

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,23 @@ multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass
392392
GlobalSaddrTable<1, opName>;
393393
}
394394

395+
// Pseudo instruction class for the tensor_save / tensor_stop style operations.
//   opName      - assembly mnemonic, forwarded to FLAT_Pseudo.
//   EnableSaddr - when set, the instruction takes a 64-bit SGPR address
//                 ($saddr) plus an immediate $offset; when clear, $cpol is
//                 the only operand.
// The instruction defines no results (empty outs) and is flagged as a store
// (mayStore = 1, mayLoad = 0).
class FLAT_Global_Tensor_Pseudo<string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
396+
opName,
397+
(outs ),
398+
// Input operands: optional (saddr, offset) pair followed by the cache-policy
// operand, concatenated into one dag with !con.
!con(!if(EnableSaddr, (ins SReg_64:$saddr, flat_offset:$offset), (ins )), (ins CPol_0:$cpol)),
399+
// Assembly operand string: " $saddr$offset" or a single space, then "$cpol".
!if(EnableSaddr, " $saddr$offset", " ")#"$cpol"> {
400+
401+
let is_flat_global = 1;
402+
// No vector destination, data, or vector address operands.
let has_vdst = 0;
403+
let has_data = 0;
404+
let has_vaddr = 0;
405+
let mayLoad = 0;
406+
let mayStore = 1;
407+
// NOTE(review): has_saddr is unconditionally 1 while enabled_saddr follows
// EnableSaddr. Presumably the encoding always carries an saddr field that is
// filled with a "no register" value when saddr is not enabled; FLAT_Pseudo is
// not visible here, so confirm against its definition.
let has_saddr = 1;
408+
let enabled_saddr = EnableSaddr;
409+
// The offset operand only exists in the saddr form (see the ins list above).
let has_offset = EnableSaddr;
410+
}
411+
395412
class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> :
396413
FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> {
397414

@@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in {
10521069
def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
10531070
} // End SubtargetPredicate = isGFX12Plus
10541071

1072+
// Tensor pseudo instructions, gated on the isGFX1250Plus subtarget predicate.
let SubtargetPredicate = isGFX1250Plus in {
1073+
1074+
// tensor_save: instantiated with EnableSaddr = 1, so it takes $saddr and
// $offset in addition to $cpol.
def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>;
1075+
// tensor_stop: uses the default EnableSaddr = 0, so $cpol is its only operand.
def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">;
1076+
} // End SubtargetPredicate = isGFX1250Plus
1077+
10551078
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
10561079
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
10571080
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
@@ -3061,6 +3084,9 @@ multiclass VFLAT_Real_Atomics_gfx1250<bits<8> op, string name = get_FLAT_ps<NAME
30613084
VFLAT_Real_RTN_gfx1250<op, name>,
30623085
VFLAT_Real_SADDR_RTN_gfx1250<op, name>;
30633086

3087+
// gfx1250 real (encoded) instructions for the TENSOR_SAVE / TENSOR_STOP
// pseudos. NOTE(review): the numeric argument is presumably the opcode field
// of the encoding; VFLAT_Real_gfx1250 is not visible here, so confirm there.
defm TENSOR_SAVE : VFLAT_Real_gfx1250<0x06e>;
3088+
defm TENSOR_STOP : VFLAT_Real_gfx1250<0x06f>;
3089+
30643090
defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
30653091
defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
30663092

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Assembler test for tensor_save and tensor_stop: the first invocation checks
// the printed instruction and its encoding when targeting gfx1250; the second
// expects llvm-mc to fail for gfx1200 and checks the diagnostic emitted for
// every instruction in this file.
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
3+
4+
tensor_save s[0:1]
5+
// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
6+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
7+
8+
tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS
9+
// GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
10+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
11+
12+
tensor_save s[0:1] offset:32
13+
// GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
14+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
15+
16+
tensor_stop
17+
// GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
18+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
19+
20+
tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS
21+
// GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
22+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Disassembler test for tensor_save and tensor_stop: each byte sequence below
# is fed to llvm-mc in disassemble mode for gfx1250 and must print the
# instruction text and encoding given in the check line that precedes it.
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
2+
3+
# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
4+
0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
5+
6+
# GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
7+
0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00
8+
9+
# GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
10+
0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00
11+
12+
# GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
13+
0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
14+
15+
# GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
16+
0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00

0 commit comments

Comments
 (0)