AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590

changpeng · 2025-07-01T18:55:40Z

MC layer only.

llvmbot · 2025-07-01T18:56:09Z

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

Changes

MC layer only.

Full diff: https://github.com/llvm/llvm-project/pull/146590.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+26)
(added) llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s (+22)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt (+16)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9ae98de039e34..3625db9a4791f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -392,6 +392,23 @@ multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass
     GlobalSaddrTable<1, opName>;
 }
 
+class FLAT_Global_Tensor_Pseudo<string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+  opName,
+  (outs ),
+  !con(!if(EnableSaddr, (ins SReg_64:$saddr, flat_offset:$offset), (ins )), (ins CPol_0:$cpol)),
+  !if(EnableSaddr, " $saddr$offset", " ")#"$cpol"> {
+
+  let is_flat_global = 1;
+  let has_vdst = 0;
+  let has_data = 0;
+  let has_vaddr = 0;
+  let mayLoad = 0;
+  let mayStore = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_offset = EnableSaddr;
+}
+
 class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> :
   FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> {
 
@@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in {
   def GLOBAL_WBINV  : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
 } // End SubtargetPredicate = isGFX12Plus
 
+let SubtargetPredicate = isGFX1250Plus in {
+
+def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>;
+def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">;
+} // End SubtargetPredicate = isGFX1250Plus
+
 defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
 defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
 defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
@@ -3061,6 +3084,9 @@ multiclass VFLAT_Real_Atomics_gfx1250<bits<8> op, string name = get_FLAT_ps<NAME
   VFLAT_Real_RTN_gfx1250<op, name>,
   VFLAT_Real_SADDR_RTN_gfx1250<op, name>;
 
+defm TENSOR_SAVE                      : VFLAT_Real_gfx1250<0x06e>;
+defm TENSOR_STOP                      : VFLAT_Real_gfx1250<0x06f>;
+
 defm GLOBAL_LOAD_TR_B128_w32          : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
 defm GLOBAL_LOAD_TR_B64_w32           : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
new file mode 100644
index 0000000000000..07b4055f0ab9c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+tensor_save s[0:1]
+// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_save s[0:1] offset:32
+// GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_stop
+// GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
new file mode 100644
index 0000000000000..6421c6f30e177
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00
+
+# GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00

shiltian · 2025-07-01T19:49:42Z

llvm/lib/Target/AMDGPU/FLATInstructions.td

@@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in {
  def GLOBAL_WBINV  : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
 } // End SubtargetPredicate = isGFX12Plus

+let SubtargetPredicate = isGFX1250Plus in {
+


Is this blank line intended?

Is this blank line intended?

Yes. The empty lines around "let SubtargetPredicate = isGFX1250Plus" are asymmetric; I just want to avoid a merge conflict.
Or we can fix it downstream.

AMDGPU: Implement tensor_save and tensor_stop for gfx1250

69f8ff4

MC layer only.

llvmbot added backend:AMDGPU mc Machine (object) code labels Jul 1, 2025

changpeng requested review from shiltian and rampitec July 1, 2025 18:55

shiltian approved these changes Jul 1, 2025

View reviewed changes

changpeng merged commit d99b146 into llvm:main Jul 1, 2025
10 checks passed

changpeng deleted the tensor branch July 1, 2025 21:29

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590

AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590

Uh oh!

changpeng commented Jul 1, 2025

Uh oh!

llvmbot commented Jul 1, 2025 •

edited

Loading

Uh oh!

shiltian Jul 1, 2025

Uh oh!

changpeng Jul 1, 2025

Uh oh!

Uh oh!

Uh oh!

AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590

AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590

Uh oh!

Conversation

changpeng commented Jul 1, 2025

Uh oh!

llvmbot commented Jul 1, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

shiltian Jul 1, 2025

Choose a reason for hiding this comment

Uh oh!

changpeng Jul 1, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Jul 1, 2025 •

edited

Loading