-
Notifications
You must be signed in to change notification settings - Fork 14.4k
AMDGPU: Implement tensor_save and tensor_stop for gfx1250 #146590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
MC layer only.
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Changpeng Fang (changpeng) ChangesMC layer only. Full diff: https://github.com/llvm/llvm-project/pull/146590.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9ae98de039e34..3625db9a4791f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -392,6 +392,23 @@ multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass
GlobalSaddrTable<1, opName>;
}
+class FLAT_Global_Tensor_Pseudo<string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+ opName,
+ (outs ),
+ !con(!if(EnableSaddr, (ins SReg_64:$saddr, flat_offset:$offset), (ins )), (ins CPol_0:$cpol)),
+ !if(EnableSaddr, " $saddr$offset", " ")#"$cpol"> {
+
+ let is_flat_global = 1;
+ let has_vdst = 0;
+ let has_data = 0;
+ let has_vaddr = 0;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let has_offset = EnableSaddr;
+}
+
class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> :
FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> {
@@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in {
def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
} // End SubtargetPredicate = isGFX12Plus
+let SubtargetPredicate = isGFX1250Plus in {
+
+def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>;
+def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">;
+} // End SubtargetPredicate = isGFX1250Plus
+
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
@@ -3061,6 +3084,9 @@ multiclass VFLAT_Real_Atomics_gfx1250<bits<8> op, string name = get_FLAT_ps<NAME
VFLAT_Real_RTN_gfx1250<op, name>,
VFLAT_Real_SADDR_RTN_gfx1250<op, name>;
+defm TENSOR_SAVE : VFLAT_Real_gfx1250<0x06e>;
+defm TENSOR_STOP : VFLAT_Real_gfx1250<0x06f>;
+
defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
new file mode 100644
index 0000000000000..07b4055f0ab9c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -0,0 +1,22 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
+
+tensor_save s[0:1]
+// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_save s[0:1] offset:32
+// GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_stop
+// GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
new file mode 100644
index 0000000000000..6421c6f30e177
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+
+# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_save s[0:1] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_save s[0:1] offset:32 ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00]
+0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x00
+
+# GFX1250: tensor_stop ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+
+# GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
+0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00
|
@@ -1052,6 +1069,12 @@ let SubtargetPredicate = isGFX12Plus in { | |||
def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">; | |||
} // End SubtargetPredicate = isGFX12Plus | |||
|
|||
let SubtargetPredicate = isGFX1250Plus in { | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this blank line intended?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this blank line intended?
Yes. There is asymmetric empty lines for "let SubtargetPredicate = isGFX1250Plu", just want to avoid merge conflict.
Or we can fix the downstream.
MC layer only.