[AMDGPU][MC] Disallow null as saddr in flat instructions #101730

jwanggit86 · 2024-08-02T18:24:25Z

Some flat instructions have an saddr operand. When 'null' is provided as saddr, it may have the same encoding as another instruction. For example, the instructions 'global_atomic_add v1, v2, null' and 'global_atomic_add v[1:2], v2, off' have the same encoding. This patch disallows having null as saddr.

llvmbot · 2024-08-02T18:24:42Z

@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Jun Wang (jwanggit86)

Changes

Some flat instructions have an saddr operand. When 'null' is provided as saddr, it may have the same encoding as another instruction. For example, the instructions 'global_atomic_add v1, v2, null' and 'global_atomic_add v[1:2], v2, off' have the same encoding. This patch disallows having null as saddr.

Patch is 34.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101730.diff

7 Files Affected:

(modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+1)
(modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+4-4)
(modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+8)
(modified) llvm/test/MC/AMDGPU/flat-global.s (+2-2)
(added) llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s (+268)
(added) llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s (+253)
(added) llvm/test/MC/AMDGPU/gfx12_flat_instructions_err.s (+289)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a0dc7098347a..9bfb62e879ae5 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -293,6 +293,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
 DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
 DECODE_OPERAND_REG_7(SReg_64, OPW64)
 DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
 DECODE_OPERAND_REG_7(SReg_96, OPW96)
 DECODE_OPERAND_REG_7(SReg_128, OPW128)
 DECODE_OPERAND_REG_7(SReg_256, OPW256)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 88c6039473338..916878d75ce51 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -207,7 +207,7 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
   !con(
     !con(
       !if(EnableSaddr,
-        (ins SReg_64:$saddr, VGPR_32:$vaddr),
+        (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr),
         (ins VReg_64:$vaddr)),
         (ins flat_offset:$offset)),
         // FIXME: Operands with default values do not work with following non-optional operands.
@@ -229,7 +229,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
   (outs),
   !con(
     !if(EnableSaddr,
-      (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
+      (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64_XEXEC_XNULL:$saddr),
       (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
       (ins flat_offset:$offset, CPol_0:$cpol)),
   " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
@@ -587,7 +587,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
 
     def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
       (outs),
-      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
+      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_0:$cpol),
       " $vaddr, $vdata, $saddr$offset$cpol">,
       GlobalSaddrTable<1, opName> {
       let has_saddr = 1;
@@ -618,7 +618,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
 
     def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
       (outs vdst_op:$vdst),
-        (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
+        (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
       " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
       GlobalSaddrTable<1, opName#"_rtn"> {
        let has_saddr = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f1d9aec163635..9072b3028fd5e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -845,6 +845,14 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
   let HasSGPR = 1;
 }
 
+def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
+       SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
+  let CopyCost = 1;
+  let AllocationPriority = 1;
+  let HasSGPR = 1;
+}
+
 def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
   (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
        SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
diff --git a/llvm/test/MC/AMDGPU/flat-global.s b/llvm/test/MC/AMDGPU/flat-global.s
index e81fae86b0558..b35ad87edeea3 100644
--- a/llvm/test/MC/AMDGPU/flat-global.s
+++ b/llvm/test/MC/AMDGPU/flat-global.s
@@ -212,8 +212,8 @@ global_store_dword v3, v1, s[2:3] offset:-8
 
 // XXX: Is this valid?
 global_store_dword v3, v1, exec
-// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-// GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
+// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX9-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
 // VI-ERR: :[[@LINE-3]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
 global_load_dword v1, v[3:4], s2
diff --git a/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
new file mode 100644
index 0000000000000..193e91e1d0bb1
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx10_flat_instructions_err.s
@@ -0,0 +1,268 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=GFX1010,GFX10 --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=GFX1030,GFX10 --implicit-check-not=error: %s
+
+global_atomic_add v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_add v0, v2, v4, null glc
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_add_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_add_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_and v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_and v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_and_x2 v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_cmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_cmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+global_atomic_cmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:46: error: invalid operand for instruction
+
+global_atomic_csub v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:28: error: invalid operand for instruction
+
+global_atomic_csub v0, v2, v4, null
+// GFX1010: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1030: :[[@LINE-2]]:32: error: invalid operand for instruction
+
+global_atomic_dec v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_dec v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_dec_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_fcmpswap v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_fcmpswap v0, v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_fcmpswap_x2 v[0:1], v2, v[4:7], null
+// GFX10: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_fmax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_fmin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_fmin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_fmin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_inc v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_inc_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_or v2, v4, null
+// GFX10: :[[@LINE-1]]:26: error: invalid operand for instruction
+
+global_atomic_or v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_atomic_or_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:41: error: invalid operand for instruction
+
+global_atomic_smax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_smin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_smin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_smin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_smin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_sub v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_sub v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_sub_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_sub_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_atomic_swap v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_swap v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_swap_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_swap_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umax v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umax v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umax_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umax_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_umin v2, v4, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_atomic_umin v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_umin_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_umin_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_xor v2, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_atomic_xor v0, v2, v4, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_xor_x2 v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_xor_x2 v[0:1], v2, v[4:5], null
+// GFX10: :[[@LINE-1]]:42: error: invalid operand for instruction
+
+global_load_dword v0, v4, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_dwordx2 v[0:1], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx3 v[0:2], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_dwordx4 v[0:3], v4, null
+// GFX10: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+global_load_sbyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_sbyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_sbyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_short_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_sshort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_load_ubyte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_load_ubyte_d16 v0, v2, null
+// GFX10: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_load_ubyte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_load_ushort v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_byte v0, v2, null
+// GFX10: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+global_store_byte_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dword v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_dwordx2 v0, v[2:3], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx3 v0, v[2:4], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_dwordx4 v0, v[2:5], null
+// GFX10: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_store_short v0, v2, null
+// GFX10: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+global_store_short_d16_hi v0, v2, null
+// GFX10: :[[@LINE-1]]:35: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
new file mode 100644
index 0000000000000..9e51b0b8cc8cd
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_flat_instructions_err.s
@@ -0,0 +1,253 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefixes=GFX11 --implicit-check-not=error: %s
+
+global_atomic_add_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_f32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+global_atomic_add_u32 v0, v2, v4, null glc
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_add_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_and_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_and_b64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v0, v[2:5], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_b64 v[0:1], v2, v[4:7], null
+// GFX11: :[[@LINE-1]]:47: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:39: error: invalid operand for instruction
+
+global_atomic_cmpswap_f32 v0, v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+global_atomic_csub_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_dec_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_dec_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_inc_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_inc_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_max_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_max_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_f32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_i64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_i64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, null
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+global_atomic_min_u32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+global_atomic_min_u64 v[0:1], v2, v[4:5], null
+// GFX11: :[[@LINE-1]]:43: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, null
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
+
+global_atomic_or_b32 v0, v2, v4, null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+global_atomic_or_b64 v0, v[2:3], null
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for ins...
[truncated]

jwanggit86 · 2024-08-02T18:31:13Z

For Issue 48508

rovka · 2024-08-06T11:05:32Z

Does this mean that global_atomic_add v[1:2], v2, off will work as before, but global_atomic_add v[1:2], v2, null won't be accepted anymore? This doesn't sound ideal if 'null' and 'off' are meant to be aliases (SP3 docs for GFX10 even say that 'off' is a legacy syntax and is translated to 'null').

rovka · 2024-08-06T08:56:46Z

llvm/test/MC/AMDGPU/flat-global.s

@@ -212,8 +212,8 @@ global_store_dword v3, v1, s[2:3] offset:-8

 // XXX: Is this valid?
 global_store_dword v3, v1, exec
-// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
-// GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
+// GFX10-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction


Nice! You should mention exec in the commit message too.

rovka · 2024-08-06T10:47:23Z

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

+  let AllocationPriority = 1;
+  let HasSGPR = 1;
+}
+
 def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
       SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {


This can be (add SReg_64_XEXEC_XNULL, SGPR_NULL64) so you don't repeat the whole list.

Good point!

jwanggit86 · 2024-08-09T19:12:58Z

Does this mean that global_atomic_add v[1:2], v2, off will work as before, but global_atomic_add v[1:2], v2, null won't be accepted anymore? This doesn't sound ideal if 'null' and 'off' are meant to be aliases (SP3 docs for GFX10 even say that 'off' is a legacy syntax and is translated to 'null').

The instruction global_atomic_add v[1:2], v2, null is not valid even without this patch. So already in this case null is not treated as an alias for off. The problem this patch is address is that the following 2 instructions have the same encoding:

global_atomic_add v[1:2], v2, off       ; encoding: [0x00,0x80,0xc8,0xdc,0x01,0x02,0x7d,0x00]
global_atomic_add v1, v2, null          ; encoding: [0x00,0x80,0xc8,0xdc,0x01,0x02,0x7d,0x00]

jayfoad · 2024-08-14T09:25:12Z

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+  if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
+    MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);


Is this required? If it is, can you make it a bit more generic by constraining the RC with the declared RC of the saddr operand, instead of mentioning these specific RCs?

Done. Changed to declared reg class.

jayfoad · 2024-08-14T09:27:08Z

Does this mean that global_atomic_add v[1:2], v2, off will work as before, but global_atomic_add v[1:2], v2, null won't be accepted anymore? This doesn't sound ideal if 'null' and 'off' are meant to be aliases (SP3 docs for GFX10 even say that 'off' is a legacy syntax and is translated to 'null').

The instruction global_atomic_add v[1:2], v2, null is not valid even without this patch. So already in this case null is not treated as an alias for off.

I agree that this patch does not break anything that currently works correctly. I also agree with Diana that it would be even better if null was treated the same as off, so that the assembler accepted global_atomic_add v[1:2], v2, null.

rovka · 2024-08-15T08:27:12Z

The instruction global_atomic_add v[1:2], v2, null is not valid even without this patch.

Ok, in that case I'm happy with this patch after you address Jay's concern. It would be nice to follow up with another patch making null work wherever off works, if you have the time.

jwanggit86 · 2024-08-23T17:22:22Z

The instruction global_atomic_add v[1:2], v2, null is not valid even without this patch.

Ok, in that case I'm happy with this patch after you address Jay's concern. It would be nice to follow up with another patch making null work wherever off works, if you have the time.

Yes, I can do that.

jwanggit86 · 2024-08-23T17:29:34Z

Does this mean that global_atomic_add v[1:2], v2, off will work as before, but global_atomic_add v[1:2], v2, null won't be accepted anymore? This doesn't sound ideal if 'null' and 'off' are meant to be aliases (SP3 docs for GFX10 even say that 'off' is a legacy syntax and is translated to 'null').

The instruction global_atomic_add v[1:2], v2, null is not valid even without this patch. So already in this case null is not treated as an alias for off.

I agree that this patch does not break anything that currently works correctly. I also agree with Diana that it would be even better if null was treated the same as off, so that the assembler accepted global_atomic_add v[1:2], v2, null.

Will address global_atomic_add v[1:2], v2, null in another PR.

jwanggit86 · 2024-08-28T17:23:29Z

@jayfoad @arsenm ping.

jayfoad · 2024-09-02T12:21:37Z

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

+      getRegClass(MI.getDesc(), SAddr->getOperandNo(),
+                  MRI.getTargetRegisterInfo(), *MI.getParent()->getParent());
+
+  MRI.setRegClass(ToSGPR, DeclaredRC);


Thanks but I still don't understand why this code is required.

For the following mir code:

bb.0: liveins: $vgpr0_vgpr1 %0:sreg_64 = COPY $vgpr0_vgpr1 bb.1: %1:sreg_64_xexec_xnull = PHI %0, %bb.0, %2, %bb.1 %3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec %4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec %2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc S_CMP_LG_U64 %2, 0, implicit-def $scc S_CBRANCH_SCC1 %bb.1, implicit $scc bb.2: S_ENDPGM 0

In the SIFixSGPRCopies pass, legalizeOperandsFLAT() is called for the GLOBAL_LOAD_DWORD_SADDR instruction in the following call trace:

=0 llvm::SIRegisterInfo::getSGPRClassForBitWidth (BitWidth=64) =1 0x0000555556ab1032 in llvm::SIRegisterInfo::getEquivalentSGPRClass (this=0x7ffff7e31380, VRC=0x55555f9eaaa0 <llvm::AMDGPU::VReg_64RegClass>) =2 0x000055555697163b in llvm::SIInstrInfo::readlaneVGPRToSGPR (this=0x7ffff7e31330, SrcReg=..., UseMI=..., MRI=...) =3 0x00005555569721c3 in llvm::SIInstrInfo::legalizeOperandsFLAT (this=0x7ffff7e31330, MRI=..., MI=...) =4 0x0000555556974904 in llvm::SIInstrInfo::legalizeOperands (this=0x7ffff7e31330, MI=..., MDT=0x55555fc25558) =5 0x0000555556978740 in llvm::SIInstrInfo::moveToVALUImpl (this=0x7ffff7e31330, Worklist=..., MDT=0x55555fc25558, Inst=...) =6 0x0000555556976bbf in llvm::SIInstrInfo::moveToVALU (this=0x7ffff7e31330, Worklist=..., MDT=0x55555fc25558) =7 0x0000555556901209 in (anonymous namespace)::SIFixSGPRCopies::lowerSpecialCase (this=0x55555fc24e80, MI=..., I=...)

Here in readlaneVGPRToSGPR () it tries to get the SGPR equivalence of the src reg class, VReg_64RegClass. This leads to the call of getSGPRClassForBitWidth(), which, for 64b always returns SReg_64RegClass, which is not what we want here.

const TargetRegisterClass * SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) { if (BitWidth == 16) return &AMDGPU::SGPR_LO16RegClass; if (BitWidth == 32) return &AMDGPU::SReg_32RegClass; if (BitWidth == 64) return &AMDGPU::SReg_64RegClass; ...

That's why in legalizeOperandsFLAT() we have to override it with the desired reg class for SAddr.

This is the wrong way to fix this. readlaneVGPRToSGPR can already check the operand class directly, and then apply getCommonSubClass to the most general result getSGPRClassForBitWidth returned. You shouldn't let it create the wrong register class and then redo it after

This is the wrong way to fix this. readlaneVGPRToSGPR can already check the operand class directly, and then apply getCommonSubClass to the most general result getSGPRClassForBitWidth returned. You shouldn't let it create the wrong register class and then redo it after

To use getCommonSubClass() as you suggested, I need to add an extra parameter to readlaneVGPRToSGPR() which holds the RC I want with a default of nullptr, as follows:

Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, - MachineRegisterInfo &MRI) const { + MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC/*=nullptr*/) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); + if (DstRC) { + SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC); + } Register DstReg = MRI.createVirtualRegister(SRC);

Pls let me know if this is what you had in mind.

Something like this, (but you can just use RI directly as a member of SIInstrInfo)

Also it's easier to review changes when you post them as real revisions

arsenm · 2024-09-20T18:42:14Z

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

  const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
  const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
+  if (DstRC)
+    SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC);


Suggested change

SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC);

SRC = RI.getCommonSubClass(SRC, DstRC);

arsenm · 2024-09-20T18:42:27Z

llvm/lib/Target/AMDGPU/SIInstrInfo.h

@@ -1214,7 +1214,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.


Comment the parameter

Some flat instructions have an saddr operand. When 'null' is provided as saddr, it may have the same encoding as another instruction. For example, the instructions 'global_atomic_add v1, v2, null' and 'global_atomic_add v[1:2], v2, off' have the same encoding. This patch disallows having null as saddr.

(2) fix tests affected by the new reg class. Todo: in final commit msg, mention EXEC as well.

the correct SGPR class.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

jwanggit86 added backend:AMDGPU mc Machine (object) code labels Aug 2, 2024

jwanggit86 requested a review from arsenm August 2, 2024 18:24

rovka reviewed Aug 6, 2024

View reviewed changes

jwanggit86 force-pushed the disallow-null-as-saddr-in-flat-instructions branch from 7563231 to e090426 Compare August 14, 2024 00:38

llvmbot added the llvm:globalisel label Aug 14, 2024

jayfoad reviewed Aug 14, 2024

View reviewed changes

jwanggit86 force-pushed the disallow-null-as-saddr-in-flat-instructions branch from dbdfecf to 87bfe59 Compare August 23, 2024 18:47

jwanggit86 requested a review from jayfoad August 23, 2024 21:11

jayfoad reviewed Sep 2, 2024

View reviewed changes

arsenm reviewed Sep 20, 2024

View reviewed changes

jwanggit86 added 6 commits September 23, 2024 10:54

This commit: (1) define SReg_64_XEXEC in terms of SReg_64_XEXEC_XNULL

84d6f0e

(2) fix tests affected by the new reg class. Todo: in final commit msg, mention EXEC as well.

Use declared reg class for SADDR instead of hard-code.

fe8ea96

Resolve merge conflicts.

e4b7aad

Pass desired reg class to readlaneVGPRToSGPR() so that it can get

c417aaf

the correct SGPR class.

Minor code changes.

ed7cfd5

jwanggit86 force-pushed the disallow-null-as-saddr-in-flat-instructions branch from 02d7977 to ed7cfd5 Compare September 23, 2024 18:54

arsenm reviewed Sep 23, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Outdated Show resolved Hide resolved

jwanggit86 added 2 commits September 23, 2024 12:32

Minor code change.

6032727

A test that works fine locally but fails llvm build.

d26882a

jwanggit86 requested a review from arsenm September 24, 2024 00:09

arsenm approved these changes Sep 24, 2024

View reviewed changes

arsenm merged commit f6a8eb9 into llvm:main Sep 24, 2024
8 checks passed

jwanggit86 linked an issue Sep 24, 2024 that may be closed by this pull request

[AMDGPU][MC][GFX10] Incorrect handling of FLAT instructions with saddr=null #48508

Closed

jwanggit86 deleted the disallow-null-as-saddr-in-flat-instructions branch September 24, 2024 18:19

		if (MRI.getRegClass(ToSGPR) == &AMDGPU::SReg_64RegClass)
		MRI.setRegClass(ToSGPR, &AMDGPU::SReg_64_XEXEC_XNULLRegClass);

	SRC = ST.getRegisterInfo()->getCommonSubClass(SRC, DstRC);
	SRC = RI.getCommonSubClass(SRC, DstRC);

		@@ -1214,7 +1214,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
		/// threads in the wave.
		/// \returns The SGPR register that \p SrcReg was copied to.

[AMDGPU][MC] Disallow null as saddr in flat instructions #101730

[AMDGPU][MC] Disallow null as saddr in flat instructions #101730

Uh oh!

Conversation

jwanggit86 commented Aug 2, 2024

Uh oh!

llvmbot commented Aug 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jwanggit86 commented Aug 2, 2024

Uh oh!

rovka commented Aug 6, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jwanggit86 commented Aug 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jayfoad commented Aug 14, 2024

Uh oh!

rovka commented Aug 15, 2024

Uh oh!

jwanggit86 commented Aug 23, 2024

Uh oh!

jwanggit86 commented Aug 23, 2024

Uh oh!

jwanggit86 commented Aug 28, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jwanggit86 Sep 3, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Aug 2, 2024 •

edited

Loading

jwanggit86 commented Aug 9, 2024 •

edited

Loading

jwanggit86 Sep 3, 2024 •

edited

Loading