Skip to content

Commit f6a8eb9

Browse files
authored
[AMDGPU][MC] Disallow null as saddr in flat instructions (#101730)
Some flat instructions have an saddr operand. When 'null' is provided as saddr, it may have the same encoding as another instruction. For example, the instructions 'global_atomic_add v1, v2, null' and 'global_atomic_add v[1:2], v2, off' have the same encoding. This patch disallows having null as saddr.
1 parent 2c58063 commit f6a8eb9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1069
-242
lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
291291
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
292292
DECODE_OPERAND_REG_7(SReg_64, OPW64)
293293
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
294+
DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
294295
DECODE_OPERAND_REG_7(SReg_96, OPW96)
295296
DECODE_OPERAND_REG_7(SReg_128, OPW128)
296297
DECODE_OPERAND_REG_7(SReg_256, OPW256)

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
209209
!con(
210210
!con(
211211
!if(EnableSaddr,
212-
(ins SReg_64:$saddr, VGPR_32:$vaddr),
212+
(ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr),
213213
(ins VReg_64:$vaddr)),
214214
(ins flat_offset:$offset)),
215215
// FIXME: Operands with default values do not work with following non-optional operands.
@@ -231,7 +231,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
231231
(outs),
232232
!con(
233233
!if(EnableSaddr,
234-
(ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
234+
(ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64_XEXEC_XNULL:$saddr),
235235
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
236236
(ins flat_offset:$offset, CPol_0:$cpol)),
237237
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
@@ -589,7 +589,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
589589

590590
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
591591
(outs),
592-
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
592+
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_0:$cpol),
593593
" $vaddr, $vdata, $saddr$offset$cpol">,
594594
GlobalSaddrTable<1, opName> {
595595
let has_saddr = 1;
@@ -620,7 +620,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
620620

621621
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
622622
(outs vdst_op:$vdst),
623-
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
623+
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
624624
" $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
625625
GlobalSaddrTable<1, opName#"_rtn"> {
626626
let has_saddr = 1;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6088,10 +6088,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
60886088
legalizeOpWithMove(MI, VOP3Idx[2]);
60896089
}
60906090

6091-
Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
6092-
MachineRegisterInfo &MRI) const {
6091+
Register SIInstrInfo::readlaneVGPRToSGPR(
6092+
Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI,
6093+
const TargetRegisterClass *DstRC /*=nullptr*/) const {
60936094
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
60946095
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
6096+
if (DstRC)
6097+
SRC = RI.getCommonSubClass(SRC, DstRC);
6098+
60956099
Register DstReg = MRI.createVirtualRegister(SRC);
60966100
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
60976101

@@ -6244,7 +6248,10 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
62446248
if (moveFlatAddrToVGPR(MI))
62456249
return;
62466250

6247-
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
6251+
const TargetRegisterClass *DeclaredRC = getRegClass(
6252+
MI.getDesc(), SAddr->getOperandNo(), &RI, *MI.getParent()->getParent());
6253+
6254+
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI, DeclaredRC);
62486255
SAddr->setReg(ToSGPR);
62496256
}
62506257

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,12 +1213,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
12131213
/// Fix operands in \p MI to satisfy constant bus requirements.
12141214
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
12151215

1216-
/// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
1217-
/// be used when it is know that the value in SrcReg is same across all
1218-
/// threads in the wave.
1216+
/// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
1217+
/// for the dst register (\p DstRC) can be optionally supplied. This function
1218+
/// can only be used when it is know that the value in SrcReg is same across
1219+
/// all threads in the wave.
12191220
/// \returns The SGPR register that \p SrcReg was copied to.
12201221
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1221-
MachineRegisterInfo &MRI) const;
1222+
MachineRegisterInfo &MRI,
1223+
const TargetRegisterClass *DstRC = nullptr) const;
12221224

12231225
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
12241226
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -849,14 +849,21 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16],
849849
let HasSGPR = 1;
850850
}
851851

852-
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
853-
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE,
852+
def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
853+
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE,
854854
SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
855855
let CopyCost = 1;
856856
let AllocationPriority = 1;
857857
let HasSGPR = 1;
858858
}
859859

860+
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
861+
(add SReg_64_XEXEC_XNULL, SGPR_NULL64)> {
862+
let CopyCost = 1;
863+
let AllocationPriority = 1;
864+
let HasSGPR = 1;
865+
}
866+
860867
def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
861868
(add SReg_64_XEXEC, EXEC)> {
862869
let CopyCost = 1;

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
6161
; GFX908-NEXT: {{ $}}
6262
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
6363
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
64-
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
64+
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
6565
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6666
; GFX908-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
6767
; GFX908-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -136,7 +136,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
136136
; GFX90A-NEXT: {{ $}}
137137
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
138138
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
139-
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
139+
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
140140
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
141141
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
142142
; GFX90A-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -211,7 +211,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
211211
; GFX940-NEXT: {{ $}}
212212
; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
213213
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
214-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
214+
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
215215
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
216216
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
217217
; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -286,7 +286,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
286286
; GFX11-NEXT: {{ $}}
287287
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
288288
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
289-
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
289+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
290290
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
291291
; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
292292
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
5252
; GFX90A-NEXT: {{ $}}
5353
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
5454
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
55-
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
55+
; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
5656
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5757
; GFX90A-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
5858
; GFX90A-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -146,7 +146,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
146146
; GFX940-NEXT: {{ $}}
147147
; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
148148
; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
149-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
149+
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
150150
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
151151
; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
152152
; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
@@ -240,7 +240,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
240240
; GFX11-NEXT: {{ $}}
241241
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
242242
; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
243-
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
243+
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
244244
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
245245
; GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
246246
; GFX11-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_32_xm0_xexec = SI_PS_LIVE

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
120120
; GFX90A_GFX940-NEXT: {{ $}}
121121
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
122122
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
123-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
123+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
124124
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
125125
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
126126
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
@@ -138,7 +138,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
138138
; GFX90A_GFX940-NEXT: {{ $}}
139139
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
140140
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
141-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
141+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
142142
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
143143
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
144144
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
3636
; GFX908-NEXT: {{ $}}
3737
; GFX908-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
3838
; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
39-
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
39+
; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
4040
; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
4141
; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
4242
; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)
@@ -48,7 +48,7 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in
4848
; GFX90A_GFX940-NEXT: {{ $}}
4949
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
5050
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
51-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
51+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
5252
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5353
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
5454
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1)
2525
; GFX90A_GFX940-NEXT: {{ $}}
2626
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
2727
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
28-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
28+
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
2929
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3030
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
3131
; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1)

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,7 @@ body: |
747747
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
748748
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
749749
; GFX9-NEXT: {{ $}}
750-
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
750+
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
751751
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
752752
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
753753
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -758,7 +758,7 @@ body: |
758758
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
759759
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
760760
; GFX10-NEXT: {{ $}}
761-
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
761+
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
762762
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
763763
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
764764
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -854,7 +854,7 @@ body: |
854854
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
855855
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
856856
; GFX9-NEXT: {{ $}}
857-
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
857+
; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
858858
; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
859859
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
860860
; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
@@ -865,7 +865,7 @@ body: |
865865
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
866866
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
867867
; GFX10-NEXT: {{ $}}
868-
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
868+
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr0_sgpr1
869869
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
870870
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
871871
; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ body: |
2525
; WAVE32-LABEL: name: copy
2626
; WAVE32: liveins: $sgpr2_sgpr3
2727
; WAVE32-NEXT: {{ $}}
28-
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
28+
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec_xnull = COPY $sgpr2_sgpr3
2929
; WAVE32-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
3030
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
3131
; WAVE32-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1)

0 commit comments

Comments
 (0)