Skip to content

Commit 5cf9292

Browse files
committed
[AMDGPU] Add two TSFlags: IsAtomicNoRet and IsAtomicRet
We are using the AtomicNoRet map in multiple places to determine whether an instruction is atomic and, if so, whether it is the rtn or the nortn variant. This method does not always work, because some instructions have only an rtn version or only a nortn version. One such instruction is ds_wrxchg_rtn_b32, which does not have a nortn version. This has caused changes in the memory legalizer tests. Differential Revision: https://reviews.llvm.org/D96639
1 parent c465429 commit 5cf9292

14 files changed

+95
-74
lines changed

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
696696
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
697697
let glc_value = 0;
698698
let dlc_value = 0;
699+
let IsAtomicNoRet = 1;
699700
let AsmMatchConverter = "cvtMubufAtomic";
700701
}
701702

@@ -714,6 +715,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
714715
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
715716
let glc_value = 1;
716717
let dlc_value = 0;
718+
let IsAtomicRet = 1;
717719
let Constraints = "$vdata = $vdata_in";
718720
let DisableEncoding = "$vdata_in";
719721
let AsmMatchConverter = "cvtMubufAtomicReturn";

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
102102

103103
let has_data1 = 0;
104104
let has_vdst = 0;
105+
let IsAtomicNoRet = 1;
105106
}
106107

107108
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -121,6 +122,7 @@ class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
121122
" $addr, $data0, $data1$offset$gds"> {
122123

123124
let has_vdst = 0;
125+
let IsAtomicNoRet = 1;
124126
}
125127

126128
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
@@ -161,6 +163,7 @@ class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
161163

162164
let hasPostISelHook = 1;
163165
let has_data1 = 0;
166+
let IsAtomicRet = 1;
164167
}
165168

166169
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
@@ -184,6 +187,7 @@ class DS_1A2D_RET<string opName,
184187
" $vdst, $addr, $data0, $data1$offset$gds"> {
185188

186189
let hasPostISelHook = 1;
190+
let IsAtomicRet = 1;
187191
}
188192

189193
multiclass DS_1A2D_RET_mc<string opName,

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
345345
let dlcValue = 0;
346346
let has_vdst = 0;
347347
let maybeAtomic = 1;
348+
let IsAtomicNoRet = 1;
348349
}
349350

350351
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
@@ -354,6 +355,8 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
354355
let has_vdst = 1;
355356
let glcValue = 1;
356357
let dlcValue = 0;
358+
let IsAtomicNoRet = 0;
359+
let IsAtomicRet = 1;
357360
let PseudoInstr = NAME # "_RTN";
358361
}
359362

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class MIMGBaseOpcode : PredicateControl {
3939
bit Coordinates = 1;
4040
bit LodOrClampOrMip = 0;
4141
bit HasD16 = 0;
42+
bit IsAtomicRet = 0;
4243
}
4344

4445
def MIMGBaseOpcode : GenericEnum {
@@ -556,20 +557,22 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
556557
}
557558

558559
multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
559-
def "" : MIMGBaseOpcode {
560-
let Atomic = 1;
561-
let AtomicX2 = isCmpSwap;
562-
}
560+
let IsAtomicRet = 1 in {
561+
def "" : MIMGBaseOpcode {
562+
let Atomic = 1;
563+
let AtomicX2 = isCmpSwap;
564+
}
563565

564-
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
565-
// _V* variants have different dst size, but the size is encoded implicitly,
566-
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
567-
// Other variants are reconstructed by disassembler using dmask and tfe.
568-
let VDataDwords = !if(isCmpSwap, 2, 1) in
569-
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
570-
let VDataDwords = !if(isCmpSwap, 4, 2) in
571-
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
572-
}
566+
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
567+
// _V* variants have different dst size, but the size is encoded implicitly,
568+
// using dmask and tfe. Only 32-bit variant is registered with disassembler.
569+
// Other variants are reconstructed by disassembler using dmask and tfe.
570+
let VDataDwords = !if(isCmpSwap, 2, 1) in
571+
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
572+
let VDataDwords = !if(isCmpSwap, 4, 2) in
573+
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
574+
}
575+
} // End IsAtomicRet = 1
573576
}
574577

575578
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,13 @@ enum : uint64_t {
106106
IsDOT = UINT64_C(1) << 55,
107107

108108
// FLAT instruction accesses FLAT_SCRATCH segment.
109-
IsFlatScratch = UINT64_C(1) << 56
109+
IsFlatScratch = UINT64_C(1) << 56,
110+
111+
// Atomic without return.
112+
IsAtomicNoRet = UINT64_C(1) << 57,
113+
114+
// Atomic with return.
115+
IsAtomicRet = UINT64_C(1) << 58
110116
};
111117

112118
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
112112
return false;
113113
if (!MI.mayLoad() || MI.mayStore())
114114
return false;
115-
if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
116-
AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
115+
if (SIInstrInfo::isAtomic(MI))
117116
return false;
118117
if (IsVMEMClause && !isVMEMClauseInst(MI))
119118
return false;

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
538538
AMDGPU::OpName::data1),
539539
CurrScore);
540540
}
541-
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
541+
} else if (SIInstrInfo::isAtomicRet(Inst) &&
542542
Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
543543
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
544544
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
@@ -560,7 +560,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
560560
&Inst, TII, TRI, MRI,
561561
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
562562
CurrScore);
563-
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
563+
} else if (SIInstrInfo::isAtomicRet(Inst)) {
564564
setExpScore(
565565
&Inst, TII, TRI, MRI,
566566
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -569,7 +569,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
569569
} else if (TII->isMIMG(Inst)) {
570570
if (Inst.mayStore()) {
571571
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
572-
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
572+
} else if (SIInstrInfo::isAtomicRet(Inst)) {
573573
setExpScore(
574574
&Inst, TII, TRI, MRI,
575575
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -582,7 +582,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
582582
} else if (TII->isMUBUF(Inst)) {
583583
if (Inst.mayStore()) {
584584
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
585-
} else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
585+
} else if (SIInstrInfo::isAtomicRet(Inst)) {
586586
setExpScore(
587587
&Inst, TII, TRI, MRI,
588588
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
@@ -1246,8 +1246,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
12461246
++FlatASCount;
12471247
if (!ST->hasVscnt())
12481248
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
1249-
else if (Inst.mayLoad() &&
1250-
AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
1249+
else if (Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst))
12511250
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
12521251
else
12531252
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
@@ -1275,16 +1274,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
12751274
Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
12761275
if (!ST->hasVscnt())
12771276
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
1278-
else if ((Inst.mayLoad() &&
1279-
AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
1277+
else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) ||
12801278
/* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
12811279
(TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
12821280
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
12831281
else if (Inst.mayStore())
12841282
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
12851283

12861284
if (ST->vmemWriteNeedsExpWaitcnt() &&
1287-
(Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
1285+
(Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
12881286
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
12891287
}
12901288
} else if (TII->isSMRD(Inst)) {

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,12 @@ class InstSI <dag outs, dag ins, string asm = "",
135135
// Must be 0 for non-FLAT instructions.
136136
field bit IsFlatScratch = 0;
137137

138+
// Atomic without a return.
139+
field bit IsAtomicNoRet = 0;
140+
141+
// Atomic with return.
142+
field bit IsAtomicRet = 0;
143+
138144
// These need to be kept in sync with the enum in SIInstrFlags.
139145
let TSFlags{0} = SALU;
140146
let TSFlags{1} = VALU;
@@ -205,6 +211,10 @@ class InstSI <dag outs, dag ins, string asm = "",
205211

206212
let TSFlags{56} = IsFlatScratch;
207213

214+
let TSFlags{57} = IsAtomicNoRet;
215+
216+
let TSFlags{58} = IsAtomicRet;
217+
208218
let SchedRW = [Write32Bit];
209219

210220
let AsmVariantName = AMDGPUAsmVariants.Default;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,32 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
538538
return get(Opcode).TSFlags & SIInstrFlags::EXP;
539539
}
540540

541+
static bool isAtomicNoRet(const MachineInstr &MI) {
542+
return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
543+
}
544+
545+
bool isAtomicNoRet(uint16_t Opcode) const {
546+
return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
547+
}
548+
549+
static bool isAtomicRet(const MachineInstr &MI) {
550+
return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
551+
}
552+
553+
bool isAtomicRet(uint16_t Opcode) const {
554+
return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
555+
}
556+
557+
static bool isAtomic(const MachineInstr &MI) {
558+
return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
559+
SIInstrFlags::IsAtomicNoRet);
560+
}
561+
562+
bool isAtomic(uint16_t Opcode) const {
563+
return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
564+
SIInstrFlags::IsAtomicNoRet);
565+
}
566+
541567
static bool isWQM(const MachineInstr &MI) {
542568
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
543569
}
@@ -1165,9 +1191,6 @@ namespace AMDGPU {
11651191
LLVM_READONLY
11661192
int getMUBUFNoLdsInst(uint16_t Opcode);
11671193

1168-
LLVM_READONLY
1169-
int getAtomicRetOp(uint16_t Opcode);
1170-
11711194
LLVM_READONLY
11721195
int getAtomicNoRetOp(uint16_t Opcode);
11731196

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2408,15 +2408,6 @@ def getMUBUFNoLdsInst : InstrMapping {
24082408
let ValueCols = [["0"]];
24092409
}
24102410

2411-
// Maps an atomic opcode to its version with a return value.
2412-
def getAtomicRetOp : InstrMapping {
2413-
let FilterClass = "AtomicNoRet";
2414-
let RowFields = ["NoRetOp"];
2415-
let ColFields = ["IsRet"];
2416-
let KeyCol = ["0"];
2417-
let ValueCols = [["1"]];
2418-
}
2419-
24202411
// Maps an atomic opcode to its returnless version.
24212412
def getAtomicNoRetOp : InstrMapping {
24222413
let FilterClass = "AtomicNoRet";

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
455455
/// Return true iff instruction \p MI is an atomic instruction that
456456
/// returns a result.
457457
bool isAtomicRet(const MachineInstr &MI) const {
458-
return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
458+
return SIInstrInfo::isAtomicRet(MI);
459459
}
460460

461461
/// Removes all processed atomic pseudo instructions from the current

llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -595,8 +595,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
595595
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
596596
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
597597
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
598-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
599-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
598+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
600599
; GFX10-WGP-NEXT: buffer_gl0_inv
601600
; GFX10-WGP-NEXT: s_endpgm
602601
;
@@ -722,8 +721,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
722721
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
723722
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
724723
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
725-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
726-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
724+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
727725
; GFX10-WGP-NEXT: buffer_gl0_inv
728726
; GFX10-WGP-NEXT: s_endpgm
729727
;
@@ -790,8 +788,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
790788
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
791789
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
792790
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
793-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
794-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
791+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
795792
; GFX10-WGP-NEXT: buffer_gl0_inv
796793
; GFX10-WGP-NEXT: s_endpgm
797794
;
@@ -856,8 +853,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
856853
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
857854
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
858855
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
859-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
860-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
856+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
861857
; GFX10-WGP-NEXT: buffer_gl0_inv
862858
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
863859
; GFX10-WGP-NEXT: s_endpgm
@@ -928,8 +924,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
928924
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
929925
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
930926
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
931-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
932-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
927+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
933928
; GFX10-WGP-NEXT: buffer_gl0_inv
934929
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
935930
; GFX10-WGP-NEXT: s_endpgm
@@ -1002,8 +997,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
1002997
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1003998
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
1004999
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
1005-
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
1006-
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
1000+
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10071001
; GFX10-WGP-NEXT: buffer_gl0_inv
10081002
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
10091003
; GFX10-WGP-NEXT: s_endpgm

0 commit comments

Comments
 (0)