Skip to content

Commit 171cf53

Browse files
committed
AMDGPU/GlobalISel: Handle flat/global G_ATOMIC_CMPXCHG
Custom lower this to a target instruction with the merge operands. I think it might be better to directly select this and emit a REG_SEQUENCE, but this would be more work since it would require splitting the tablegen patterns for these cases from the other atomics.
1 parent 1ce552f commit 171cf53

12 files changed

+551
-113
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
117117
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
118118

119119
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
120+
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
121+
120122

121123
class GISelSop2Pat <
122124
SDPatternOperator node,

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
497497
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
498498
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
499499
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
500+
defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
500501

501502

502503
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
@@ -569,21 +570,7 @@ defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
569570
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
570571
}
571572

572-
class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
573-
(ops node:$ptr, node:$value),
574-
(atomic_op node:$ptr, node:$value),
575-
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
576-
577573
// Legacy.
578-
def AMDGPUatomic_cmp_swap_global : PatFrag<
579-
(ops node:$ptr, node:$value),
580-
(AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;
581-
582-
def atomic_cmp_swap_global : PatFrag<
583-
(ops node:$ptr, node:$cmp, node:$value),
584-
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;
585-
586-
587574
def atomic_cmp_swap_global_noret : PatFrag<
588575
(ops node:$ptr, node:$cmp, node:$value),
589576
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
848848
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
849849
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
850850
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
851-
G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
851+
G_ATOMICRMW_UMIN})
852852
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
853853
{S64, GlobalPtr}, {S64, LocalPtr}});
854854
if (ST.hasFlatAddressSpace()) {
@@ -858,6 +858,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
858858
getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
859859
.legalFor({{S32, LocalPtr}});
860860

861+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
862+
// demarshalling
863+
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
864+
.customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
865+
{S32, FlatPtr}, {S64, FlatPtr}})
866+
.legalFor({{S32, LocalPtr}, {S64, LocalPtr},
867+
{S32, RegionPtr}, {S64, RegionPtr}});
868+
861869
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
862870
.lower();
863871

@@ -1116,6 +1124,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
11161124
return legalizeFMad(MI, MRI, B);
11171125
case TargetOpcode::G_FDIV:
11181126
return legalizeFDIV(MI, MRI, B);
1127+
case TargetOpcode::G_ATOMIC_CMPXCHG:
1128+
return legalizeAtomicCmpXChg(MI, MRI, B);
11191129
default:
11201130
return false;
11211131
}
@@ -1724,6 +1734,33 @@ bool AMDGPULegalizerInfo::legalizeFMad(
17241734
return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
17251735
}
17261736

1737+
bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
1738+
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
1739+
Register DstReg = MI.getOperand(0).getReg();
1740+
Register PtrReg = MI.getOperand(1).getReg();
1741+
Register CmpVal = MI.getOperand(2).getReg();
1742+
Register NewVal = MI.getOperand(3).getReg();
1743+
1744+
assert(SITargetLowering::isFlatGlobalAddrSpace(
1745+
MRI.getType(PtrReg).getAddressSpace()) &&
1746+
"this should not have been custom lowered");
1747+
1748+
LLT ValTy = MRI.getType(CmpVal);
1749+
LLT VecTy = LLT::vector(2, ValTy);
1750+
1751+
B.setInstr(MI);
1752+
Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0);
1753+
1754+
B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG)
1755+
.addDef(DstReg)
1756+
.addUse(PtrReg)
1757+
.addUse(PackedVal)
1758+
.setMemRefs(MI.memoperands());
1759+
1760+
MI.eraseFromParent();
1761+
return true;
1762+
}
1763+
17271764
// Return the use branch instruction, otherwise null if the usage is invalid.
17281765
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
17291766
MachineRegisterInfo &MRI) {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
7272
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
7373
MachineIRBuilder &B) const;
7474

75+
bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
76+
MachineIRBuilder &B) const;
77+
7578
Register getLiveInRegister(MachineRegisterInfo &MRI,
7679
Register Reg, LLT Ty) const;
7780

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2957,7 +2957,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
29572957
case AMDGPU::G_ATOMICRMW_UMAX:
29582958
case AMDGPU::G_ATOMICRMW_UMIN:
29592959
case AMDGPU::G_ATOMICRMW_FADD:
2960-
case AMDGPU::G_ATOMIC_CMPXCHG: {
2960+
case AMDGPU::G_ATOMIC_CMPXCHG:
2961+
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
29612962
return getDefaultMappingAllVGPR(MI);
29622963
}
29632964
case AMDGPU::G_BRCOND: {

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 32 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -370,27 +370,6 @@ multiclass FLAT_Global_Atomic_Pseudo<
370370
FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
371371
FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
372372

373-
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
374-
(ops node:$ptr, node:$value),
375-
(atomic_op node:$ptr, node:$value),
376-
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
377-
>;
378-
379-
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
380-
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
381-
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
382-
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
383-
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
384-
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
385-
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
386-
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
387-
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
388-
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
389-
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
390-
def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
391-
def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
392-
393-
394373

395374
//===----------------------------------------------------------------------===//
396375
// Flat Instructions
@@ -425,84 +404,84 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
425404
}
426405

427406
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
428-
VGPR_32, i32, atomic_cmp_swap_flat,
407+
VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
429408
v2i32, VReg_64>;
430409

431410
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
432-
VReg_64, i64, atomic_cmp_swap_flat,
411+
VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
433412
v2i64, VReg_128>;
434413

435414
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
436-
VGPR_32, i32, atomic_swap_flat>;
415+
VGPR_32, i32, atomic_swap_flat_32>;
437416

438417
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
439-
VReg_64, i64, atomic_swap_flat>;
418+
VReg_64, i64, atomic_swap_flat_64>;
440419

441420
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
442-
VGPR_32, i32, atomic_add_flat>;
421+
VGPR_32, i32, atomic_load_add_flat_32>;
443422

444423
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
445-
VGPR_32, i32, atomic_sub_flat>;
424+
VGPR_32, i32, atomic_load_sub_flat_32>;
446425

447426
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
448-
VGPR_32, i32, atomic_min_flat>;
427+
VGPR_32, i32, atomic_load_min_flat_32>;
449428

450429
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
451-
VGPR_32, i32, atomic_umin_flat>;
430+
VGPR_32, i32, atomic_load_umin_flat_32>;
452431

453432
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
454-
VGPR_32, i32, atomic_max_flat>;
433+
VGPR_32, i32, atomic_load_max_flat_32>;
455434

456435
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
457-
VGPR_32, i32, atomic_umax_flat>;
436+
VGPR_32, i32, atomic_load_umax_flat_32>;
458437

459438
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
460-
VGPR_32, i32, atomic_and_flat>;
439+
VGPR_32, i32, atomic_load_and_flat_32>;
461440

462441
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
463-
VGPR_32, i32, atomic_or_flat>;
442+
VGPR_32, i32, atomic_load_or_flat_32>;
464443

465444
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
466-
VGPR_32, i32, atomic_xor_flat>;
445+
VGPR_32, i32, atomic_load_xor_flat_32>;
467446

468447
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
469-
VGPR_32, i32, atomic_inc_flat>;
448+
VGPR_32, i32, atomic_inc_flat_32>;
470449

471450
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
472-
VGPR_32, i32, atomic_dec_flat>;
451+
VGPR_32, i32, atomic_dec_flat_32>;
473452

474453
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
475-
VReg_64, i64, atomic_add_flat>;
454+
VReg_64, i64, atomic_load_add_flat_64>;
476455

477456
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
478-
VReg_64, i64, atomic_sub_flat>;
457+
VReg_64, i64, atomic_load_sub_flat_64>;
479458

480459
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
481-
VReg_64, i64, atomic_min_flat>;
460+
VReg_64, i64, atomic_load_min_flat_64>;
482461

483462
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
484-
VReg_64, i64, atomic_umin_flat>;
463+
VReg_64, i64, atomic_load_umin_flat_64>;
485464

486465
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
487-
VReg_64, i64, atomic_max_flat>;
466+
VReg_64, i64, atomic_load_max_flat_64>;
488467

489468
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
490-
VReg_64, i64, atomic_umax_flat>;
469+
VReg_64, i64, atomic_load_umax_flat_64>;
491470

492471
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
493-
VReg_64, i64, atomic_and_flat>;
472+
VReg_64, i64, atomic_load_and_flat_64>;
494473

495474
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
496-
VReg_64, i64, atomic_or_flat>;
475+
VReg_64, i64, atomic_load_or_flat_64>;
497476

498477
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
499-
VReg_64, i64, atomic_xor_flat>;
478+
VReg_64, i64, atomic_load_xor_flat_64>;
500479

501480
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
502-
VReg_64, i64, atomic_inc_flat>;
481+
VReg_64, i64, atomic_inc_flat_64>;
503482

504483
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
505-
VReg_64, i64, atomic_dec_flat>;
484+
VReg_64, i64, atomic_dec_flat_64>;
506485

507486
// GFX7-, GFX10-only flat instructions.
508487
let SubtargetPredicate = isGFX7GFX10 in {
@@ -556,11 +535,11 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
556535

557536
let is_flat_global = 1 in {
558537
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
559-
VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
538+
VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
560539
v2i32, VReg_64>;
561540

562541
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
563-
VReg_64, i64, AMDGPUatomic_cmp_swap_global,
542+
VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
564543
v2i64, VReg_128>;
565544

566545
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -813,7 +792,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
813792
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
814793
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
815794
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
816-
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
795+
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
817796
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
818797

819798
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -827,7 +806,7 @@ def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
827806
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
828807
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
829808
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
830-
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
809+
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
831810
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
832811

833812
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
@@ -923,7 +902,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i3
923902
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
924903
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
925904
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
926-
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
905+
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
927906
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
928907

929908
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
@@ -937,7 +916,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64,
937916
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
938917
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
939918
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
940-
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
919+
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
941920
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
942921

943922
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,13 +1326,6 @@ EVT SITargetLowering::getOptimalMemOpType(
13261326
return MVT::Other;
13271327
}
13281328

1329-
static bool isFlatGlobalAddrSpace(unsigned AS) {
1330-
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
1331-
AS == AMDGPUAS::FLAT_ADDRESS ||
1332-
AS == AMDGPUAS::CONSTANT_ADDRESS ||
1333-
AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
1334-
}
1335-
13361329
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
13371330
unsigned DestAS) const {
13381331
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,14 @@ class SITargetLowering final : public AMDGPUTargetLowering {
260260

261261
bool isMemOpUniform(const SDNode *N) const;
262262
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
263+
264+
static bool isFlatGlobalAddrSpace(unsigned AS) {
265+
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
266+
AS == AMDGPUAS::FLAT_ADDRESS ||
267+
AS == AMDGPUAS::CONSTANT_ADDRESS ||
268+
AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
269+
}
270+
263271
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
264272
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
265273

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,3 +2018,14 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
20182018
let InOperandList = (ins type1:$src);
20192019
let hasSideEffects = 0;
20202020
}
2021+
2022+
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
2023+
// operand. Expects a MachineMemOperand in addition to explicit
2024+
// operands.
2025+
def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
2026+
let OutOperandList = (outs type0:$oldval);
2027+
let InOperandList = (ins ptype1:$addr, type0:$cmpval_nnenwval);
2028+
let hasSideEffects = 0;
2029+
let mayLoad = 1;
2030+
let mayStore = 1;
2031+
}

0 commit comments

Comments
 (0)