Skip to content

Commit 3e12115

Browse files
committed
[NVPTX] Added intrinsics for atom.gen.{sys|cta}.* instructions.
These are only available on sm_60+ GPUs. Differential Revision: https://reviews.llvm.org/D24943 llvm-svn: 282607
1 parent f002212 commit 3e12115

File tree

9 files changed

+483
-16
lines changed

9 files changed

+483
-16
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,39 @@ let TargetPrefix = "nvvm" in {
729729
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
730730
[IntrArgMemOnly, NoCapture<0>]>;
731731

732+
class SCOPED_ATOMIC2_impl<LLVMType elty>
733+
: Intrinsic<[elty],
734+
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>],
735+
[IntrArgMemOnly, NoCapture<0>]>;
736+
class SCOPED_ATOMIC3_impl<LLVMType elty>
737+
: Intrinsic<[elty],
738+
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
739+
LLVMMatchType<0>],
740+
[IntrArgMemOnly, NoCapture<0>]>;
741+
742+
multiclass PTXAtomicWithScope2<LLVMType elty> {
743+
def _cta : SCOPED_ATOMIC2_impl<elty>;
744+
def _sys : SCOPED_ATOMIC2_impl<elty>;
745+
}
746+
multiclass PTXAtomicWithScope3<LLVMType elty> {
747+
def _cta : SCOPED_ATOMIC3_impl<elty>;
748+
def _sys : SCOPED_ATOMIC3_impl<elty>;
749+
}
750+
multiclass PTXAtomicWithScope2_fi {
751+
defm _f: PTXAtomicWithScope2<llvm_anyfloat_ty>;
752+
defm _i: PTXAtomicWithScope2<llvm_anyint_ty>;
753+
}
754+
defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi;
755+
defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
756+
defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
757+
defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2<llvm_anyint_ty>;
758+
defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
759+
defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
760+
defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
761+
defm int_nvvm_atomic_or_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
762+
defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
763+
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
764+
732765
// Bar.Sync
733766

734767
// The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ def SM61 : SubtargetFeature<"sm_61", "SmVersion", "61",
5151
def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62",
5252
"Target SM 6.2">;
5353

54+
def SATOM : SubtargetFeature<"satom", "HasAtomScope", "true",
55+
"Atomic operations with scope">;
56+
5457
// PTX Versions
5558
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
5659
"Use PTX version 3.2">;
@@ -81,9 +84,9 @@ def : Proc<"sm_37", [SM37, PTX41]>;
8184
def : Proc<"sm_50", [SM50, PTX40]>;
8285
def : Proc<"sm_52", [SM52, PTX41]>;
8386
def : Proc<"sm_53", [SM53, PTX42]>;
84-
def : Proc<"sm_60", [SM60, PTX50]>;
85-
def : Proc<"sm_61", [SM61, PTX50]>;
86-
def : Proc<"sm_62", [SM62, PTX50]>;
87+
def : Proc<"sm_60", [SM60, PTX50, SATOM]>;
88+
def : Proc<"sm_61", [SM61, PTX50, SATOM]>;
89+
def : Proc<"sm_62", [SM62, PTX50, SATOM]>;
8790

8891
def NVPTXInstrInfo : InstrInfo {
8992
}

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3274,27 +3274,42 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
32743274
return false;
32753275

32763276
case Intrinsic::nvvm_atomic_load_add_f32:
3277-
Info.opc = ISD::INTRINSIC_W_CHAIN;
3278-
Info.memVT = MVT::f32;
3279-
Info.ptrVal = I.getArgOperand(0);
3280-
Info.offset = 0;
3281-
Info.vol = 0;
3282-
Info.readMem = true;
3283-
Info.writeMem = true;
3284-
Info.align = 0;
3285-
return true;
3286-
32873277
case Intrinsic::nvvm_atomic_load_inc_32:
32883278
case Intrinsic::nvvm_atomic_load_dec_32:
3279+
3280+
case Intrinsic::nvvm_atomic_add_gen_f_cta:
3281+
case Intrinsic::nvvm_atomic_add_gen_f_sys:
3282+
case Intrinsic::nvvm_atomic_add_gen_i_cta:
3283+
case Intrinsic::nvvm_atomic_add_gen_i_sys:
3284+
case Intrinsic::nvvm_atomic_and_gen_i_cta:
3285+
case Intrinsic::nvvm_atomic_and_gen_i_sys:
3286+
case Intrinsic::nvvm_atomic_cas_gen_i_cta:
3287+
case Intrinsic::nvvm_atomic_cas_gen_i_sys:
3288+
case Intrinsic::nvvm_atomic_dec_gen_i_cta:
3289+
case Intrinsic::nvvm_atomic_dec_gen_i_sys:
3290+
case Intrinsic::nvvm_atomic_inc_gen_i_cta:
3291+
case Intrinsic::nvvm_atomic_inc_gen_i_sys:
3292+
case Intrinsic::nvvm_atomic_max_gen_i_cta:
3293+
case Intrinsic::nvvm_atomic_max_gen_i_sys:
3294+
case Intrinsic::nvvm_atomic_min_gen_i_cta:
3295+
case Intrinsic::nvvm_atomic_min_gen_i_sys:
3296+
case Intrinsic::nvvm_atomic_or_gen_i_cta:
3297+
case Intrinsic::nvvm_atomic_or_gen_i_sys:
3298+
case Intrinsic::nvvm_atomic_exch_gen_i_cta:
3299+
case Intrinsic::nvvm_atomic_exch_gen_i_sys:
3300+
case Intrinsic::nvvm_atomic_xor_gen_i_cta:
3301+
case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
3302+
auto &DL = I.getModule()->getDataLayout();
32893303
Info.opc = ISD::INTRINSIC_W_CHAIN;
3290-
Info.memVT = MVT::i32;
3304+
Info.memVT = getValueType(DL, I.getType());
32913305
Info.ptrVal = I.getArgOperand(0);
32923306
Info.offset = 0;
32933307
Info.vol = 0;
32943308
Info.readMem = true;
32953309
Info.writeMem = true;
32963310
Info.align = 0;
32973311
return true;
3312+
}
32983313

32993314
case Intrinsic::nvvm_ldu_global_i:
33003315
case Intrinsic::nvvm_ldu_global_f:

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
131131
def useAtomRedG64forGen64 :
132132
Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
133133
def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
134+
def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
135+
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
136+
def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
137+
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
134138
def hasVote : Predicate<"Subtarget->hasVote()">;
135139
def hasDouble : Predicate<"Subtarget->hasDouble()">;
136140
def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,8 +1377,204 @@ defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
13771377
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
13781378
".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
13791379

1380+
// Support for scoped atomic operations. Matches
1381+
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1382+
// and converts it into the appropriate instruction.
1383+
// NOTE: not all possible combinations are implemented
1384+
// 'space' is limited to generic as it's the only one needed to support CUDA.
1385+
// 'scope' = 'gpu' is default and is handled by regular atomic instructions.
1386+
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1387+
dag ins, dag Operands>
1388+
: NVPTXInst<(outs regclass:$result), ins,
1389+
AsmStr,
1390+
[(set regclass:$result, Operands)]>,
1391+
Requires<Preds>;
1392+
1393+
// Define instruction variants for all addressing modes.
1394+
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
1395+
NVPTXRegClass regclass, Operand ImmType,
1396+
SDNode Imm, ValueType ImmTy,
1397+
list<Predicate> Preds> {
1398+
let AddedComplexity = 1 in {
1399+
def : ATOM23_impl<AsmStr, regclass, Preds,
1400+
(ins Int32Regs:$src, regclass:$b),
1401+
(Intr Int32Regs:$src, regclass:$b)>;
1402+
def : ATOM23_impl<AsmStr, regclass, Preds,
1403+
(ins Int64Regs:$src, regclass:$b),
1404+
(Intr Int64Regs:$src, regclass:$b)>;
1405+
}
1406+
// tablegen can't infer argument types from Intrinsic (though it can
1407+
// from Instruction) so we have to enforce a specific type on
1408+
// immediates via explicit cast to ImmTy.
1409+
def : ATOM23_impl<AsmStr, regclass, Preds,
1410+
(ins Int32Regs:$src, ImmType:$b),
1411+
(Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1412+
def : ATOM23_impl<AsmStr, regclass, Preds,
1413+
(ins Int64Regs:$src, ImmType:$b),
1414+
(Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1415+
}
1416+
1417+
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
1418+
NVPTXRegClass regclass, Operand ImmType,
1419+
SDNode Imm, ValueType ImmTy,
1420+
list<Predicate> Preds> {
1421+
// Variants for register/immediate permutations of $b and $c
1422+
let AddedComplexity = 2 in {
1423+
def : ATOM23_impl<AsmStr, regclass, Preds,
1424+
(ins Int32Regs:$src, regclass:$b, regclass:$c),
1425+
(Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1426+
def : ATOM23_impl<AsmStr, regclass, Preds,
1427+
(ins Int64Regs:$src, regclass:$b, regclass:$c),
1428+
(Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1429+
}
1430+
let AddedComplexity = 1 in {
1431+
def : ATOM23_impl<AsmStr, regclass, Preds,
1432+
(ins Int32Regs:$src, ImmType:$b, regclass:$c),
1433+
(Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1434+
def : ATOM23_impl<AsmStr, regclass, Preds,
1435+
(ins Int64Regs:$src, ImmType:$b, regclass:$c),
1436+
(Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1437+
def : ATOM23_impl<AsmStr, regclass, Preds,
1438+
(ins Int32Regs:$src, regclass:$b, ImmType:$c),
1439+
(Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1440+
def : ATOM23_impl<AsmStr, regclass, Preds,
1441+
(ins Int64Regs:$src, regclass:$b, ImmType:$c),
1442+
(Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1443+
}
1444+
def : ATOM23_impl<AsmStr, regclass, Preds,
1445+
(ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1446+
(Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1447+
def : ATOM23_impl<AsmStr, regclass, Preds,
1448+
(ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1449+
(Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1450+
}
1451+
1452+
// Constructs intrinsic name and instruction asm strings.
1453+
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1454+
string ScopeStr, string SpaceStr,
1455+
NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1456+
ValueType ImmTy, list<Predicate> Preds> {
1457+
defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1458+
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1459+
# "." # OpStr # "." # TypeStr
1460+
# " \t$result, [$src], $b;",
1461+
!cast<Intrinsic>(
1462+
"int_nvvm_atomic_" # OpStr
1463+
# "_" # SpaceStr # "_" # IntTypeStr
1464+
# !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1465+
regclass, ImmType, Imm, ImmTy, Preds>;
1466+
}
1467+
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1468+
string ScopeStr, string SpaceStr,
1469+
NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1470+
ValueType ImmTy, list<Predicate> Preds> {
1471+
defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1472+
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1473+
# "." # OpStr # "." # TypeStr
1474+
# " \t$result, [$src], $b, $c;",
1475+
!cast<Intrinsic>(
1476+
"int_nvvm_atomic_" # OpStr
1477+
# "_" # SpaceStr # "_" # IntTypeStr
1478+
# !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
1479+
regclass, ImmType, Imm, ImmTy, Preds>;
1480+
}
1481+
1482+
// Constructs variants for different address spaces.
1483+
// For now we only need variants for generic space pointers.
1484+
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1485+
string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1486+
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1487+
defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1488+
regclass, ImmType, Imm, ImmTy, Preds>;
1489+
}
1490+
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1491+
string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1492+
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1493+
defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1494+
regclass, ImmType, Imm, ImmTy, Preds>;
1495+
}
1496+
1497+
// Constructs variants for different scopes of atomic op.
1498+
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1499+
NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1500+
ValueType ImmTy, list<Predicate> Preds> {
1501+
// .gpu scope is default and is currently covered by existing
1502+
// atomics w/o explicitly specified scope.
1503+
defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1504+
regclass, ImmType, Imm, ImmTy,
1505+
!listconcat(Preds,[hasAtomScope])>;
1506+
defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1507+
regclass, ImmType, Imm, ImmTy,
1508+
!listconcat(Preds,[hasAtomScope])>;
1509+
}
1510+
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1511+
NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1512+
list<Predicate> Preds> {
1513+
// No need to define ".gpu"-scoped atomics. They do the same thing
1514+
// as the regular, non-scoped atomics defined elsewhere.
1515+
defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1516+
regclass, ImmType, Imm, ImmTy,
1517+
!listconcat(Preds,[hasAtomScope])>;
1518+
defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1519+
regclass, ImmType, Imm, ImmTy,
1520+
!listconcat(Preds,[hasAtomScope])>;
1521+
}
13801522

1523+
// atom.add
1524+
multiclass ATOM2_add_impl<string OpStr> {
1525+
defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1526+
defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1527+
defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1528+
defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1529+
[hasAtomAddF32]>;
1530+
defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1531+
[hasAtomAddF64]>;
1532+
}
1533+
1534+
// atom.{and,or,xor}
1535+
multiclass ATOM2_bitwise_impl<string OpStr> {
1536+
defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1537+
defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1538+
[hasAtomBitwise64]>;
1539+
}
1540+
1541+
// atom.exch
1542+
multiclass ATOM2_exch_impl<string OpStr> {
1543+
defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1544+
defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1545+
}
1546+
1547+
// atom.{min,max}
1548+
multiclass ATOM2_minmax_impl<string OpStr> {
1549+
defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1550+
defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1551+
defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1552+
[hasAtomMinMax64]>;
1553+
defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1554+
[hasAtomMinMax64]>;
1555+
}
1556+
1557+
// atom.{inc,dec}
1558+
multiclass ATOM2_incdec_impl<string OpStr> {
1559+
defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1560+
}
1561+
1562+
// atom.cas
1563+
multiclass ATOM3_cas_impl<string OpStr> {
1564+
defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1565+
defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1566+
}
13811567

1568+
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1569+
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1570+
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1571+
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1572+
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1573+
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1574+
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1575+
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1576+
defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
1577+
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
13821578

13831579
//-----------------------------------
13841580
// Support for ldu on sm_20 or later

llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ void NVPTXSubtarget::anchor() {}
2929
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
3030
StringRef FS) {
3131
// Provide the default CPU if we don't have one.
32-
if (CPU.empty() && FS.size())
33-
llvm_unreachable("we are not using FeatureStr");
3432
TargetName = CPU.empty() ? "sm_20" : CPU;
3533

3634
ParseSubtargetFeatures(TargetName, FS);

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
4848
// FrameLowering class because TargetFrameLowering is abstract.
4949
NVPTXFrameLowering FrameLowering;
5050

51+
protected:
52+
// Processor supports scoped atomic operations.
53+
bool HasAtomScope;
54+
5155
public:
5256
/// This constructor initializes the data members to match that
5357
/// of the specified module.
@@ -77,6 +81,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
7781
bool hasAtomRedGen32() const { return SmVersion >= 20; }
7882
bool hasAtomRedGen64() const { return SmVersion >= 20; }
7983
bool hasAtomAddF32() const { return SmVersion >= 20; }
84+
bool hasAtomAddF64() const { return SmVersion >= 60; }
85+
bool hasAtomScope() const { return HasAtomScope; }
86+
bool hasAtomBitwise64() const { return SmVersion >= 32; }
87+
bool hasAtomMinMax64() const { return SmVersion >= 32; }
8088
bool hasVote() const { return SmVersion >= 12; }
8189
bool hasDouble() const { return SmVersion >= 13; }
8290
bool reqPTX20() const { return SmVersion >= 20; }

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,29 @@ static bool isNVVMAtomic(const IntrinsicInst *II) {
4242
case Intrinsic::nvvm_atomic_load_add_f32:
4343
case Intrinsic::nvvm_atomic_load_inc_32:
4444
case Intrinsic::nvvm_atomic_load_dec_32:
45+
46+
case Intrinsic::nvvm_atomic_add_gen_f_cta:
47+
case Intrinsic::nvvm_atomic_add_gen_f_sys:
48+
case Intrinsic::nvvm_atomic_add_gen_i_cta:
49+
case Intrinsic::nvvm_atomic_add_gen_i_sys:
50+
case Intrinsic::nvvm_atomic_and_gen_i_cta:
51+
case Intrinsic::nvvm_atomic_and_gen_i_sys:
52+
case Intrinsic::nvvm_atomic_cas_gen_i_cta:
53+
case Intrinsic::nvvm_atomic_cas_gen_i_sys:
54+
case Intrinsic::nvvm_atomic_dec_gen_i_cta:
55+
case Intrinsic::nvvm_atomic_dec_gen_i_sys:
56+
case Intrinsic::nvvm_atomic_inc_gen_i_cta:
57+
case Intrinsic::nvvm_atomic_inc_gen_i_sys:
58+
case Intrinsic::nvvm_atomic_max_gen_i_cta:
59+
case Intrinsic::nvvm_atomic_max_gen_i_sys:
60+
case Intrinsic::nvvm_atomic_min_gen_i_cta:
61+
case Intrinsic::nvvm_atomic_min_gen_i_sys:
62+
case Intrinsic::nvvm_atomic_or_gen_i_cta:
63+
case Intrinsic::nvvm_atomic_or_gen_i_sys:
64+
case Intrinsic::nvvm_atomic_exch_gen_i_cta:
65+
case Intrinsic::nvvm_atomic_exch_gen_i_sys:
66+
case Intrinsic::nvvm_atomic_xor_gen_i_cta:
67+
case Intrinsic::nvvm_atomic_xor_gen_i_sys:
4568
return true;
4669
}
4770
}

0 commit comments

Comments
 (0)