@@ -1377,8 +1377,204 @@ defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;

+ // Support for scoped atomic operations. Matches
+ // int_nvvm_atomic_{op}_{space}_{type}_{scope}
+ // and converts it into the appropriate instruction.
+ // NOTE: not all possible combinations are implemented
+ // 'space' is limited to generic as it's the only one needed to support CUDA.
+ // 'scope' = 'gpu' is default and is handled by regular atomic instructions.
+ class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
+                   dag ins, dag Operands>
+   : NVPTXInst<(outs regclass:$result), ins,
+               AsmStr,
+               [(set regclass:$result, Operands)]>,
+     Requires<Preds>;
+
+ // Define instruction variants for all addressing modes.
+ multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
+                        NVPTXRegClass regclass, Operand ImmType,
+                        SDNode Imm, ValueType ImmTy,
+                        list<Predicate> Preds> {
+   let AddedComplexity = 1 in {
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int32Regs:$src, regclass:$b),
+                       (Intr Int32Regs:$src, regclass:$b)>;
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int64Regs:$src, regclass:$b),
+                       (Intr Int64Regs:$src, regclass:$b)>;
+   }
+   // tablegen can't infer argument types from Intrinsic (though it can
+   // from Instruction) so we have to enforce specific type on
+   // immediates via explicit cast to ImmTy.
+   def : ATOM23_impl<AsmStr, regclass, Preds,
+                     (ins Int32Regs:$src, ImmType:$b),
+                     (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
+   def : ATOM23_impl<AsmStr, regclass, Preds,
+                     (ins Int64Regs:$src, ImmType:$b),
+                     (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
+ }
+
+ multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
+                        NVPTXRegClass regclass, Operand ImmType,
+                        SDNode Imm, ValueType ImmTy,
+                        list<Predicate> Preds> {
+   // Variants for register/immediate permutations of $b and $c
+   let AddedComplexity = 2 in {
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int32Regs:$src, regclass:$b, regclass:$c),
+                       (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int64Regs:$src, regclass:$b, regclass:$c),
+                       (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
+   }
+   let AddedComplexity = 1 in {
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int32Regs:$src, ImmType:$b, regclass:$c),
+                       (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int64Regs:$src, ImmType:$b, regclass:$c),
+                       (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int32Regs:$src, regclass:$b, ImmType:$c),
+                       (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+     def : ATOM23_impl<AsmStr, regclass, Preds,
+                       (ins Int64Regs:$src, regclass:$b, ImmType:$c),
+                       (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+   }
+   def : ATOM23_impl<AsmStr, regclass, Preds,
+                     (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
+                     (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+   def : ATOM23_impl<AsmStr, regclass, Preds,
+                     (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
+                     (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ }
+
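For reference, each ATOM2P_impl instantiation expands to four selection patterns per op/type pair (32- and 64-bit address registers, each with $b as a register or as an immediate), and ATOM3P_impl expands to eight, since $b and $c can each independently be a register or an immediate. The AddedComplexity values make instruction selection try the register forms ahead of the immediate ones whenever more than one pattern could match.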
+ // Constructs intrinsic name and instruction asm strings.
+ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        string ScopeStr, string SpaceStr,
+                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+                        ValueType ImmTy, list<Predicate> Preds> {
+   defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
+                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
+                             # "." # OpStr # "." # TypeStr
+                             # " \t$result, [$src], $b;",
+                      !cast<Intrinsic>(
+                             "int_nvvm_atomic_" # OpStr
+                             # "_" # SpaceStr # "_" # IntTypeStr
+                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+                      regclass, ImmType, Imm, ImmTy, Preds>;
+ }
+ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        string ScopeStr, string SpaceStr,
+                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+                        ValueType ImmTy, list<Predicate> Preds> {
+   defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
+                             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
+                             # "." # OpStr # "." # TypeStr
+                             # " \t$result, [$src], $b, $c;",
+                      !cast<Intrinsic>(
+                             "int_nvvm_atomic_" # OpStr
+                             # "_" # SpaceStr # "_" # IntTypeStr
+                             # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+                      regclass, ImmType, Imm, ImmTy, Preds>;
+ }
+
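As a worked example of the string construction above: ATOM2N_impl<"add", "i", "u32", "cta", "gen", ...> produces the asm string "atom.cta.add.u32 \t$result, [$src], $b;" and looks up the intrinsic int_nvvm_atomic_add_gen_i_cta. The generic space and the default gpu scope are simply omitted from the PTX mnemonic, and the mnemonic uses the fine-grained TypeStr ("u32") while the intrinsic name uses the coarser IntTypeStr ("i").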
+ // Constructs variants for different address spaces.
+ // For now we only need variants for generic space pointers.
+ multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
+   defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
+                            regclass, ImmType, Imm, ImmTy, Preds>;
+ }
+ multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+                        SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
+   defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
+                            regclass, ImmType, Imm, ImmTy, Preds>;
+ }
+
+ // Constructs variants for different scopes of atomic op.
+ multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+                        ValueType ImmTy, list<Predicate> Preds> {
+   // .gpu scope is default and is currently covered by existing
+   // atomics w/o explicitly specified scope.
+   defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
+                           regclass, ImmType, Imm, ImmTy,
+                           !listconcat(Preds,[hasAtomScope])>;
+   defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
+                           regclass, ImmType, Imm, ImmTy,
+                           !listconcat(Preds,[hasAtomScope])>;
+ }
+ multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
+                        NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
+                        list<Predicate> Preds> {
+   // No need to define ".gpu"-scoped atomics. They do the same thing
+   // as the regular, non-scoped atomics defined elsewhere.
+   defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
+                           regclass, ImmType, Imm, ImmTy,
+                           !listconcat(Preds,[hasAtomScope])>;
+   defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
+                           regclass, ImmType, Imm, ImmTy,
+                           !listconcat(Preds,[hasAtomScope])>;
+ }
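Putting the layers together for one case: instantiating ATOM2S_impl for "add" with the u32 type produces _cta_gen_ and _sys_gen_ variants, each predicated on hasAtomScope in addition to any operation-specific predicates. They match the intrinsics int_nvvm_atomic_add_gen_i_cta and int_nvvm_atomic_add_gen_i_sys and emit atom.cta.add.u32 and atom.sys.add.u32 respectively, while gpu-scoped requests continue to flow through the pre-existing unscoped atom.add patterns.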

+ // atom.add
+ multiclass ATOM2_add_impl<string OpStr> {
+   defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
+   defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+   defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
+   defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
+                           [hasAtomAddF32]>;
+   defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
+                           [hasAtomAddF64]>;
+ }
+
+ // atom.{and,or,xor}
+ multiclass ATOM2_bitwise_impl<string OpStr> {
+   defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+   defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
+                           [hasAtomBitwise64]>;
+ }
+
+ // atom.exch
+ multiclass ATOM2_exch_impl<string OpStr> {
+   defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+   defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ }
+
+ // atom.{min,max}
+ multiclass ATOM2_minmax_impl<string OpStr> {
+   defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
+   defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+   defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
+                           [hasAtomMinMax64]>;
+   defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
+                           [hasAtomMinMax64]>;
+ }
+
+ // atom.{inc,dec}
+ multiclass ATOM2_incdec_impl<string OpStr> {
+   defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+ }
+
+ // atom.cas
+ multiclass ATOM3_cas_impl<string OpStr> {
+   defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+   defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ }

+ defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
+ defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
+ defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
+ defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
+ defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
+ defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
+ defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
+ defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
+ defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
+ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
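The defm lines above only add instruction-selection patterns; the scoped intrinsics themselves have to be produced by the front end. A rough sketch of the kind of CUDA source expected to exercise them follows. The mapping from these CUDA functions to the cta/sys-scoped NVVM intrinsics is an assumption, not something the patch itself shows, and the kernel and variable names are made up; atomicAdd_block and atomicAdd_system themselves are standard CUDA device functions on sm_60+.

  // Minimal sketch; build for an sm_60+ target, e.g. nvcc -arch=sm_60.
  // The comments about the resulting PTX describe the expected outcome,
  // not something guaranteed by this patch.
  __global__ void scoped_add(int *counter) {
    atomicAdd_block(counter, 1);   // block (cta) scope, expected atom.cta.add.u32
    atomicAdd_system(counter, 1);  // system scope, expected atom.sys.add.u32
  }

Either way, the cta/sys variants are only selectable on targets where the hasAtomScope predicate holds.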

//-----------------------------------
// Support for ldu on sm_20 or later