Skip to content

Commit 07a6d73

Browse files
authored
[AMDGPU] CodeGen for GFX12 VFLAT, VSCRATCH and VGLOBAL instructions (#75493)
1 parent 214d32c commit 07a6d73

File tree

57 files changed

+13866
-373
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+13866
-373
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2486,6 +2486,11 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
24862486
[IntrNoMem, IntrConvergent, IntrWillReturn,
24872487
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
24882488

2489+
def int_amdgcn_flat_atomic_fmin_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2490+
def int_amdgcn_flat_atomic_fmax_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2491+
def int_amdgcn_global_atomic_fmin_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2492+
def int_amdgcn_global_atomic_fmax_num : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2493+
24892494
//===----------------------------------------------------------------------===//
24902495
// Deep learning intrinsics.
24912496
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1166,6 +1166,11 @@ bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
11661166
if (isNoUnsignedWrap(Addr))
11671167
return true;
11681168

1169+
// Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
1170+
// values.
1171+
if (AMDGPU::isGFX12Plus(*Subtarget))
1172+
return true;
1173+
11691174
auto LHS = Addr.getOperand(0);
11701175
auto RHS = Addr.getOperand(1);
11711176

@@ -1701,7 +1706,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
17011706
}
17021707

17031708
VAddr = Addr;
1704-
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1709+
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
17051710
return true;
17061711
}
17071712

@@ -1769,7 +1774,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
17691774
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
17701775
VOffset = SDValue(VMov, 0);
17711776
SAddr = LHS;
1772-
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1777+
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
17731778
return true;
17741779
}
17751780
}
@@ -1809,7 +1814,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
18091814
}
18101815

18111816
if (SAddr) {
1812-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1817+
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
18131818
return true;
18141819
}
18151820
}
@@ -1825,7 +1830,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
18251830
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
18261831
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
18271832
VOffset = SDValue(VMov, 0);
1828-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1833+
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
18291834
return true;
18301835
}
18311836

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -642,6 +642,10 @@ defm int_amdgcn_global_atomic_fmax : noret_op;
642642
defm int_amdgcn_global_atomic_csub : noret_op;
643643
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
644644
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
645+
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
646+
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
647+
defm int_amdgcn_global_atomic_fmin_num : noret_op;
648+
defm int_amdgcn_global_atomic_fmax_num : noret_op;
645649

646650
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
647651
let HasNoUse = true in

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4663,9 +4663,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
46634663
case Intrinsic::amdgcn_global_atomic_csub:
46644664
case Intrinsic::amdgcn_global_atomic_fmin:
46654665
case Intrinsic::amdgcn_global_atomic_fmax:
4666+
case Intrinsic::amdgcn_global_atomic_fmin_num:
4667+
case Intrinsic::amdgcn_global_atomic_fmax_num:
46664668
case Intrinsic::amdgcn_flat_atomic_fadd:
46674669
case Intrinsic::amdgcn_flat_atomic_fmin:
46684670
case Intrinsic::amdgcn_flat_atomic_fmax:
4671+
case Intrinsic::amdgcn_flat_atomic_fmin_num:
4672+
case Intrinsic::amdgcn_flat_atomic_fmax_num:
46694673
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
46704674
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
46714675
return getDefaultMappingAllVGPR(MI);

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,9 +241,13 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
241241
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
242242
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
243243
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
244+
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>;
245+
def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>;
244246
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
245247
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
246248
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
249+
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
250+
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
247251
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
248252
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
249253
def : SourceOfDivergence<int_amdgcn_ds_fadd>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1026,6 +1026,8 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
10261026
case Intrinsic::amdgcn_flat_atomic_fadd:
10271027
case Intrinsic::amdgcn_flat_atomic_fmax:
10281028
case Intrinsic::amdgcn_flat_atomic_fmin:
1029+
case Intrinsic::amdgcn_flat_atomic_fmax_num:
1030+
case Intrinsic::amdgcn_flat_atomic_fmin_num:
10291031
OpIndexes.push_back(0);
10301032
return true;
10311033
default:
@@ -1100,7 +1102,9 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
11001102
}
11011103
case Intrinsic::amdgcn_flat_atomic_fadd:
11021104
case Intrinsic::amdgcn_flat_atomic_fmax:
1103-
case Intrinsic::amdgcn_flat_atomic_fmin: {
1105+
case Intrinsic::amdgcn_flat_atomic_fmin:
1106+
case Intrinsic::amdgcn_flat_atomic_fmax_num:
1107+
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
11041108
Type *DestTy = II->getType();
11051109
Type *SrcTy = NewV->getType();
11061110
unsigned NewAS = SrcTy->getPointerAddressSpace();

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 46 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1072,19 +1072,43 @@ class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
10721072
(inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
10731073
>;
10741074

1075-
multiclass FlatAtomicPat <string inst, string node, ValueType vt,
1076-
ValueType data_vt = vt> {
1077-
defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size);
1078-
defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size);
1079-
1080-
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1081-
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1075+
multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt,
1076+
ValueType data_vt = vt, bit isIntr = 0> {
1077+
defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_"#vt.Size));
10821078

10831079
let AddedComplexity = 1 in
10841080
def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
10851081
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
10861082
}
10871083

1084+
multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt,
1085+
ValueType data_vt = vt, bit isIntr = 0> {
1086+
defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_"#vt.Size));
1087+
1088+
def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
1089+
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1090+
}
1091+
1092+
multiclass FlatAtomicPat <string inst, string node, ValueType vt,
1093+
ValueType data_vt = vt, bit isIntr = 0> :
1094+
FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
1095+
FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;
1096+
1097+
multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt,
1098+
ValueType data_vt = vt> {
1099+
defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1100+
}
1101+
1102+
multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt,
1103+
ValueType data_vt = vt> {
1104+
defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1105+
}
1106+
1107+
multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt,
1108+
ValueType data_vt = vt> :
1109+
FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
1110+
FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1111+
10881112
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
10891113
ValueType vt, ValueType data_vt = vt> : GCNPat <
10901114
(vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
@@ -1305,10 +1329,10 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
13051329
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
13061330
ValueType data_vt = vt> {
13071331
let AddedComplexity = 11 in
1308-
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<PatFrags>(node), vt, data_vt>;
1332+
def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>;
13091333

13101334
let AddedComplexity = 13 in
1311-
def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<PatFrags>(node), vt, data_vt>;
1335+
def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>;
13121336
}
13131337

13141338
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
@@ -1508,10 +1532,14 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i
15081532
let OtherPredicates = [isGFX10Plus] in {
15091533
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
15101534
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1511-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
1512-
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
15131535
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
15141536
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1537+
}
1538+
1539+
let OtherPredicates = [isGFX10GFX11] in {
1540+
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
1541+
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1542+
15151543
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
15161544
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
15171545
}
@@ -1527,6 +1555,13 @@ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_f
15271555
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
15281556
}
15291557

1558+
let OtherPredicates = [isGFX12Only] in {
1559+
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
1560+
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
1561+
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>;
1562+
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>;
1563+
}
1564+
15301565
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
15311566
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
15321567
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1230,9 +1230,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12301230
case Intrinsic::amdgcn_global_atomic_fadd:
12311231
case Intrinsic::amdgcn_global_atomic_fmin:
12321232
case Intrinsic::amdgcn_global_atomic_fmax:
1233+
case Intrinsic::amdgcn_global_atomic_fmin_num:
1234+
case Intrinsic::amdgcn_global_atomic_fmax_num:
12331235
case Intrinsic::amdgcn_flat_atomic_fadd:
12341236
case Intrinsic::amdgcn_flat_atomic_fmin:
12351237
case Intrinsic::amdgcn_flat_atomic_fmax:
1238+
case Intrinsic::amdgcn_flat_atomic_fmin_num:
1239+
case Intrinsic::amdgcn_flat_atomic_fmax_num:
12361240
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
12371241
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
12381242
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1315,6 +1319,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
13151319
case Intrinsic::amdgcn_flat_atomic_fadd:
13161320
case Intrinsic::amdgcn_flat_atomic_fmin:
13171321
case Intrinsic::amdgcn_flat_atomic_fmax:
1322+
case Intrinsic::amdgcn_flat_atomic_fmin_num:
1323+
case Intrinsic::amdgcn_flat_atomic_fmax_num:
13181324
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
13191325
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
13201326
case Intrinsic::amdgcn_global_atomic_csub: {
@@ -8642,8 +8648,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
86428648
}
86438649
case Intrinsic::amdgcn_global_atomic_fmin:
86448650
case Intrinsic::amdgcn_global_atomic_fmax:
8651+
case Intrinsic::amdgcn_global_atomic_fmin_num:
8652+
case Intrinsic::amdgcn_global_atomic_fmax_num:
86458653
case Intrinsic::amdgcn_flat_atomic_fmin:
8646-
case Intrinsic::amdgcn_flat_atomic_fmax: {
8654+
case Intrinsic::amdgcn_flat_atomic_fmax:
8655+
case Intrinsic::amdgcn_flat_atomic_fmin_num:
8656+
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
86478657
MemSDNode *M = cast<MemSDNode>(Op);
86488658
SDValue Ops[] = {
86498659
M->getOperand(0), // Chain
@@ -8653,12 +8663,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
86538663
unsigned Opcode = 0;
86548664
switch (IntrID) {
86558665
case Intrinsic::amdgcn_global_atomic_fmin:
8656-
case Intrinsic::amdgcn_flat_atomic_fmin: {
8666+
case Intrinsic::amdgcn_global_atomic_fmin_num:
8667+
case Intrinsic::amdgcn_flat_atomic_fmin:
8668+
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
86578669
Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN;
86588670
break;
86598671
}
86608672
case Intrinsic::amdgcn_global_atomic_fmax:
8661-
case Intrinsic::amdgcn_flat_atomic_fmax: {
8673+
case Intrinsic::amdgcn_global_atomic_fmax_num:
8674+
case Intrinsic::amdgcn_flat_atomic_fmax:
8675+
case Intrinsic::amdgcn_flat_atomic_fmax_num: {
86628676
Opcode = AMDGPUISD::ATOMIC_LOAD_FMAX;
86638677
break;
86648678
}

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8689,16 +8689,13 @@ bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
86898689
AddrSpace == AMDGPUAS::GLOBAL_ADDRESS))
86908690
return false;
86918691

8692-
bool AllowNegative = FlatVariant != SIInstrFlags::FLAT;
8693-
if (ST.hasNegativeScratchOffsetBug() &&
8694-
FlatVariant == SIInstrFlags::FlatScratch)
8695-
AllowNegative = false;
86968692
if (ST.hasNegativeUnalignedScratchOffsetBug() &&
86978693
FlatVariant == SIInstrFlags::FlatScratch && Offset < 0 &&
86988694
(Offset % 4) != 0) {
86998695
return false;
87008696
}
87018697

8698+
bool AllowNegative = allowNegativeFlatOffset(FlatVariant);
87028699
unsigned N = AMDGPU::getNumFlatOffsetBits(ST);
87038700
return isIntN(N, Offset) && (AllowNegative || Offset >= 0);
87048701
}
@@ -8709,12 +8706,10 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
87098706
uint64_t FlatVariant) const {
87108707
int64_t RemainderOffset = COffsetVal;
87118708
int64_t ImmField = 0;
8712-
bool AllowNegative = FlatVariant != SIInstrFlags::FLAT;
8713-
if (ST.hasNegativeScratchOffsetBug() &&
8714-
FlatVariant == SIInstrFlags::FlatScratch)
8715-
AllowNegative = false;
87168709

8710+
bool AllowNegative = allowNegativeFlatOffset(FlatVariant);
87178711
const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST) - 1;
8712+
87188713
if (AllowNegative) {
87198714
// Use signed division by a power of two to truncate towards 0.
87208715
int64_t D = 1LL << NumBits;
@@ -8738,6 +8733,14 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
87388733
return {ImmField, RemainderOffset};
87398734
}
87408735

8736+
bool SIInstrInfo::allowNegativeFlatOffset(uint64_t FlatVariant) const {
8737+
if (ST.hasNegativeScratchOffsetBug() &&
8738+
FlatVariant == SIInstrFlags::FlatScratch)
8739+
return false;
8740+
8741+
return FlatVariant != SIInstrFlags::FLAT || AMDGPU::isGFX12Plus(ST);
8742+
}
8743+
87418744
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
87428745
switch (ST.getGeneration()) {
87438746
default:

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1294,6 +1294,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
12941294
unsigned AddrSpace,
12951295
uint64_t FlatVariant) const;
12961296

1297+
/// Returns true if negative offsets are allowed for the given \p FlatVariant.
1298+
bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1299+
12971300
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
12981301
/// Return -1 if the target-specific opcode for the pseudo instruction does
12991302
/// not exist. If Opcode is not a pseudo instruction, this is identity.

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
33
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
4+
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
45

56
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
67
; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic

0 commit comments

Comments
 (0)