Skip to content

Commit c2093b8

Browse files
committed
[AMDGPU] Add target features for GDS and GWS
GFX9 subtargets from GFX90A onwards lack GDS but still have GWS. Differential Revision: https://reviews.llvm.org/D156713
1 parent 8f973d5 commit c2093b8

File tree

8 files changed

+90
-36
lines changed

8 files changed

+90
-36
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
899899
"Enable the architected SGPRs"
900900
>;
901901

902+
def FeatureGDS : SubtargetFeature<"gds",
903+
"HasGDS",
904+
"true",
905+
"Has Global Data Share"
906+
>;
907+
908+
def FeatureGWS : SubtargetFeature<"gws",
909+
"HasGWS",
910+
"true",
911+
"Has Global Wave Sync"
912+
>;
913+
902914
// Dummy feature used to disable assembler instructions.
903915
def FeatureDisable : SubtargetFeature<"",
904916
"FeatureDisable","true",
@@ -917,7 +929,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
917929
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
918930
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
919931
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
920-
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
932+
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
933+
FeatureGDS, FeatureGWS
921934
]
922935
>;
923936

@@ -928,7 +941,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
928941
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
929942
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
930943
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
931-
FeatureImageInsts
944+
FeatureImageInsts, FeatureGDS, FeatureGWS
932945
]
933946
>;
934947

@@ -943,7 +956,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
943956
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
944957
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
945958
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
946-
FeatureUnalignedBufferAccess, FeatureImageInsts
959+
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
947960
]
948961
>;
949962

@@ -961,7 +974,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
961974
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
962975
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
963976
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
964-
FeatureNegativeScratchOffsetBug
977+
FeatureNegativeScratchOffsetBug, FeatureGWS
965978
]
966979
>;
967980

@@ -980,7 +993,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
980993
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
981994
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
982995
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
983-
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
996+
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
997+
FeatureGDS, FeatureGWS
984998
]
985999
>;
9861000

@@ -999,7 +1013,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
9991013
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
10001014
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
10011015
FeatureA16, FeatureFastDenormalF32, FeatureG16,
1002-
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
1016+
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
1017+
FeatureGWS
10031018
]
10041019
>;
10051020

@@ -1104,28 +1119,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
11041119

11051120
def FeatureISAVersion9_0_0 : FeatureSet<
11061121
!listconcat(FeatureISAVersion9_0_Common.Features,
1107-
[FeatureMadMixInsts,
1122+
[FeatureGDS,
1123+
FeatureMadMixInsts,
11081124
FeatureDsSrc2Insts,
11091125
FeatureExtendedImageInsts,
11101126
FeatureImageGather4D16Bug])>;
11111127

11121128
def FeatureISAVersion9_0_2 : FeatureSet<
11131129
!listconcat(FeatureISAVersion9_0_Common.Features,
1114-
[FeatureMadMixInsts,
1130+
[FeatureGDS,
1131+
FeatureMadMixInsts,
11151132
FeatureDsSrc2Insts,
11161133
FeatureExtendedImageInsts,
11171134
FeatureImageGather4D16Bug])>;
11181135

11191136
def FeatureISAVersion9_0_4 : FeatureSet<
11201137
!listconcat(FeatureISAVersion9_0_Common.Features,
1121-
[FeatureDsSrc2Insts,
1138+
[FeatureGDS,
1139+
FeatureDsSrc2Insts,
11221140
FeatureExtendedImageInsts,
11231141
FeatureFmaMixInsts,
11241142
FeatureImageGather4D16Bug])>;
11251143

11261144
def FeatureISAVersion9_0_6 : FeatureSet<
11271145
!listconcat(FeatureISAVersion9_0_Common.Features,
1128-
[HalfRate64Ops,
1146+
[FeatureGDS,
1147+
HalfRate64Ops,
11291148
FeatureFmaMixInsts,
11301149
FeatureDsSrc2Insts,
11311150
FeatureExtendedImageInsts,
@@ -1139,7 +1158,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
11391158

11401159
def FeatureISAVersion9_0_8 : FeatureSet<
11411160
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
1142-
[HalfRate64Ops,
1161+
[FeatureGDS,
1162+
HalfRate64Ops,
11431163
FeatureDsSrc2Insts,
11441164
FeatureExtendedImageInsts,
11451165
FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
@@ -1148,7 +1168,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
11481168

11491169
def FeatureISAVersion9_0_9 : FeatureSet<
11501170
!listconcat(FeatureISAVersion9_0_Common.Features,
1151-
[FeatureMadMixInsts,
1171+
[FeatureGDS,
1172+
FeatureMadMixInsts,
11521173
FeatureDsSrc2Insts,
11531174
FeatureExtendedImageInsts,
11541175
FeatureImageInsts,
@@ -1168,7 +1189,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
11681189

11691190
def FeatureISAVersion9_0_C : FeatureSet<
11701191
!listconcat(FeatureISAVersion9_0_Common.Features,
1171-
[FeatureMadMixInsts,
1192+
[FeatureGDS,
1193+
FeatureMadMixInsts,
11721194
FeatureDsSrc2Insts,
11731195
FeatureExtendedImageInsts,
11741196
FeatureImageGather4D16Bug])>;
@@ -1836,6 +1858,10 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
18361858

18371859
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
18381860

1861+
def HasGDS : Predicate<"Subtarget->hasGDS()">;
1862+
1863+
def HasGWS : Predicate<"Subtarget->hasGWS()">;
1864+
18391865
// Include AMDGPU TD files
18401866
include "SISchedule.td"
18411867
include "GCNProcessors.td"

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,8 +2369,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
23692369
}
23702370

23712371
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2372-
if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2373-
!Subtarget->hasGWSSemaReleaseAll()) {
2372+
if (!Subtarget->hasGWS() ||
2373+
(IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2374+
!Subtarget->hasGWSSemaReleaseAll())) {
23742375
// Let this error.
23752376
SelectCode(N);
23762377
return;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,8 +1581,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
15811581

15821582
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
15831583
Intrinsic::ID IID) const {
1584-
if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1585-
!STI.hasGWSSemaReleaseAll())
1584+
if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1585+
!STI.hasGWSSemaReleaseAll()))
15861586
return false;
15871587

15881588
// intrinsic ID, vsrc, offset

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -969,8 +969,10 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
969969
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
970970
}
971971

972-
def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
973-
/* complexity */ 0, /* gds */ 1>;
972+
let OtherPredicates = [HasGDS] in {
973+
def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
974+
/* complexity */ 0, /* gds */ 1>;
975+
}
974976
}
975977

976978
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
@@ -989,12 +991,14 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
989991
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
990992
}
991993

992-
def : DSAtomicRetPat<inst, vt,
993-
!cast<PatFrag>(frag#"_region_m0_"#vt.Size),
994-
/* complexity */ 0, /* gds */ 1>;
995-
def : DSAtomicRetPat<noRetInst, vt,
996-
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
997-
/* complexity */ 1, /* gds */ 1>;
994+
let OtherPredicates = [HasGDS] in {
995+
def : DSAtomicRetPat<inst, vt,
996+
!cast<PatFrag>(frag#"_region_m0_"#vt.Size),
997+
/* complexity */ 0, /* gds */ 1>;
998+
def : DSAtomicRetPat<noRetInst, vt,
999+
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
1000+
/* complexity */ 1, /* gds */ 1>;
1001+
}
9981002
}
9991003

10001004

@@ -1024,10 +1028,12 @@ multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueT
10241028
/* complexity */ 1>;
10251029
}
10261030

1027-
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
1028-
/* complexity */ 0, /* gds */ 1>;
1029-
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
1030-
/* complexity */ 1, /* gds */ 1>;
1031+
let OtherPredicates = [HasGDS] in {
1032+
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
1033+
/* complexity */ 0, /* gds */ 1>;
1034+
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
1035+
/* complexity */ 1, /* gds */ 1>;
1036+
}
10311037
}
10321038
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
10331039

@@ -1047,10 +1053,12 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
10471053
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
10481054
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
10491055

1050-
def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
1051-
/* complexity */ 0, /* gds */ 1>;
1052-
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
1053-
/* complexity */ 1, /* gds */ 1>;
1056+
let OtherPredicates = [HasGDS] in {
1057+
def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
1058+
/* complexity */ 0, /* gds */ 1>;
1059+
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
1060+
/* complexity */ 1, /* gds */ 1>;
1061+
}
10541062
}
10551063
} // End SubtargetPredicate = isGFX11Plus
10561064

llvm/lib/Target/AMDGPU/GCNProcessors.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
// The code produced for "generic" is only useful for tests and cannot
1010
// reasonably be expected to execute on any particular target.
1111
def : ProcessorModel<"generic", NoSchedModel,
12-
[FeatureWavefrontSize64]
12+
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
1313
>;
1414

1515
def : ProcessorModel<"generic-hsa", NoSchedModel,
16-
[FeatureWavefrontSize64, FeatureFlatAddressSpace]
16+
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
1717
>;
1818

1919
//===------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
180180
bool HasArchitectedFlatScratch = false;
181181
bool EnableFlatScratch = false;
182182
bool HasArchitectedSGPRs = false;
183+
bool HasGDS = false;
184+
bool HasGWS = false;
183185
bool AddNoCarryInsts = false;
184186
bool HasUnpackedD16VMem = false;
185187
bool LDSMisalignedBug = false;
@@ -1155,6 +1157,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
11551157
/// \returns true if the architected SGPRs are enabled.
11561158
bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
11571159

1160+
/// \returns true if Global Data Share is supported.
1161+
bool hasGDS() const { return HasGDS; }
1162+
1163+
/// \returns true if Global Wave Sync is supported.
1164+
bool hasGWS() const { return HasGWS; }
1165+
11581166
/// \returns true if the machine has merged shaders in which s0-s7 are
11591167
/// reserved by the hardware and user SGPRs start at s8
11601168
bool hasMergedShaders() const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1414,7 +1414,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
14141414
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
14151415
return isLegalMUBUFAddressingMode(AM);
14161416

1417-
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
1417+
if (AS == AMDGPUAS::LOCAL_ADDRESS ||
1418+
(AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
14181419
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
14191420
// field.
14201421
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; RUN: not --crash llc -march=amdgcn -mcpu=gfx90a < %s 2>&1 | FileCheck %s
2+
3+
; GDS is not supported on GFX90A+
4+
; CHECK: LLVM ERROR: Cannot select: {{.*}} AtomicLoadAdd
5+
6+
define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
7+
%val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel
8+
store i32 %val, ptr addrspace(1) %out
9+
ret void
10+
}

0 commit comments

Comments
 (0)