Skip to content

Commit 5f039ef

Browse files
committed
[AMDGPU] Add "lds-buffer-load-insts" attribute for all targets < gfx11
This are used to restrict the availability of buffer_load_lds intrinsics to targets that actually have this instructions.
1 parent 2b7fe90 commit 5f039ef

File tree

9 files changed

+31
-15
lines changed

9 files changed

+31
-15
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
260260

261261
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
262262
CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
263-
for (auto F : {"image-insts", "gws"})
263+
for (auto F : {"image-insts", "gws", "lds-buffer-load-insts"})
264264
ReadOnlyFeatures.insert(F);
265265
HalfArgsAndReturns = true;
266266
}

clang/test/CodeGen/link-builtin-bitcode.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
4444
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
4545

4646
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
47-
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
48-
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
49-
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }
47+
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
48+
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
49+
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ const B& f(A *a) {
111111
// CHECK: attributes #[[ATTR3]] = { nounwind }
112112
// CHECK: attributes #[[ATTR4]] = { noreturn }
113113
//.
114-
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
114+
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
115115
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
116-
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
116+
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
117117
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
118118
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
119119
//.

flang/test/Lower/OpenMP/target_cpu_features.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
1212
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
1313
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
14-
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts",
15-
!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
14+
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lds-buffer-load-insts",
15+
!AMDGCN-SAME: "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
1616

1717
!NVPTX: module attributes {
1818
!NVPTX-SAME: fir.target_cpu = "sm_80"

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,12 @@ def FeatureLshlAddU64Inst
12731273
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
12741274
"Has v_lshl_add_u64 instruction">;
12751275

1276+
def FeatureLDSBufferLoad : SubtargetFeature<"lds-buffer-load-insts",
1277+
"HasLDSBufferLoad",
1278+
"true",
1279+
"The platform has buffer_load lds instructions"
1280+
>;
1281+
12761282
// Dummy feature used to disable assembler instructions.
12771283
def FeatureDisable : SubtargetFeature<"",
12781284
"FeatureDisable","true",
@@ -1294,7 +1300,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
12941300
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
12951301
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
12961302
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1297-
FeatureVmemWriteVgprInOrder
1303+
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
12981304
]
12991305
>;
13001306

@@ -1308,7 +1314,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
13081314
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
13091315
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
13101316
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
1311-
FeatureVmemWriteVgprInOrder
1317+
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
13121318
]
13131319
>;
13141320

@@ -1324,7 +1330,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
13241330
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
13251331
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
13261332
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
1327-
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
1333+
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
13281334
]
13291335
>;
13301336

@@ -1343,7 +1349,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
13431349
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
13441350
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
13451351
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
1346-
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder
1352+
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
13471353
]
13481354
>;
13491355

@@ -1367,7 +1373,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
13671373
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
13681374
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
13691375
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
1370-
FeatureVmemWriteVgprInOrder
1376+
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
13711377
]
13721378
>;
13731379

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3369,7 +3369,8 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
33693369
}
33703370

33713371
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3372-
assert(!AMDGPU::isGFX12Plus(STI));
3372+
if (!Subtarget->hasLDSBufferLoad())
3373+
return false;
33733374
unsigned Opc;
33743375
unsigned Size = MI.getOperand(3).getImm();
33753376

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
193193
bool SupportsSRAMECC = false;
194194
bool DynamicVGPR = false;
195195
bool DynamicVGPRBlockSize32 = false;
196+
bool HasLDSBufferLoad = false;
196197

197198
// This should not be used directly. 'TargetID' tracks the dynamic settings
198199
// for SRAMECC.
@@ -1319,6 +1320,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13191320
return hasGFX950Insts();
13201321
}
13211322

1323+
bool hasLDSBufferLoad() const { return HasLDSBufferLoad; }
1324+
13221325
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
13231326

13241327
bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10104,7 +10104,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1010410104
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
1010510105
case Intrinsic::amdgcn_struct_buffer_load_lds:
1010610106
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
10107-
assert(!AMDGPU::isGFX12Plus(*Subtarget));
10107+
if (!Subtarget->hasLDSBufferLoad())
10108+
return SDValue();
1010810109
unsigned Opc;
1010910110
bool HasVIndex =
1011010111
IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
374374
Features["prng-inst"] = true;
375375
Features["wavefrontsize32"] = true;
376376
Features["wavefrontsize64"] = true;
377+
Features["lds-buffer-load-insts"] = true;
377378
} else if (T.isAMDGCN()) {
378379
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
379380
switch (Kind) {
@@ -459,6 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
459460
Features["s-memrealtime"] = true;
460461
Features["s-memtime-inst"] = true;
461462
Features["gws"] = true;
463+
Features["lds-buffer-load-insts"] = true;
462464
break;
463465
case GK_GFX1012:
464466
case GK_GFX1011:
@@ -483,6 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
483485
Features["s-memrealtime"] = true;
484486
Features["s-memtime-inst"] = true;
485487
Features["gws"] = true;
488+
Features["lds-buffer-load-insts"] = true;
486489
break;
487490
case GK_GFX950:
488491
Features["bitop3-insts"] = true;
@@ -533,6 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
533536
Features["ci-insts"] = true;
534537
Features["s-memtime-inst"] = true;
535538
Features["gws"] = true;
539+
Features["lds-buffer-load-insts"] = true;
536540
break;
537541
case GK_GFX90A:
538542
Features["gfx90a-insts"] = true;
@@ -585,6 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
585589
Features["image-insts"] = true;
586590
Features["s-memtime-inst"] = true;
587591
Features["gws"] = true;
592+
Features["lds-buffer-load-insts"] = true;
588593
break;
589594
case GK_NONE:
590595
break;

0 commit comments

Comments
 (0)