Skip to content

Commit f1b9fd1

Browse files
committed
[AMDGPU][Attributor] Skip update if an AA is at its initial state
1 parent f403727 commit f1b9fd1

8 files changed

+140
-79
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,16 @@ struct AAAMDSizeRangeAttribute
819819
if (!CallerInfo || !CallerInfo->isValidState())
820820
return false;
821821

822+
/// When the caller AA is in its initial state, the state remains valid
823+
/// but awaits propagation. We skip processing in this case. Note that we
824+
/// must return true since the state is still considered valid.
825+
if (CallerInfo->isAtInitialState()) {
826+
LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
827+
<< Caller->getName()
828+
<< " is still at initial state. Skip the update.\n");
829+
return true;
830+
}
831+
822832
Change |=
823833
clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
824834

@@ -863,6 +873,15 @@ struct AAAMDSizeRangeAttribute
863873
/*ForceReplace=*/true);
864874
}
865875

876+
/// The initial state of `IntegerRangeState` represents an empty set, which
877+
/// does not constitute a valid range. This empty state complicates
878+
/// propagation, particularly for arithmetic operations like
879+
/// `getAssumed().getUpper() - 1`. Therefore, it is recommended to skip the
880+
/// initial state during processing.
881+
bool isAtInitialState() const {
882+
return isValidState() && getAssumed().isEmptySet();
883+
}
884+
866885
const std::string getAsStr(Attributor *) const override {
867886
std::string Str;
868887
raw_string_ostream OS(Str);
@@ -919,6 +938,11 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
919938
Attributor &A);
920939

921940
ChangeStatus manifest(Attributor &A) override {
941+
if (isAtInitialState()) {
942+
LLVM_DEBUG(dbgs() << '[' << getName()
943+
<< "] Still at initial state. No manifest.\n";);
944+
return ChangeStatus::UNCHANGED;
945+
}
922946
Function *F = getAssociatedFunction();
923947
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
924948
return emitAttributeIfNotDefaultAfterClamp(
@@ -1145,31 +1169,71 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11451169
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11461170
ChangeStatus Change = ChangeStatus::UNCHANGED;
11471171

1172+
Function *F = getAssociatedFunction();
1173+
1174+
const auto *AAFlatWorkGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1175+
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
1176+
if (!AAFlatWorkGroupSize || !AAFlatWorkGroupSize->isValidState()) {
1177+
LLVM_DEBUG(
1178+
dbgs() << '[' << getName()
1179+
<< "] AAAMDFlatWorkGroupSize is unavailable or invalid.\n");
1180+
return ChangeStatus::UNCHANGED;
1181+
}
1182+
1183+
if (AAFlatWorkGroupSize->isAtInitialState()) {
1184+
LLVM_DEBUG(dbgs() << '[' << getName()
1185+
<< "] AAAMDFlatWorkGroupSize is still at initial "
1186+
"state. Skip the update.\n");
1187+
return ChangeStatus::UNCHANGED;
1188+
}
1189+
1190+
auto CurrentWorkGroupSize = std::make_pair(
1191+
AAFlatWorkGroupSize->getAssumed().getLower().getZExtValue(),
1192+
AAFlatWorkGroupSize->getAssumed().getUpper().getZExtValue() - 1);
1193+
1194+
auto DoUpdate = [&](std::pair<unsigned, unsigned> WavesPerEU,
1195+
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
1196+
auto [Min, Max] =
1197+
InfoCache.getEffectiveWavesPerEU(*F, WavesPerEU, FlatWorkGroupSize);
1198+
ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
1199+
IntegerRangeState IRS(CR);
1200+
Change |= clampStateAndIndicateChange(this->getState(), IRS);
1201+
};
1202+
1203+
// We need to clamp once if we are not at initial state, because
1204+
// AAAMDFlatWorkGroupSize could have been updated in the last iteration.
1205+
if (!isAtInitialState()) {
1206+
auto CurrentWavesPerEU =
1207+
std::make_pair(getAssumed().getLower().getZExtValue(),
1208+
getAssumed().getUpper().getZExtValue() - 1);
1209+
DoUpdate(CurrentWavesPerEU, CurrentWorkGroupSize);
1210+
}
1211+
11481212
auto CheckCallSite = [&](AbstractCallSite CS) {
11491213
Function *Caller = CS.getInstruction()->getFunction();
1150-
Function *Func = getAssociatedFunction();
1214+
11511215
LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1152-
<< "->" << Func->getName() << '\n');
1216+
<< "->" << F->getName() << '\n');
11531217

1154-
const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
1218+
const auto *AAWavesPerEU = A.getAAFor<AAAMDWavesPerEU>(
11551219
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1156-
const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1157-
*this, IRPosition::function(*Func), DepClassTy::REQUIRED);
1158-
if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
1159-
!AssumedGroupSize->isValidState())
1220+
if (!AAWavesPerEU || !AAWavesPerEU->isValidState()) {
1221+
LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
1222+
<< Caller->getName()
1223+
<< " is unavailable or invalid.\n");
11601224
return false;
1225+
}
1226+
if (AAWavesPerEU->isAtInitialState()) {
1227+
LLVM_DEBUG(dbgs() << '[' << getName() << "] Caller "
1228+
<< Caller->getName()
1229+
<< " is still at initial state. Skip the update.\n");
1230+
return true;
1231+
}
11611232

1162-
unsigned Min, Max;
1163-
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
1164-
*Caller,
1165-
{CallerInfo->getAssumed().getLower().getZExtValue(),
1166-
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1167-
{AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1168-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1169-
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
1170-
IntegerRangeState CallerRangeState(CallerRange);
1171-
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
1172-
1233+
auto CallerWavesPerEU = std::make_pair(
1234+
AAWavesPerEU->getAssumed().getLower().getZExtValue(),
1235+
AAWavesPerEU->getAssumed().getUpper().getZExtValue() - 1);
1236+
DoUpdate(CallerWavesPerEU, CurrentWorkGroupSize);
11731237
return true;
11741238
};
11751239

@@ -1185,6 +1249,11 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11851249
Attributor &A);
11861250

11871251
ChangeStatus manifest(Attributor &A) override {
1252+
if (isAtInitialState()) {
1253+
LLVM_DEBUG(dbgs() << '[' << getName()
1254+
<< "] Still at initial state. No manifest.\n";);
1255+
return ChangeStatus::UNCHANGED;
1256+
}
11881257
Function *F = getAssociatedFunction();
11891258
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11901259
return emitAttributeIfNotDefaultAfterClamp(

llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ define void @func_call_asm() #3 {
688688
;
689689
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
690690
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
691-
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR26:[0-9]+]]
691+
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]]
692692
; ATTRIBUTOR_HSA-NEXT: ret void
693693
;
694694
call void asm sideeffect "", ""() #3
@@ -717,7 +717,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
717717
; AKF_HSA-NEXT: ret void
718718
;
719719
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
720-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
720+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
721721
; ATTRIBUTOR_HSA-NEXT: call void @defined.func()
722722
; ATTRIBUTOR_HSA-NEXT: ret void
723723
;
@@ -845,7 +845,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
845845
; AKF_HSA-NEXT: ret void
846846
;
847847
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
848-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
848+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
849849
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
850850
; ATTRIBUTOR_HSA-NEXT: ret void
851851
;
@@ -861,7 +861,7 @@ define void @func_sanitize_address() #4 {
861861
; AKF_HSA-NEXT: ret void
862862
;
863863
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
864-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
864+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
865865
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
866866
; ATTRIBUTOR_HSA-NEXT: ret void
867867
;
@@ -877,7 +877,7 @@ define void @func_indirect_sanitize_address() #3 {
877877
; AKF_HSA-NEXT: ret void
878878
;
879879
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
880-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
880+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
881881
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
882882
; ATTRIBUTOR_HSA-NEXT: ret void
883883
;
@@ -893,7 +893,7 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
893893
; AKF_HSA-NEXT: ret void
894894
;
895895
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
896-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
896+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
897897
; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address()
898898
; ATTRIBUTOR_HSA-NEXT: ret void
899899
;
@@ -928,7 +928,7 @@ define internal void @enqueue_block_def() #6 {
928928
; AKF_HSA-NEXT: ret void
929929
;
930930
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
931-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
931+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
932932
; ATTRIBUTOR_HSA-NEXT: ret void
933933
;
934934
ret void
@@ -941,7 +941,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
941941
; AKF_HSA-NEXT: ret void
942942
;
943943
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
944-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
944+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
945945
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl()
946946
; ATTRIBUTOR_HSA-NEXT: ret void
947947
;
@@ -956,7 +956,7 @@ define amdgpu_kernel void @kern_call_enqueued_block_def() {
956956
; AKF_HSA-NEXT: ret void
957957
;
958958
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
959-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
959+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
960960
; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def()
961961
; ATTRIBUTOR_HSA-NEXT: ret void
962962
;
@@ -969,7 +969,7 @@ define void @unused_enqueue_block() {
969969
; AKF_HSA-NEXT: ret void
970970
;
971971
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
972-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
972+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
973973
; ATTRIBUTOR_HSA-NEXT: ret void
974974
;
975975
ret void
@@ -980,7 +980,7 @@ define internal void @known_func() {
980980
; AKF_HSA-NEXT: ret void
981981
;
982982
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
983-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR25]] {
983+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
984984
; ATTRIBUTOR_HSA-NEXT: ret void
985985
;
986986
ret void
@@ -994,8 +994,8 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
994994
; AKF_HSA-NEXT: ret void
995995
;
996996
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
997-
; ATTRIBUTOR_HSA-SAME: () #[[ATTR24]] {
998-
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR27:[0-9]+]]
997+
; ATTRIBUTOR_HSA-SAME: () #[[ATTR23]] {
998+
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR25:[0-9]+]]
999999
; ATTRIBUTOR_HSA-NEXT: ret void
10001000
;
10011001
call void @known_func() #6
@@ -1041,17 +1041,15 @@ attributes #6 = { "enqueued-block" }
10411041
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
10421042
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
10431043
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1044-
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
1045-
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1046-
; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1047-
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
1048-
; ATTRIBUTOR_HSA: attributes #[[ATTR21:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
1049-
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
1050-
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "uniform-work-group-size"="false" }
1051-
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
1052-
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1053-
; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { nounwind }
1054-
; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "enqueued-block" }
1044+
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1045+
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1046+
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
1047+
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
1048+
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
1049+
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
1050+
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
1051+
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
1052+
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
10551053
;.
10561054
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
10571055
;.

0 commit comments

Comments
 (0)