Skip to content

[AMDGPU] Adding the amdgpu-num-work-groups function attribute #75647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -2011,6 +2011,13 @@ def AMDGPUNumVGPR : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}

def AMDGPUNumWorkGroups : InheritableAttr {
let Spellings = [Clang<"amdgpu_num_work_groups", 0>];
let Args = [UnsignedArgument<"NumWorkGroups">];
let Documentation = [AMDGPUNumWorkGroupsDocs];
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}

def AMDGPUKernelCall : DeclOrTypeAttr {
let Spellings = [Clang<"amdgpu_kernel">];
let Documentation = [Undocumented];
Expand Down
23 changes: 23 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,29 @@ An error will be given if:
}];
}

def AMDGPUNumWorkGroupsDocs : Documentation {
let Category = DocCatAMDGPUAttributes;
let Content = [{
The number of work groups specifies the number of work groups when the kernel
is dispatched.

Clang supports the
``__attribute__((amdgpu_num_work_groups(<num>)))`` attribute for the
AMDGPU target. This attribute may be attached to a kernel function definition
and is an optimization hint.

``<num>`` parameter specifies the number of work groups.

If specified, the AMDGPU target backend might be able to produce better machine
code.

An error will be given if:
- Specified values violate subtarget specifications;
- Specified values are not compatible with values provided through other
attributes.
}];
}

def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
let Content = [{
Clang supports several different calling conventions, depending on the target
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,13 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}

if (const auto *Attr = FD->getAttr<AMDGPUNumWorkGroupsAttr>()) {
uint32_t NumWG = Attr->getNumWorkGroups();

if (NumWG != 0)
F->addFnAttr("amdgpu-num-work-groups", llvm::utostr(NumWG));
}
}

/// Emits control constants used to change per-architecture behaviour in the
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8051,6 +8051,16 @@ static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) AMDGPUNumVGPRAttr(S.Context, AL, NumVGPR));
}

static void handleAMDGPUNumWorkGroupsAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
uint32_t NumWG = 0;
Expr *NumWGExpr = AL.getArgAsExpr(0);
if (!checkUInt32Argument(S, AL, NumWGExpr, NumWG))
return;

D->addAttr(::new (S.Context) AMDGPUNumWorkGroupsAttr(S.Context, AL, NumWG));
}

static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
// If we try to apply it to a function pointer, don't warn, but don't
Expand Down Expand Up @@ -9058,6 +9068,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_AMDGPUNumVGPR:
handleAMDGPUNumVGPRAttr(S, D, AL);
break;
case ParsedAttr::AT_AMDGPUNumWorkGroups:
handleAMDGPUNumWorkGroupsAttr(S, D, AL);
break;
case ParsedAttr::AT_AVRSignal:
handleAVRSignalAttr(S, D, AL);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumWorkGroups (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUWavesPerEU (SubjectMatchRule_function)
// CHECK-NEXT: AVRSignal (SubjectMatchRule_function)
// CHECK-NEXT: AbiTag (SubjectMatchRule_record_not_is_union, SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_namespace)
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,10 @@ MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,

Kern[".max_flat_workgroup_size"] =
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
unsigned NumWG = MFI.getNumWorkGroups();
if (NumWG != 0) {
Kern[".num_work_groups"] = Kern.getDocument()->getNode(NumWG);
}
Kern[".sgpr_spill_count"] =
Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
Kern[".vgpr_spill_count"] =
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1108,3 +1108,9 @@ void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}

unsigned AMDGPUSubtarget::getNumWorkGroups(const Function &F) const {
const unsigned Default = 0;
return AMDGPU::getUnsignedIntegerAttribute(F, "amdgpu-num-work-groups",
Default);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ class AMDGPUSubtarget {
/// 2) dimension.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;

/// Return the number of work groups for the function.
unsigned getNumWorkGroups(const Function &F) const;

/// Return true if only a single workitem can be active in a wave.
bool isSingleLaneExecution(const Function &Kernel) const;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
NumWorkGroups = ST.getNumWorkGroups(F);

Occupancy = ST.computeOccupancy(F, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,

const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

// Default/requested number of work groups for the function.
unsigned NumWorkGroups = 0;

private:
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
Expand Down Expand Up @@ -1094,6 +1097,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,

// \returns true if a function needs or may need AGPRs.
bool usesAGPRs(const MachineFunction &MF) const;

/// \returns Default/requested number of work groups for this function.
unsigned getNumWorkGroups() const { return NumWorkGroups; }
};

} // end namespace llvm
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,6 +1221,22 @@ getIntegerPairAttribute(const Function &F, StringRef Name,
return Ints;
}

unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name,
unsigned Default) {
Attribute A = F.getFnAttribute(Name);
if (!A.isStringAttribute())
return Default;

LLVMContext &Ctx = F.getContext();
unsigned IntVal = Default;
StringRef Str = A.getValueAsString();
if (Str.trim().getAsInteger(0, IntVal)) {
Ctx.emitError("can't parse integer attribute " + Name);
return Default;
}
return IntVal;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
return (1 << (getVmcntBitWidthLo(Version.Major) +
getVmcntBitWidthHi(Version.Major))) -
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,15 @@ bool shouldEmitConstantsToTextSection(const Triple &TT);
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns Unsigned Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
unsigned getUnsignedIntegerAttribute(const Function &F, StringRef Name,
unsigned Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
Expand Down
82 changes: 82 additions & 0 deletions llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-work-groups.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

; Attribute not specified.
; CHECK-LABEL: {{^}}empty_no_attribute:
define amdgpu_kernel void @empty_no_attribute() {
entry:
ret void
}

; Ignore if number of work groups is 0.
; CHECK-LABEL: {{^}}empty_num_work_groups_0:
define amdgpu_kernel void @empty_num_work_groups_0() #0 {
entry:
ret void
}
attributes #0 = {"amdgpu-num-work-groups"="0"}

; Exactly 1 work group.
; CHECK-LABEL: {{^}}empty_num_work_groups_1:
define amdgpu_kernel void @empty_num_work_groups_1() #1 {
entry:
ret void
}
attributes #1 = {"amdgpu-num-work-groups"="1"}

; Exactly 5 work groups.
; CHECK-LABEL: {{^}}empty_num_work_groups_5:
define amdgpu_kernel void @empty_num_work_groups_5() #2 {
entry:
ret void
}
attributes #2 = {"amdgpu-num-work-groups"="5"}

; Exactly 32 work groups.
; CHECK-LABEL: {{^}}empty_num_work_groups_32:
define amdgpu_kernel void @empty_num_work_groups_32() #3 {
entry:
ret void
}
attributes #3 = {"amdgpu-num-work-groups"="32"}

; Exactly 50 work groups.
; CHECK-LABEL: {{^}}empty_num_work_groups_50:
define amdgpu_kernel void @empty_num_work_groups_50() #4 {
entry:
ret void
}
attributes #4 = {"amdgpu-num-work-groups"="50"}

; Exactly 256 work groups.
; CHECK-LABEL: {{^}}empty_num_work_groups_256:
define amdgpu_kernel void @empty_num_work_groups_256() #5 {
entry:
ret void
}
attributes #5 = {"amdgpu-num-work-groups"="256"}

; Exactly 1024 work groups.
; CHECK-LABEL: {{^}}empty_num_work_groups_1024:
define amdgpu_kernel void @empty_num_work_groups_1024() #6 {
entry:
ret void
}
attributes #6 = {"amdgpu-num-work-groups"="1024"}

; CHECK: .amdgpu_metadata
; CHECK: .name: empty_no_attribute
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: .name: empty_num_work_groups_0
; CHECK-NEXT: .private_segment_fixed_size: 0
; CHECK: .name: empty_num_work_groups_1
; CHECK-NEXT: .num_work_groups: 1
; CHECK: .name: empty_num_work_groups_5
; CHECK-NEXT: .num_work_groups: 5
; CHECK: .name: empty_num_work_groups_32
; CHECK-NEXT: .num_work_groups: 32
; CHECK: .name: empty_num_work_groups_50
; CHECK-NEXT: .num_work_groups: 50
; CHECK: .name: empty_num_work_groups_256
; CHECK-NEXT: .num_work_groups: 256
; CHECK: .name: empty_num_work_groups_1024
; CHECK-NEXT: .num_work_groups: 1024