Skip to content

Commit aefa7dc

Browse files
kerbowa authored and bcahoon committed
[AMDGPU] Add IR lowering changes for preloaded kernargs
Cherry-pick of 7b70af2.

Preloaded kernel arguments should not be lowered in the IR pass AMDGPULowerKernelArguments. It is therefore necessary to calculate the total number of user SGPRs that are available for preloading, and how many SGPRs would be required to preload each argument, in order to determine whether lowering should be skipped (i.e. the argument will be preloaded instead).

Reviewed By: bcahoon
Differential Revision: https://reviews.llvm.org/D156853
Change-Id: Id5a1410daf3fdf73d2ff4dbb8a32f5b60f9653d1
1 parent 895c298 commit aefa7dc

File tree

2 files changed

+527
-2
lines changed

2 files changed

+527
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,59 @@
1414
#include "AMDGPU.h"
1515
#include "GCNSubtarget.h"
1616
#include "llvm/CodeGen/TargetPassConfig.h"
17-
#include "llvm/IR/IntrinsicsAMDGPU.h"
1817
#include "llvm/IR/IRBuilder.h"
18+
#include "llvm/IR/IntrinsicsAMDGPU.h"
1919
#include "llvm/IR/MDBuilder.h"
2020
#include "llvm/Target/TargetMachine.h"
21+
2122
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
2223

2324
using namespace llvm;
2425

2526
namespace {
2627

27-
class AMDGPULowerKernelArguments : public FunctionPass{
28+
class PreloadKernelArgInfo {
29+
private:
30+
Function &F;
31+
const GCNSubtarget &ST;
32+
unsigned NumFreeUserSGPRs;
33+
34+
public:
35+
SmallVector<llvm::Metadata *, 8> KernelArgMetadata;
36+
37+
PreloadKernelArgInfo(Function &F, const GCNSubtarget &ST) : F(F), ST(ST) {
38+
setInitialFreeUserSGPRsCount();
39+
}
40+
41+
// Returns the maximum number of user SGPRs that we have available to preload
42+
// arguments.
43+
void setInitialFreeUserSGPRsCount() {
44+
const unsigned MaxUserSGPRs = ST.getMaxNumUserSGPRs();
45+
GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
46+
47+
NumFreeUserSGPRs = MaxUserSGPRs - UserSGPRInfo.getNumUsedUserSGPRs();
48+
}
49+
50+
bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,
51+
uint64_t LastExplicitArgOffset) {
52+
// Check if this argument may be loaded into the same register as the
53+
// previous argument.
54+
if (!isAligned(Align(4), ArgOffset) && AllocSize < 4)
55+
return true;
56+
57+
// Pad SGPRs for kernarg alignment.
58+
unsigned Padding = ArgOffset - LastExplicitArgOffset;
59+
unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
60+
unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;
61+
if (NumPreloadSGPRs + PaddingSGPRs > NumFreeUserSGPRs)
62+
return false;
63+
64+
NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
65+
return true;
66+
}
67+
};
68+
69+
class AMDGPULowerKernelArguments : public FunctionPass {
2870
public:
2971
static char ID;
3072

@@ -88,6 +130,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
88130

89131
unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
90132
uint64_t ExplicitArgOffset = 0;
133+
// Preloaded kernel arguments must be sequential.
134+
bool InPreloadSequence = true;
135+
PreloadKernelArgInfo PreloadInfo(F, ST);
91136

92137
for (Argument &Arg : F.args()) {
93138
const bool IsByRef = Arg.hasByRefAttr();
@@ -99,8 +144,19 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
99144
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
100145

101146
uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
147+
uint64_t LastExplicitArgOffset = ExplicitArgOffset;
102148
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
103149

150+
// Try to preload this argument into user SGPRs.
151+
if (Arg.hasInRegAttr() && InPreloadSequence && ST.hasKernargPreload() &&
152+
!ST.needsKernargPreloadBackwardsCompatibility() &&
153+
!Arg.getType()->isAggregateType())
154+
if (PreloadInfo.tryAllocPreloadSGPRs(AllocSize, EltOffset,
155+
LastExplicitArgOffset))
156+
continue;
157+
158+
InPreloadSequence = false;
159+
104160
if (Arg.use_empty())
105161
continue;
106162

0 commit comments

Comments
 (0)