Skip to content

Commit c7eb846

Browse files
committed
[AMDGPU] Merge AMDGPULDSUtils into AMDGPUMemoryUtils
Differential Revision: https://reviews.llvm.org/D119502
1 parent 4072e36 commit c7eb846

9 files changed

+140
-186
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
#include "AMDGPU.h"
3030
#include "Utils/AMDGPUBaseInfo.h"
31-
#include "Utils/AMDGPULDSUtils.h"
31+
#include "Utils/AMDGPUMemoryUtils.h"
3232
#include "llvm/ADT/STLExtras.h"
3333
#include "llvm/IR/Constants.h"
3434
#include "llvm/IR/DerivedTypes.h"

llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
#include "AMDGPU.h"
8484
#include "GCNSubtarget.h"
8585
#include "Utils/AMDGPUBaseInfo.h"
86-
#include "Utils/AMDGPULDSUtils.h"
86+
#include "Utils/AMDGPUMemoryUtils.h"
8787
#include "llvm/ADT/DenseMap.h"
8888
#include "llvm/ADT/STLExtras.h"
8989
#include "llvm/ADT/SetOperations.h"

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,10 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) {
14431443
}
14441444
}
14451445

1446+
bool isKernelCC(const Function *Func) {
1447+
return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
1448+
}
1449+
14461450
bool hasXNACK(const MCSubtargetInfo &STI) {
14471451
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
14481452
}

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,8 @@ bool isEntryFunctionCC(CallingConv::ID CC);
741741
LLVM_READNONE
742742
bool isModuleEntryFunctionCC(CallingConv::ID CC);
743743

744+
bool isKernelCC(const Function *Func);
745+
744746
// FIXME: Remove this when calling conventions cleaned up
745747
LLVM_READNONE
746748
inline bool isKernel(CallingConv::ID CC) {

llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp

Lines changed: 0 additions & 144 deletions
This file was deleted.

llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h

Lines changed: 0 additions & 38 deletions
This file was deleted.

llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,16 @@
88

99
#include "AMDGPUMemoryUtils.h"
1010
#include "AMDGPU.h"
11+
#include "AMDGPUBaseInfo.h"
12+
#include "llvm/ADT/SetVector.h"
1113
#include "llvm/ADT/SmallSet.h"
1214
#include "llvm/Analysis/AliasAnalysis.h"
1315
#include "llvm/Analysis/MemorySSA.h"
16+
#include "llvm/IR/DataLayout.h"
1417
#include "llvm/IR/Instructions.h"
15-
#include "llvm/IR/IntrinsicsAMDGPU.h"
1618
#include "llvm/IR/IntrinsicInst.h"
19+
#include "llvm/IR/IntrinsicsAMDGPU.h"
20+
#include "llvm/IR/ReplaceConstant.h"
1721

1822
#define DEBUG_TYPE "amdgpu-memory-utils"
1923

@@ -23,6 +27,117 @@ namespace llvm {
2327

2428
namespace AMDGPU {
2529

30+
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
31+
return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
32+
GV->getValueType());
33+
}
34+
35+
static void collectFunctionUses(User *U, const Function *F,
36+
SetVector<Instruction *> &InstUsers) {
37+
SmallVector<User *> Stack{U};
38+
39+
while (!Stack.empty()) {
40+
U = Stack.pop_back_val();
41+
42+
if (auto *I = dyn_cast<Instruction>(U)) {
43+
if (I->getFunction() == F)
44+
InstUsers.insert(I);
45+
continue;
46+
}
47+
48+
if (!isa<ConstantExpr>(U))
49+
continue;
50+
51+
append_range(Stack, U->users());
52+
}
53+
}
54+
55+
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) {
56+
SetVector<Instruction *> InstUsers;
57+
58+
collectFunctionUses(C, F, InstUsers);
59+
for (Instruction *I : InstUsers) {
60+
convertConstantExprsToInstructions(I, C);
61+
}
62+
}
63+
64+
static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
65+
const Function *F) {
66+
// We are not interested in kernel LDS lowering for module LDS itself.
67+
if (F && GV.getName() == "llvm.amdgcn.module.lds")
68+
return false;
69+
70+
bool Ret = false;
71+
SmallPtrSet<const User *, 8> Visited;
72+
SmallVector<const User *, 16> Stack(GV.users());
73+
74+
assert(!F || isKernelCC(F));
75+
76+
while (!Stack.empty()) {
77+
const User *V = Stack.pop_back_val();
78+
Visited.insert(V);
79+
80+
if (isa<GlobalValue>(V)) {
81+
// This use of the LDS variable is the initializer of a global variable.
82+
// This is ill formed. The address of an LDS variable is kernel dependent
83+
// and unknown until runtime. It can't be written to a global variable.
84+
continue;
85+
}
86+
87+
if (auto *I = dyn_cast<Instruction>(V)) {
88+
const Function *UF = I->getFunction();
89+
if (UF == F) {
90+
// Used from this kernel, we want to put it into the structure.
91+
Ret = true;
92+
} else if (!F) {
93+
// For module LDS lowering, lowering is required if the user instruction
94+
// is from non-kernel function.
95+
Ret |= !isKernelCC(UF);
96+
}
97+
continue;
98+
}
99+
100+
// User V should be a constant, recursively visit users of V.
101+
assert(isa<Constant>(V) && "Expected a constant.");
102+
append_range(Stack, V->users());
103+
}
104+
105+
return Ret;
106+
}
107+
108+
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
109+
const Function *F) {
110+
std::vector<llvm::GlobalVariable *> LocalVars;
111+
for (auto &GV : M.globals()) {
112+
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
113+
continue;
114+
}
115+
if (!GV.hasInitializer()) {
116+
// addrspace(3) without initializer implies cuda/hip extern __shared__
117+
// the semantics for such a variable appears to be that all extern
118+
// __shared__ variables alias one another, in which case this transform
119+
// is not required
120+
continue;
121+
}
122+
if (!isa<UndefValue>(GV.getInitializer())) {
123+
// Initializers are unimplemented for LDS address space.
124+
// Leave such variables in place for consistent error reporting.
125+
continue;
126+
}
127+
if (GV.isConstant()) {
128+
// A constant undef variable can't be written to, and any load is
129+
// undef, so it should be eliminated by the optimizer. It could be
130+
// dropped by the back end if not. This pass skips over it.
131+
continue;
132+
}
133+
if (!shouldLowerLDSToStruct(GV, F)) {
134+
continue;
135+
}
136+
LocalVars.push_back(&GV);
137+
}
138+
return LocalVars;
139+
}
140+
26141
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
27142
Instruction *DefInst = Def->getMemoryInst();
28143

llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,32 @@
99
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
1010
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
1111

12+
#include <vector>
13+
1214
namespace llvm {
1315

16+
struct Align;
1417
class AAResults;
18+
class ConstantExpr;
19+
class DataLayout;
20+
class Function;
21+
class GlobalVariable;
1522
class LoadInst;
1623
class MemoryDef;
1724
class MemorySSA;
25+
class Module;
1826
class Value;
1927

2028
namespace AMDGPU {
2129

30+
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
31+
32+
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
33+
const Function *F = nullptr);
34+
35+
/// Replace all uses of constant \p C with instructions in \p F.
36+
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
37+
2238
/// Given a \p Def clobbering a load from \p Ptr accroding to the MSSA check
2339
/// if this is actually a memory update or an artifical clobber to facilitate
2440
/// ordering constraints.

llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
add_llvm_component_library(LLVMAMDGPUUtils
22
AMDGPUAsmUtils.cpp
33
AMDGPUBaseInfo.cpp
4-
AMDGPULDSUtils.cpp
54
AMDGPUMemoryUtils.cpp
65
AMDGPUPALMetadata.cpp
76
AMDKernelCodeTUtils.cpp

0 commit comments

Comments
 (0)