Skip to content

Commit fa46a0c

Browse files
committed
comments
1 parent e5c5fc7 commit fa46a0c

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ char &llvm::AMDGPUPromoteAllocaToVectorID = AMDGPUPromoteAllocaToVector::ID;
220 220
PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
221 221
FunctionAnalysisManager &AM) {
222 222
auto &LI = AM.getResult<LoopAnalysis>(F);
223-
bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS*/ true);
223+
bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS=*/true);
224 224
if (Changed) {
225 225
PreservedAnalyses PA;
226 226
PA.preserveSet<CFGAnalyses>();
@@ -232,7 +232,7 @@ PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
232 232
PreservedAnalyses
233 233
AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
234 234
auto &LI = AM.getResult<LoopAnalysis>(F);
235-
bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS*/ false);
235+
bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(F, /*PromoteToLDS=*/false);
236 236
if (Changed) {
237 237
PreservedAnalyses PA;
238 238
PA.preserveSet<CFGAnalyses>();
@@ -257,7 +257,7 @@ static void collectAllocaUses(AllocaInst &Alloca,
257 257
for (auto &U : Cur->uses()) {
258 258
Uses.push_back(&U);
259 259

260-
if (isa<GetElementPtrInst>(U.getUser()) || isa<BitCastInst>(U.getUser()))
260+
if (isa<GetElementPtrInst>(U.getUser()))
261 261
WorkList.push_back(cast<Instruction>(U.getUser()));
262 262
}
263 263
}
@@ -267,10 +267,6 @@ void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
267 267
SmallVectorImpl<AllocaInst *> &Allocas) {
268 268
DenseMap<AllocaInst *, unsigned> Scores;
269 269

270-
LLVM_DEBUG(dbgs() << "Before sorting allocas:\n"; for (auto *A
271-
: Allocas) dbgs()
272-
<< " " << *A << "\n";);
273-
274 270
for (auto *Alloca : Allocas) {
275 271
LLVM_DEBUG(dbgs() << "Scoring: " << *Alloca << "\n");
276 272
unsigned &Score = Scores[Alloca];
@@ -279,7 +275,7 @@ void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
279 275
collectAllocaUses(*Alloca, Uses);
280 276
for (auto *U : Uses) {
281 277
Instruction *Inst = cast<Instruction>(U->getUser());
282-
if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst))
278+
if (isa<GetElementPtrInst>(Inst))
283 279
continue;
284 280
unsigned UserScore =
285 281
1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
@@ -293,9 +289,13 @@ void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
293 289
return Scores.at(A) > Scores.at(B);
294 290
});
295 291

296-
LLVM_DEBUG(dbgs() << "After sorting allocas:\n"; for (auto *A
297-
: Allocas) dbgs()
298-
<< " " << *A << "\n";);
292+
// clang-format off
293+
LLVM_DEBUG(
294+
dbgs() << "Sorted Worklist:\n";
295+
for (auto *A: Allocas)
296+
dbgs() << " " << *A << "\n";
297+
);
298+
// clang-format on
299 299
}
300 300

301 301
bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {

llvm/test/CodeGen/AMDGPU/promote-alloca-scoring.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
1 1
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -debug-only=amdgpu-promote-alloca -amdgpu-promote-alloca-to-vector-limit=512 -passes=amdgpu-promote-alloca %s -o - 2>&1 | FileCheck %s
2 2
; REQUIRES: asserts
3 3

4-
; CHECK: Before sorting allocas:
5-
; CHECK-NEXT: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
6-
; CHECK-NEXT: %manyusers = alloca [4 x i64], align 4, addrspace(5)
7-
; CHECK-NEXT: Scoring: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
4+
; CHECK: Scoring: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
8 5
; CHECK-NEXT: [+1]: store i32 42, ptr addrspace(5) %simpleuser, align 4
9 6
; CHECK-NEXT: => Final Score:1
10 7
; CHECK-NEXT: Scoring: %manyusers = alloca [4 x i64], align 4, addrspace(5)
@@ -13,7 +10,7 @@
13 10
; CHECK-NEXT: [+1]: store i32 %v1.ext, ptr addrspace(5) %manyusers.2, align 4
14 11
; CHECK-NEXT: [+1]: %v1 = load i8, ptr addrspace(5) %manyusers.2, align 1
15 12
; CHECK-NEXT: => Final Score:4
16-
; CHECK-NEXT: After sorting allocas:
13+
; CHECK-NEXT: Sorted Worklist:
17 14
; CHECK-NEXT: %manyusers = alloca [4 x i64], align 4, addrspace(5)
18 15
; CHECK-NEXT: %simpleuser = alloca [4 x i64], align 4, addrspace(5)
19 16
define amdgpu_kernel void @simple_users_scores() #0 {

0 commit comments

Comments
 (0)