Skip to content

Commit 4f829bd

Browse files
authored
[SYCL] Optimize back-to-back ControlBarrier calls (#16750)
This pass removes a redundant __spirv_ControlBarrier call (as well as the ITT annotations surrounding it) when it neighbors another __spirv_ControlBarrier call with the same memory scope and memory semantics arguments. If the two calls have different execution scope arguments, the one with the 'bigger' scope is kept. --------- Signed-off-by: Sidorov, Dmitry <[email protected]>
1 parent deb3c1c commit 4f829bd

File tree

6 files changed

+291
-0
lines changed

6 files changed

+291
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//==- SYCLOptimizeBackToBackBarrier.h - SYCLOptimizeBackToBackBarrier Pass -==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass cleans up back-to-back ControlBarrier calls.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#ifndef LLVM_SYCL_OPTIMIZE_BACK_TO_BACK_BARRIER_H
13+
#define LLVM_SYCL_OPTIMIZE_BACK_TO_BACK_BARRIER_H
14+
15+
#include "llvm/IR/PassManager.h"
16+
17+
namespace llvm {
18+
19+
/// Module pass that cleans up back-to-back ControlBarrier calls.
class SYCLOptimizeBackToBackBarrierPass
20+
: public PassInfoMixin<SYCLOptimizeBackToBackBarrierPass> {
21+
public:
22+
/// Entry point of the pass for module \p M. The analysis manager argument is
/// left unnamed in this declaration.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
23+
24+
/// Always reports the pass as required.
/// NOTE(review): presumably this keeps the pass from being skipped by pass
/// instrumentation (e.g. for optnone functions) - confirm against the LLVM
/// new pass manager documentation.
static bool isRequired() { return true; }
25+
};
26+
27+
} // namespace llvm
28+
29+
#endif // LLVM_SYCL_OPTIMIZE_BACK_TO_BACK_BARRIER_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
#include "llvm/SYCLLowerIR/SYCLConditionalCallOnDevice.h"
164164
#include "llvm/SYCLLowerIR/SYCLCreateNVVMAnnotations.h"
165165
#include "llvm/SYCLLowerIR/SYCLJointMatrixTransform.h"
166+
#include "llvm/SYCLLowerIR/SYCLOptimizeBackToBackBarrier.h"
166167
#include "llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h"
167168
#include "llvm/SYCLLowerIR/SYCLPropagateJointMatrixUsage.h"
168169
#include "llvm/SYCLLowerIR/SYCLVirtualFunctionsAnalysis.h"

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ MODULE_PASS("esimd-remove-host-code", ESIMDRemoveHostCodePass());
173173
MODULE_PASS("esimd-remove-optnone-noinline", ESIMDRemoveOptnoneNoinlinePass());
174174
MODULE_PASS("sycl-conditional-call-on-device", SYCLConditionalCallOnDevicePass())
175175
MODULE_PASS("sycl-joint-matrix-transform", SYCLJointMatrixTransformPass())
176+
MODULE_PASS("sycl-optimize-back-to-back-barrier", SYCLOptimizeBackToBackBarrierPass())
176177
MODULE_PASS("sycl-propagate-aspects-usage", SYCLPropagateAspectsUsagePass())
177178
MODULE_PASS("sycl-propagate-joint-matrix-usage", SYCLPropagateJointMatrixUsagePass())
178179
MODULE_PASS("sycl-add-opt-level-attribute", SYCLAddOptLevelAttributePass())

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ add_llvm_component_library(LLVMSYCLLowerIR
6565
SYCLDeviceRequirements.cpp
6666
SYCLKernelParamOptInfo.cpp
6767
SYCLJointMatrixTransform.cpp
68+
SYCLOptimizeBackToBackBarrier.cpp
6869
SYCLPropagateAspectsUsage.cpp
6970
SYCLPropagateJointMatrixUsage.cpp
7071
SYCLVirtualFunctionsAnalysis.cpp
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
//=== SYCLOptimizeBackToBackBarrier.cpp - SYCL barrier optimization pass ===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass cleans up back-to-back ControlBarrier calls.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/SYCLLowerIR/SYCLOptimizeBackToBackBarrier.h"
14+
15+
#include "llvm/IR/IRBuilder.h"
16+
17+
using namespace llvm;
18+
19+
namespace {
20+
21+
// Itanium-mangled name of __spirv_ControlBarrier(int, int, int); the pass only
// looks at the function carrying exactly this name.
static constexpr char CONTROL_BARRIER[] = "_Z22__spirv_ControlBarrieriii";
22+
// Names of the ITT annotation wrappers that are cleaned up together with a
// removed barrier. In the accompanying lit test the "wg_barrier" wrapper is
// called right before a barrier and the "wi_resume" wrapper right after it.
static constexpr char ITT_BARRIER[] = "__itt_offload_wg_barrier_wrapper";
23+
static constexpr char ITT_RESUME[] = "__itt_offload_wi_resume_wrapper";
24+
25+
// Known scopes in SPIR-V.
enum class Scope {
  CrossDevice = 0,
  Device = 1,
  Workgroup = 2,
  Subgroup = 3,
  Invocation = 4
};

// Result of comparing two scopes: whether the left-hand scope is bigger than,
// smaller than or equal to the right-hand one, or UNKNOWN when at least one of
// the two values is not a scope listed in the Scope enum.
enum class CompareRes { BIGGER = 0, SMALLER = 1, EQUAL = 2, UNKNOWN = 3 };

// Weight returned for a value that is not a known scope. Every known scope has
// a strictly positive weight, so 0 is unambiguous.
constexpr uint64_t UnknownScopeWeight = 0;

// Maps a raw scope value to its weight: the bigger the scope, the bigger the
// weight. The weights are spaced out so that scopes added to SPIR-V and/or
// SYCL in the future can be slotted in between the existing ones.
//
// This is a constexpr switch rather than a namespace-scope std::unordered_map
// so that no static constructor is needed for the table.
constexpr uint64_t getScopeWeight(const uint64_t ScopeValue) {
  switch (ScopeValue) {
  case static_cast<uint64_t>(Scope::CrossDevice):
    return 1000;
  case static_cast<uint64_t>(Scope::Device):
    return 800;
  case static_cast<uint64_t>(Scope::Workgroup):
    return 600;
  case static_cast<uint64_t>(Scope::Subgroup):
    return 400;
  case static_cast<uint64_t>(Scope::Invocation):
    return 10;
  default:
    return UnknownScopeWeight;
  }
}

// Compares two raw scope values by weight.
//
// Returns BIGGER / SMALLER / EQUAL depending on how the weight of LHS relates
// to the weight of RHS, or UNKNOWN if either value is not a known scope.
inline CompareRes compareScopesWithWeights(const uint64_t LHS,
                                           const uint64_t RHS) {
  const uint64_t LHSWeight = getScopeWeight(LHS);
  const uint64_t RHSWeight = getScopeWeight(RHS);

  if (LHSWeight == UnknownScopeWeight || RHSWeight == UnknownScopeWeight)
    return CompareRes::UNKNOWN;

  if (LHSWeight > RHSWeight)
    return CompareRes::BIGGER;
  if (LHSWeight < RHSWeight)
    return CompareRes::SMALLER;
  return CompareRes::EQUAL;
}
62+
63+
// The function removes back-to-back ControlBarrier calls in case if they
64+
// have the same memory scope and memory semantics arguments. When two
65+
// back-to-back ControlBarriers are having different execution scope arguments -
66+
// pick the one with the 'bigger' scope.
67+
// It also cleans up ITT annotations surrounding the removed barrier call.
68+
bool processControlBarrier(Function *F) {
69+
BasicBlock *PrevBB = nullptr;
70+
llvm::SmallPtrSet<Instruction *, 8> ToErase;
71+
for (auto I = F->user_begin(), E = F->user_end(); I != E;) {
72+
User *U = *I++;
73+
auto *CI = dyn_cast<CallInst>(U);
74+
if (!CI)
75+
continue;
76+
77+
// New basic block - new processing.
78+
BasicBlock *CurrentBB = CI->getParent();
79+
if (CurrentBB != PrevBB) {
80+
PrevBB = CurrentBB;
81+
continue;
82+
}
83+
84+
llvm::SmallPtrSet<Instruction *, 2> ToEraseLocalITT;
85+
BasicBlock::iterator It(CI);
86+
// Iterate over the basic block storing back-to-back barriers and their ITT
87+
// annotations into ToErase container.
88+
while (It != CurrentBB->begin()) {
89+
--It;
90+
auto *Cand = dyn_cast<CallInst>(&*It);
91+
if (!Cand)
92+
break;
93+
CallInst *CIToRemove = Cand;
94+
StringRef CandName = Cand->getCalledFunction()->getName();
95+
if (CandName == ITT_RESUME || CandName == ITT_BARRIER) {
96+
ToEraseLocalITT.insert(Cand);
97+
continue;
98+
} else if (CandName == CONTROL_BARRIER) {
99+
bool EqualOps = true;
100+
const auto *ExecutionScopeCI = CI->getOperand(0);
101+
const auto *ExecutionScopeCand = Cand->getOperand(0);
102+
if (ExecutionScopeCI != ExecutionScopeCand) {
103+
if (isa<ConstantInt>(ExecutionScopeCI) &&
104+
isa<ConstantInt>(ExecutionScopeCand)) {
105+
const auto ConstScopeCI =
106+
cast<ConstantInt>(ExecutionScopeCI)->getZExtValue();
107+
const auto ConstScopeCand =
108+
cast<ConstantInt>(ExecutionScopeCand)->getZExtValue();
109+
// Pick ControlBarrier with the 'bigger' execution scope.
110+
const auto Compare =
111+
compareScopesWithWeights(ConstScopeCI, ConstScopeCand);
112+
if (Compare == CompareRes::SMALLER)
113+
CIToRemove = CI;
114+
else if (Compare == CompareRes::UNKNOWN)
115+
// Unknown scopes = unknown rules. Keep ControlBarrier call.
116+
EqualOps = false;
117+
} else
118+
EqualOps = false;
119+
}
120+
// TODO: may be handle a case with not-matching memory scope and
121+
// memory semantic arguments in a smart way.
122+
for (unsigned I = 1; I != CI->getNumOperands(); ++I) {
123+
if (CI->getOperand(I) != Cand->getOperand(I)) {
124+
EqualOps = false;
125+
break;
126+
}
127+
}
128+
if (EqualOps) {
129+
ToErase.insert(CIToRemove);
130+
for (auto *ITT : ToEraseLocalITT)
131+
ToErase.insert(ITT);
132+
ToEraseLocalITT.clear();
133+
}
134+
}
135+
}
136+
}
137+
138+
if (ToErase.empty())
139+
return false;
140+
141+
for (auto *I : ToErase) {
142+
I->dropAllReferences();
143+
I->eraseFromParent();
144+
}
145+
146+
return true;
147+
}
148+
149+
} // namespace
150+
151+
// Pass entry point: looks up the ControlBarrier declaration in M and, if it is
// present, removes its redundant back-to-back calls. All analyses are reported
// as preserved unless something was actually erased.
PreservedAnalyses
SYCLOptimizeBackToBackBarrierPass::run(Module &M, ModuleAnalysisManager &MAM) {
  // Function names are unique within a module, so a single lookup finds the
  // only function that can match.
  Function *Barrier = M.getFunction(CONTROL_BARRIER);
  if (!Barrier || !Barrier->isDeclaration())
    return PreservedAnalyses::all();

  if (!processControlBarrier(Barrier))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; RUN: opt -passes=sycl-optimize-back-to-back-barrier -S < %s | FileCheck %s
2+
; The test checks if back-to-back __spirv_ControlBarrier and ITT annotations are
3+
; removed.
4+
5+
; CHECK-LABEL: define spir_func void @_Z3fooii(i32 %[[#Scope1:]], i32 %[[#Scope2:]])
6+
; CHECK: call spir_func void @__itt_offload_wg_barrier_wrapper()
7+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 noundef 2, i32 noundef 1, i32 noundef 912)
8+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
9+
; CHECK-NEXT: call spir_func void @__itt_offload_wg_barrier_wrapper()
10+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 2, i32 noundef 912)
11+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
12+
; CHECK-NEXT: call spir_func void @__itt_offload_wg_barrier_wrapper()
13+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 noundef 64, i32 noundef 2, i32 noundef 912)
14+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
15+
; CHECK-NEXT: call spir_func void @__itt_offload_wg_barrier_wrapper()
16+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 %[[#Scope1]], i32 noundef 2, i32 noundef 912)
17+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
18+
; CHECK-NEXT: call spir_func void @__itt_offload_wg_barrier_wrapper()
19+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 %[[#Scope2]], i32 noundef 2, i32 noundef 912)
20+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
21+
; CHECK-NEXT: ret void
22+
23+
; CHECK-LABEL: define dso_local void @_Z3booi
24+
; CHECK: call spir_func void @__itt_offload_wg_barrier_wrapper()
25+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 0)
26+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
27+
; CHECK: call spir_func void @__itt_offload_wg_barrier_wrapper()
28+
; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 0)
29+
; CHECK-NEXT: call spir_func void @__itt_offload_wi_resume_wrapper()
30+
31+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
32+
target triple = "spirv64-unknown-unknown"
33+
34+
; Ten ITT-annotated barriers in a single basic block. Per the CHECK lines for
; this function, five barriers are expected to remain.
define spir_func void @_Z3fooii(i32 %0, i32 %1) {
35+
; Four barriers with memory scope 1 and constant execution scopes 4, 2, 3, 3:
; only the one with execution scope 2 is expected to remain.
call spir_func void @__itt_offload_wg_barrier_wrapper()
36+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 4, i32 noundef 1, i32 noundef 912)
37+
call spir_func void @__itt_offload_wi_resume_wrapper()
38+
39+
call spir_func void @__itt_offload_wg_barrier_wrapper()
40+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 2, i32 noundef 1, i32 noundef 912)
41+
call spir_func void @__itt_offload_wi_resume_wrapper()
42+
43+
call spir_func void @__itt_offload_wg_barrier_wrapper()
44+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 1, i32 noundef 912)
45+
call spir_func void @__itt_offload_wi_resume_wrapper()
46+
47+
call spir_func void @__itt_offload_wg_barrier_wrapper()
48+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 1, i32 noundef 912)
49+
call spir_func void @__itt_offload_wi_resume_wrapper()
50+
51+
; Two identical barriers with memory scope 2: one is expected to remain.
call spir_func void @__itt_offload_wg_barrier_wrapper()
52+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 2, i32 noundef 912)
53+
call spir_func void @__itt_offload_wi_resume_wrapper()
54+
55+
call spir_func void @__itt_offload_wg_barrier_wrapper()
56+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 2, i32 noundef 912)
57+
call spir_func void @__itt_offload_wi_resume_wrapper()
58+
59+
; Execution scope 64 is not a known scope: this barrier is expected to be kept.
call spir_func void @__itt_offload_wg_barrier_wrapper()
60+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 64, i32 noundef 2, i32 noundef 912)
61+
call spir_func void @__itt_offload_wi_resume_wrapper()
62+
63+
; Non-constant execution scopes: the two barriers using %0 are expected to be
; merged into one, while the barrier using %1 is expected to be kept.
call spir_func void @__itt_offload_wg_barrier_wrapper()
64+
call void @_Z22__spirv_ControlBarrieriii(i32 %0, i32 noundef 2, i32 noundef 912)
65+
call spir_func void @__itt_offload_wi_resume_wrapper()
66+
67+
call spir_func void @__itt_offload_wg_barrier_wrapper()
68+
call void @_Z22__spirv_ControlBarrieriii(i32 %0, i32 noundef 2, i32 noundef 912)
69+
call spir_func void @__itt_offload_wi_resume_wrapper()
70+
71+
call spir_func void @__itt_offload_wg_barrier_wrapper()
72+
call void @_Z22__spirv_ControlBarrieriii(i32 %1, i32 noundef 2, i32 noundef 912)
73+
call spir_func void @__itt_offload_wi_resume_wrapper()
74+
75+
ret void
76+
}
77+
78+
; Two identical annotated barriers placed in different basic blocks. Per the
; CHECK lines for this function, both are expected to be kept.
define dso_local void @_Z3booi(i32 noundef %0) local_unnamed_addr #0 {
79+
%2 = icmp eq i32 %0, 0
80+
br i1 %2, label %3, label %4
81+
82+
3: ; preds = %1
83+
; Barrier executed only on the conditional path.
call spir_func void @__itt_offload_wg_barrier_wrapper()
84+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 0)
85+
call spir_func void @__itt_offload_wi_resume_wrapper()
86+
br label %4
87+
88+
4: ; preds = %3, %1
89+
; Barrier in the join block, reached from both predecessors.
call spir_func void @__itt_offload_wg_barrier_wrapper()
90+
call void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 0)
91+
call spir_func void @__itt_offload_wi_resume_wrapper()
92+
ret void
93+
}
94+
95+
declare spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef, i32 noundef, i32 noundef)
96+
97+
declare spir_func void @__itt_offload_wg_barrier_wrapper()
98+
99+
declare spir_func void @__itt_offload_wi_resume_wrapper()

0 commit comments

Comments
 (0)