Skip to content

Commit 4cf325a

Browse files
authored
[SYCL][NATIVECPU] Remove unneeded accesses to state thread_local (#17215)
No longer generate unneeded reads from state struct thread_local. Also removed unused local.
1 parent db7eac4 commit 4cf325a

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,24 @@ static Function *getReplaceFunc(Module &M, StringRef Name, const Use &U,
210210
}
211211

212212
static Value *getStateArg(Function *F, llvm::Constant *StateTLS) {
213-
// Todo: we should probably cache the state thread local load here
214-
// to avoid re-emitting it for each builtin
215213
if (StateTLS) {
216-
IRBuilder<> BB(&*F->getEntryBlock().getFirstInsertionPt());
214+
// Find previous read from thread_local, if any
215+
const auto IP = F->getEntryBlock().getFirstInsertionPt();
216+
if (IP.isValid()) {
217+
if (const CallInst *I = dyn_cast<CallInst>(&*IP)) {
218+
if (I->getIntrinsicID() == Intrinsic::threadlocal_address &&
219+
I->getOperand(0) == StateTLS) {
220+
const auto Next = std::next(IP);
221+
if (Next.isValid()) {
222+
if (LoadInst *LI = dyn_cast<LoadInst>(&*Next)) {
223+
if (LI->getPointerOperand() == I)
224+
return LI;
225+
}
226+
}
227+
}
228+
}
229+
}
230+
IRBuilder<> BB(&*IP);
217231
llvm::Value *V = BB.CreateThreadLocalAddress(StateTLS);
218232
return BB.CreateLoad(StateTLS->getType(), V);
219233
}
@@ -336,7 +350,6 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
336350
#ifdef NATIVECPU_USE_OCK
337351
{
338352
SmallSet<Function *, 5> RemovableFuncs;
339-
SmallVector<Function *, 5> WrapperFuncs;
340353

341354
for (auto &OldF : OldKernels) {
342355
// If vectorization occurred, at this point we have a wrapper function

sycl/test/check_device_code/native_cpu/native_cpu_builtins.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ int main() {
2828
// CHECK: call{{.*}}__dpcpp_nativecpu_get_global_id(i32 0, ptr addrspace(1) %2)
2929
// CHECK-NOT: @llvm.threadlocal
3030

31-
// CHECK-TL: %[[VAL1:.*]] = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @_ZL28nativecpu_thread_local_state)
32-
// CHECK-TL-NEXT %[[VAL2:.*]] = load ptr addrspace(1), ptr addrspace(1) %VAL1, align 8
33-
// CHECK-TL-NEXT %{{.*}} = call i64 @__dpcpp_nativecpu_get_wg_size(i32 0, ptr addrspace(1) %VAL2)
31+
// CHECK-TL: define void @_ZTSN4sycl3_V16detail19__pf_kernel_wrapperI5Test1EE.NativeCPUKernel({{.*}}
32+
// CHECK-TL-NEXT:entry:
33+
// CHECK-TL-NEXT: %[[VAL1:.*]] = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @_ZL28nativecpu_thread_local_state)
34+
// CHECK-TL-NEXT: %[[VAL2:.*]] = load ptr addrspace(1), ptr addrspace(1) %[[VAL1]], align 8
35+
// CHECK-TL-NEXT: %{{.*}} = call i64 @__dpcpp_nativecpu_get_wg_size(i32 0, ptr addrspace(1) %[[VAL2]])
3436

3537
// CHECK-TL: %{{.*}} = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @_ZL28nativecpu_thread_local_state)
3638
// CHECK-TL-DAG: store ptr addrspace(1) %{{.*}}, ptr addrspace(1) %{{.*}}, align 8

0 commit comments

Comments
 (0)