Skip to content

[SYCL] Add ITT annotations for SPIR functions as well #3377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ ModulePass *llvm::createSPIRITTAnnotationsPass() {

namespace {

// Check for calling convention of a function.
bool isSPIRKernel(Function &F) {
// Check the calling convention of a function. Return true if it's a SPIR kernel.
inline bool isSPIRKernel(Function &F) {
return F.getCallingConv() == CallingConv::SPIR_KERNEL;
}

Expand Down Expand Up @@ -240,20 +240,24 @@ PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M,
SPIRV_GROUP_FMAX, SPIRV_GROUP_UMAX, SPIRV_GROUP_SMAX};

for (Function &F : M) {
// Annotate only SPIR kernels
if (F.isDeclaration() || !isSPIRKernel(F))
if (F.isDeclaration())
continue;

// Work item start/finish annotations are only for SPIR kernels
bool IsSPIRKernel = isSPIRKernel(F);

// At the beginning of a kernel insert work item start annotation
// instruction.
IRModified |= insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START,
&*inst_begin(F));
if (IsSPIRKernel)
IRModified |= insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START,
&*inst_begin(F));

for (BasicBlock &BB : F) {
// Insert Finish instruction before return instruction
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
IRModified |=
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI);
if (IsSPIRKernel)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
IRModified |=
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI);
for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ if.end.i: ; preds = %entry
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 0)
%call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2
call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i34)
%ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4
%ptridx.ascast.i.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i.i to i32 addrspace(4)*
store i32 %call3.i.i.i.i, i32 addrspace(4)* %ptridx.ascast.i.i.i, align 4, !tbaa !14
Expand All @@ -60,6 +61,18 @@ _ZZN2cl4sycl7handler24parallel_for_lambda_implI11load_kernelIiEZZ9load_testIiEvN
ret void
}

; Synthetic function with the spir_func (non-kernel) calling convention whose
; body is a single __spirv_AtomicLoad call on %ptr followed by a return.
; The FileCheck directives below expect the transformed output to have the
; atomic call immediately preceded by __itt_offload_atomic_op_start and
; immediately followed by __itt_offload_atomic_op_finish, both taking the same
; pointer operand, with the start call being the first instruction of the
; entry block. They also expect no __itt_offload_wi_finish_wrapper call to
; appear after the finish annotation.
define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %ptr) {
entry:
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 0, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 0, i32 0)
call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %ptr, i32 1, i32 896) #2
; CHECK-NOT: call void @__itt_offload_wi_finish_wrapper()
ret void
}

; Function Attrs: convergent
declare dso_local spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)*, i32, i32) local_unnamed_addr #1

Expand Down
15 changes: 15 additions & 0 deletions llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ if.end.i: ; preds = %entry
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 0
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2
tail call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i)
br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit

_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit: ; preds = %entry, %if.end.i
Expand All @@ -54,6 +55,20 @@ _ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIi
ret void
}

; Synthetic function with the spir_func (non-kernel) calling convention. It
; loads the <3 x i64> @__spirv_BuiltInGlobalInvocationId value, takes element
; 0, truncates it to i32, and passes that value to a __spirv_AtomicStore call
; on %ptr before returning.
; The FileCheck directives below expect the transformed output to contain an
; __itt_offload_atomic_op_start call immediately before the atomic store and
; an __itt_offload_atomic_op_finish call immediately after it, both taking the
; same pointer operand. They also expect no __itt_offload_wi_finish_wrapper
; call to appear after the finish annotation.
define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %ptr) {
entry:
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 1, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 1, i32 0)
%0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15
%1 = extractelement <3 x i64> %0, i64 0
%conv = trunc i64 %1 to i32
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %ptr, i32 1, i32 896, i32 %conv) #2
; CHECK-NOT: call void @__itt_offload_wi_finish_wrapper()
ret void
}

; Function Attrs: convergent
declare dso_local spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)*, i32, i32, i32) local_unnamed_addr #1

Expand Down