Skip to content

Commit a44b216

Browse files
committed
[OPENMP][NVPTX] Mark barrier function calls as convergent.
Added the convergent attribute to the barrier function calls so that optimizations treat them correctly. llvm-svn: 366437
1 parent ec2a7c4 commit a44b216

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2669,8 +2669,9 @@ void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
26692669
llvm::ConstantPointerNull::get(
26702670
cast<llvm::PointerType>(getIdentTyPointerTy())),
26712671
llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
2672-
CGF.EmitRuntimeCall(
2672+
llvm::CallInst *Call = CGF.EmitRuntimeCall(
26732673
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
2674+
Call->setConvergent();
26742675
}
26752676

26762677
void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
@@ -2684,7 +2685,9 @@ void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
26842685
unsigned Flags = getDefaultFlagsForBarriers(Kind);
26852686
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
26862687
getThreadID(CGF, Loc)};
2687-
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2688+
llvm::CallInst *Call = CGF.EmitRuntimeCall(
2689+
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2690+
Call->setConvergent();
26882691
}
26892692

26902693
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(

clang/test/OpenMP/nvptx_parallel_codegen.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ int bar(int n){
8888
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
8989
//
9090
// CHECK: [[AWAIT_WORK]]
91-
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
91+
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#BARRIER_ATTRS:]]
9292
// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]
9393
// CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
9494
// store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@@ -318,10 +318,10 @@ int bar(int n){
318318
// CHECK: define internal void [[PARALLEL_FN4]](
319319
// CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
320320
// CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
321-
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
321+
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#BARRIER_ATTRS]]
322322
// CHECK: ret void
323323

324-
// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[BARRIER_ATTRS:.+]]
324+
// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#BARRIER_ATTRS]]
325325

326326
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}_worker()
327327
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}(
@@ -367,6 +367,6 @@ int bar(int n){
367367
// CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]],
368368
// CHECK: br label
369369

370-
// CHECK: attributes #[[BARRIER_ATTRS]] = {{.*}} convergent {{.*}}
370+
// CHECK: attributes #[[#BARRIER_ATTRS]] = {{.*}} convergent {{.*}}
371371

372372
#endif

0 commit comments

Comments
 (0)