Skip to content

Commit 58f9326

Browse files
committed
[OpenMP] Change AAKernelInfo to ignore non-kernels
Currently, AAKernelInfo fails an assertion when it is run on a kernel that lacks the __kmpc_target_init / __kmpc_target_deinit runtime calls. This situation arises for global constructors on the device, which are registered as kernels but contain neither call, so OpenMPOpt crashes whenever global constructors are present. This patch removes the assertion and instead gives up on such functions by indicating a fixpoint immediately. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D108258
1 parent edb8acd commit 58f9326

File tree

2 files changed

+103
-2
lines changed

2 files changed

+103
-2
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2878,8 +2878,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
28782878
},
28792879
Fn);
28802880

2881-
assert((KernelInitCB && KernelDeinitCB) &&
2882-
"Kernel without __kmpc_target_init or __kmpc_target_deinit!");
2881+
// Ignore kernels that lack the __kmpc_target_init / __kmpc_target_deinit calls, such as global constructors.
2882+
if (!KernelInitCB || !KernelDeinitCB) {
2883+
indicateOptimisticFixpoint();
2884+
return;
2885+
}
28832886

28842887
// For kernels we might need to initialize/finalize the IsSPMD state and
28852888
// we need to register a simplification callback so that the Attributor
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
2+
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
3+
4+
%struct.ident_t = type { i32, i32, i32, i32, i8* }
5+
6+
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
7+
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
8+
@_ZL6Device = internal global double 0.000000e+00, align 8
9+
@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0
10+
11+
define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr {
12+
entry:
13+
%0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 false, i1 false) #0
14+
%exec_user_code = icmp eq i32 %0, -1
15+
br i1 %exec_user_code, label %user_code.entry, label %common.ret
16+
17+
common.ret:
18+
ret void
19+
20+
user_code.entry:
21+
%1 = load double, double* @_ZL6Device, align 8, !tbaa !11
22+
%2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #0
23+
%3 = icmp eq i32 %2, 0
24+
br i1 %3, label %region.guarded, label %region.barrier
25+
26+
region.guarded:
27+
store double %1, double* %X, align 8, !tbaa !11
28+
br label %region.barrier
29+
30+
region.barrier:
31+
tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2)
32+
tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 true, i1 false) #0
33+
br label %common.ret
34+
}
35+
36+
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
37+
38+
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
39+
40+
define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
41+
entry:
42+
%call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
43+
%call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
44+
%div = fdiv double %call.i, %call.i2
45+
store double %div, double* @_ZL6Device, align 8, !tbaa !11
46+
ret void
47+
}
48+
49+
declare double @__nv_log(double)
50+
51+
declare i32 @__kmpc_get_hardware_thread_id_in_block()
52+
53+
declare void @__kmpc_barrier_simple_spmd(%struct.ident_t*, i32)
54+
55+
attributes #0 = { nounwind }
56+
attributes #1 = { convergent nounwind }
57+
58+
!omp_offload.info = !{!0, !1, !2}
59+
!nvvm.annotations = !{!3, !4}
60+
!llvm.module.flags = !{!5, !6, !7, !8, !9}
61+
!llvm.ident = !{!10}
62+
63+
!0 = !{i32 0, i32 64770, i32 -2060960764, !"__omp_offloading__fd02_85283c04_Device_l6_ctor", i32 6, i32 1}
64+
!1 = !{i32 0, i32 64770, i32 -2060960764, !"main", i32 11, i32 2}
65+
!2 = !{i32 1, !"_ZL6Device", i32 0, i32 0}
66+
!3 = !{void ()* @__omp_offloading__fd02_85283c04_Device_l6_ctor, !"kernel", i32 1}
67+
!4 = !{void (double*)* @__omp_offloading_fd02_85283c04_main_l11, !"kernel", i32 1}
68+
!5 = !{i32 1, !"wchar_size", i32 4}
69+
!6 = !{i32 7, !"openmp", i32 50}
70+
!7 = !{i32 7, !"openmp-device", i32 50}
71+
!8 = !{i32 7, !"PIC Level", i32 2}
72+
!9 = !{i32 7, !"frame-pointer", i32 2}
73+
!10 = !{!"clang version 14.0.0"}
74+
!11 = !{!12, !12, i64 0}
75+
!12 = !{!"double", !13, i64 0}
76+
!13 = !{!"omnipotent char", !14, i64 0}
77+
!14 = !{!"Simple C++ TBAA"}
78+
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
79+
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
80+
; CHECK-NEXT: entry:
81+
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) #[[ATTR1:[0-9]+]]
82+
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
83+
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
84+
; CHECK: common.ret:
85+
; CHECK-NEXT: ret void
86+
; CHECK: user_code.entry:
87+
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
88+
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
89+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
90+
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
91+
; CHECK: region.guarded:
92+
; CHECK-NEXT: store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]]
93+
; CHECK-NEXT: br label [[REGION_BARRIER]]
94+
; CHECK: region.barrier:
95+
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
96+
; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR1]]
97+
; CHECK-NEXT: br label [[COMMON_RET]]
98+
;

0 commit comments

Comments
 (0)