@@ -85,8 +85,10 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
85
85
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
86
86
; CHECK: region.guarded:
87
87
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
88
- ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
89
- ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
88
+ ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
89
+ ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
90
+ ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
91
+ ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
90
92
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
91
93
; CHECK: region.guarded.end:
92
94
; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -107,16 +109,17 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
107
109
; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
108
110
; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
109
111
; CHECK: region.check.tid5:
110
- ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
111
- ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
112
- ; CHECK-NEXT: br i1 [[TMP5]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
112
+ ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
113
+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
114
+ ; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
113
115
; CHECK: region.guarded4:
114
- ; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
116
+ ; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
117
+ ; CHECK-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
115
118
; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
116
119
; CHECK: region.guarded.end1:
117
120
; CHECK-NEXT: br label [[REGION_BARRIER2]]
118
121
; CHECK: region.barrier2:
119
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4]])
122
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
120
123
; CHECK-NEXT: br label [[REGION_EXIT3]]
121
124
; CHECK: region.exit3:
122
125
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -128,50 +131,53 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
128
131
; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
129
132
; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
130
133
; CHECK: region.check.tid10:
131
- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
132
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
133
- ; CHECK-NEXT: br i1 [[TMP7]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
134
+ ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
135
+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
136
+ ; CHECK-NEXT: br i1 [[TMP10]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
134
137
; CHECK: region.guarded9:
135
- ; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
138
+ ; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
139
+ ; CHECK-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
136
140
; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
137
141
; CHECK: region.guarded.end6:
138
142
; CHECK-NEXT: br label [[REGION_BARRIER7]]
139
143
; CHECK: region.barrier7:
140
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6]])
144
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9]])
141
145
; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
142
146
; CHECK: region.exit8:
143
147
; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
144
148
; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
145
149
; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
146
150
; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
147
151
; CHECK: region.check.tid15:
148
- ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
149
- ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
150
- ; CHECK-NEXT: br i1 [[TMP9]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
152
+ ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
153
+ ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0
154
+ ; CHECK-NEXT: br i1 [[TMP13]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
151
155
; CHECK: region.guarded14:
152
- ; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
156
+ ; CHECK-NEXT: [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
157
+ ; CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
153
158
; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
154
159
; CHECK: region.guarded.end11:
155
160
; CHECK-NEXT: br label [[REGION_BARRIER12]]
156
161
; CHECK: region.barrier12:
157
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8]])
162
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12]])
158
163
; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
159
164
; CHECK: region.exit13:
160
165
; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
161
166
; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
162
167
; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
163
168
; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
164
169
; CHECK: region.check.tid20:
165
- ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
166
- ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0
167
- ; CHECK-NEXT: br i1 [[TMP11]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
170
+ ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
171
+ ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
172
+ ; CHECK-NEXT: br i1 [[TMP16]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
168
173
; CHECK: region.guarded19:
169
- ; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
174
+ ; CHECK-NEXT: [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
175
+ ; CHECK-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
170
176
; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
171
177
; CHECK: region.guarded.end16:
172
178
; CHECK-NEXT: br label [[REGION_BARRIER17]]
173
179
; CHECK: region.barrier17:
174
- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10]])
180
+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15]])
175
181
; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
176
182
; CHECK: region.exit18:
177
183
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -232,11 +238,13 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
232
238
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
233
239
; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
234
240
; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
235
- ; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
241
+ ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
242
+ ; CHECK-DISABLED-NEXT: store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
236
243
; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
237
244
; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
238
245
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
239
- ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
246
+ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
247
+ ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
240
248
; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
241
249
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
242
250
; CHECK-DISABLED: for.cond.i:
@@ -248,23 +256,27 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
248
256
; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
249
257
; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
250
258
; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
251
- ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
259
+ ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
260
+ ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
252
261
; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
253
262
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
254
263
; CHECK-DISABLED: __omp_outlined__.exit:
255
264
; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
256
265
; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
257
266
; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
258
267
; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
259
- ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
268
+ ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
269
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
260
270
; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
261
271
; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
262
272
; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
263
- ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
273
+ ; CHECK-DISABLED-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
274
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
264
275
; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
265
276
; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
266
277
; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
267
- ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
278
+ ; CHECK-DISABLED-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
279
+ ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
268
280
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
269
281
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
270
282
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
0 commit comments