Skip to content

Commit c57ef2c

Browse files
authored
[llvm][OpenMPOpt] Remove no-op ptr-to-ptr bitcast (NFC) (#73869)
* Remove a call to CreatePointerBitCastOrAddrSpaceCast which merely adds a no-op ptr-to-ptr bitcast.
* Most of the diff is from removing checks for no-op ptr-to-ptr bitcasts in relevant LIT tests.
1 parent 8b9a6af commit c57ef2c

File tree

7 files changed

+68
-131
lines changed

7 files changed

+68
-131
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4527,9 +4527,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
45274527
FunctionType *ParallelRegionFnTy = FunctionType::get(
45284528
Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
45294529
false);
4530-
Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
4531-
WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
4532-
StateMachineBeginBB);
45334530

45344531
Instruction *IsDone =
45354532
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
@@ -4576,7 +4573,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
45764573
Value *IsPR;
45774574
if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
45784575
Instruction *CmpI = ICmpInst::Create(
4579-
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
4576+
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, ParallelRegion,
45804577
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
45814578
CmpI->setDebugLoc(DLoc);
45824579
IsPR = CmpI;
@@ -4596,7 +4593,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
45964593
if (!ReachedUnknownParallelRegions.empty()) {
45974594
StateMachineIfCascadeCurrentBB->setName(
45984595
"worker_state_machine.parallel_region.fallback.execute");
4599-
CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
4596+
CallInst::Create(ParallelRegionFnTy, WorkFn, {ZeroArg, GTid}, "",
46004597
StateMachineIfCascadeCurrentBB)
46014598
->setDebugLoc(DLoc);
46024599
}

llvm/test/Transforms/OpenMP/custom_state_machines.ll

Lines changed: 18 additions & 30 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
2727
; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef
2828

29-
; CHECK-DAG: icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__1_wrapper.ID
30-
; CHECK-DAG: icmp eq ptr %worker.work_fn.addr_cast, @__omp_outlined__2_wrapper.ID
29+
; CHECK-DAG: icmp eq ptr %worker.work_fn, @__omp_outlined__1_wrapper.ID
30+
; CHECK-DAG: icmp eq ptr %worker.work_fn, @__omp_outlined__2_wrapper.ID
3131

3232

3333
; CHECK-DAG: call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr %{{.*}}, i64 0)

llvm/test/Transforms/OpenMP/spmdization.ll

Lines changed: 40 additions & 80 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/OpenMP/spmdization_guarding.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %dyn, ptr %x
203203
; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
204204
; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
205205
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
206-
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
207206
; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
208207
; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
209208
; CHECK-DISABLED: worker_state_machine.finished:

llvm/test/Transforms/OpenMP/spmdization_indirect.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -390,15 +390,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
390390
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
391391
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
392392
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
393-
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
394393
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
395394
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
396395
; AMDGPU: worker_state_machine.finished:
397396
; AMDGPU-NEXT: ret void
398397
; AMDGPU: worker_state_machine.is_active.check:
399398
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
400399
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
401-
; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
400+
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
402401
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
403402
; AMDGPU: worker_state_machine.parallel_region.end:
404403
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -451,15 +450,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
451450
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
452451
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
453452
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
454-
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
455453
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
456454
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
457455
; NVPTX: worker_state_machine.finished:
458456
; NVPTX-NEXT: ret void
459457
; NVPTX: worker_state_machine.is_active.check:
460458
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
461459
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
462-
; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
460+
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
463461
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
464462
; NVPTX: worker_state_machine.parallel_region.end:
465463
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -736,15 +734,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
736734
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
737735
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
738736
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
739-
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
740737
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
741738
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
742739
; AMDGPU: worker_state_machine.finished:
743740
; AMDGPU-NEXT: ret void
744741
; AMDGPU: worker_state_machine.is_active.check:
745742
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
746743
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
747-
; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
744+
; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
748745
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
749746
; AMDGPU: worker_state_machine.parallel_region.end:
750747
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -796,15 +793,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
796793
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
797794
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
798795
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
799-
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
800796
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
801797
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
802798
; NVPTX: worker_state_machine.finished:
803799
; NVPTX-NEXT: ret void
804800
; NVPTX: worker_state_machine.is_active.check:
805801
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
806802
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
807-
; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
803+
; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
808804
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
809805
; NVPTX: worker_state_machine.parallel_region.end:
810806
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()

llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
8888
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
8989
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
9090
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
91-
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
9291
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
9392
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
9493
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
@@ -166,15 +165,14 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
166165
; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
167166
; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
168167
; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
169-
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
170168
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
171169
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
172170
; CHECK: worker_state_machine.finished:
173171
; CHECK-NEXT: ret void
174172
; CHECK: worker_state_machine.is_active.check:
175173
; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
176174
; CHECK: worker_state_machine.parallel_region.fallback.execute:
177-
; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
175+
; CHECK-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
178176
; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
179177
; CHECK: worker_state_machine.parallel_region.end:
180178
; CHECK-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -209,15 +207,14 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
209207
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
210208
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
211209
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
212-
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
213210
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
214211
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
215212
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
216213
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
217214
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check:
218215
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
219216
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.fallback.execute:
220-
; CHECK-DISABLE-SPMDIZATION-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
217+
; CHECK-DISABLE-SPMDIZATION-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
221218
; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
222219
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end:
223220
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel()

0 commit comments

Comments
 (0)