@@ -390,15 +390,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
390
390
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
391
391
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
392
392
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
393
- ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
394
393
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
395
394
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
396
395
; AMDGPU: worker_state_machine.finished:
397
396
; AMDGPU-NEXT: ret void
398
397
; AMDGPU: worker_state_machine.is_active.check:
399
398
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
400
399
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
401
- ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
400
+ ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
402
401
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
403
402
; AMDGPU: worker_state_machine.parallel_region.end:
404
403
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -451,15 +450,14 @@ define weak void @spmd_and_non_spmd_callee(i1 %c) #0 {
451
450
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
452
451
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
453
452
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
454
- ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
455
453
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
456
454
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
457
455
; NVPTX: worker_state_machine.finished:
458
456
; NVPTX-NEXT: ret void
459
457
; NVPTX: worker_state_machine.is_active.check:
460
458
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
461
459
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
462
- ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
460
+ ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
463
461
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
464
462
; NVPTX: worker_state_machine.parallel_region.end:
465
463
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -736,15 +734,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
736
734
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
737
735
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
738
736
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
739
- ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
740
737
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
741
738
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
742
739
; AMDGPU: worker_state_machine.finished:
743
740
; AMDGPU-NEXT: ret void
744
741
; AMDGPU: worker_state_machine.is_active.check:
745
742
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
746
743
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
747
- ; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
744
+ ; AMDGPU-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
748
745
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
749
746
; AMDGPU: worker_state_machine.parallel_region.end:
750
747
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
@@ -796,15 +793,14 @@ define weak void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
796
793
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
797
794
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
798
795
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
799
- ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
800
796
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
801
797
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
802
798
; NVPTX: worker_state_machine.finished:
803
799
; NVPTX-NEXT: ret void
804
800
; NVPTX: worker_state_machine.is_active.check:
805
801
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
806
802
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
807
- ; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
803
+ ; NVPTX-NEXT: call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
808
804
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
809
805
; NVPTX: worker_state_machine.parallel_region.end:
810
806
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
0 commit comments