Skip to content

Commit 0cd8a28

Browse files
committed
[Attributor][FIX] No IntraFnReachability does not mean unreachable
Also, first check inter fn reachability as it seems to be cheaper in practice.
1 parent ba0be69 commit 0cd8a28

File tree

2 files changed

+99
-8
lines changed

2 files changed

+99
-8
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10590,10 +10590,12 @@ struct AAInterFnReachabilityFunction
1059010590

1059110591
// Determine call like instructions that we can reach from the inst.
1059210592
auto CheckCallBase = [&](Instruction &CBInst) {
10593-
if (!IntraFnReachability || !IntraFnReachability->isAssumedReachable(
10594-
A, *RQI.From, CBInst, RQI.ExclusionSet))
10593+
// There are usually fewer nodes in the call graph, check inter function
10594+
// reachability first.
10595+
if (CheckReachableCallBase(cast<CallBase>(&CBInst)))
1059510596
return true;
10596-
return CheckReachableCallBase(cast<CallBase>(&CBInst));
10597+
return IntraFnReachability && !IntraFnReachability->isAssumedReachable(
10598+
A, *RQI.From, CBInst, RQI.ExclusionSet);
1059710599
};
1059810600

1059910601
bool UsedExclusionSet = /* conservative */ true;

llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Lines changed: 94 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ target triple = "amdgcn-amd-amdhsa"
2323
@QD3 = internal addrspace(3) global i32 undef, align 4
2424
@UAA1 = internal addrspace(3) global i32 undef, align 4
2525
@UAA2 = internal addrspace(3) global i32 undef, align 4
26+
@UAA3 = internal addrspace(3) global i32 undef, align 4
27+
@UANA1 = internal addrspace(3) global i32 undef, align 4
2628
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
2729

2830
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
@@ -44,6 +46,8 @@ target triple = "amdgcn-amd-amdhsa"
4446
; TUNIT: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
4547
; TUNIT: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
4648
; TUNIT: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
49+
; TUNIT: @[[UAA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
50+
; TUNIT: @[[UANA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
4751
; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
4852
; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak hidden constant i8 0
4953
;.
@@ -64,6 +68,8 @@ target triple = "amdgcn-amd-amdhsa"
6468
; CGSCC: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
6569
; CGSCC: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
6670
; CGSCC: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
71+
; CGSCC: @[[UAA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
72+
; CGSCC: @[[UANA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
6773
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
6874
;.
6975
define void @kernel() "kernel" {
@@ -677,7 +683,7 @@ define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
677683
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
678684
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
679685
; TUNIT: L:
680-
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
686+
; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
681687
; TUNIT-NEXT: ret void
682688
; TUNIT: S:
683689
; TUNIT-NEXT: call void @sync()
@@ -689,7 +695,7 @@ define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
689695
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
690696
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
691697
; CGSCC: L:
692-
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
698+
; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
693699
; CGSCC-NEXT: ret void
694700
; CGSCC: S:
695701
; CGSCC-NEXT: call void @sync()
@@ -717,7 +723,7 @@ define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
717723
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
718724
; TUNIT: L:
719725
; TUNIT-NEXT: call void @sync()
720-
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
726+
; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
721727
; TUNIT-NEXT: ret void
722728
; TUNIT: S:
723729
; TUNIT-NEXT: call void @sync()
@@ -730,7 +736,7 @@ define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
730736
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
731737
; CGSCC: L:
732738
; CGSCC-NEXT: call void @sync()
733-
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
739+
; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
734740
; CGSCC-NEXT: ret void
735741
; CGSCC: S:
736742
; CGSCC-NEXT: call void @sync()
@@ -752,6 +758,85 @@ S:
752758
ret void
753759
}
754760

761+
define void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
762+
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
763+
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
764+
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
765+
; TUNIT: L:
766+
; TUNIT-NEXT: call void @sync()
767+
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
768+
; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
769+
; TUNIT-NEXT: ret void
770+
; TUNIT: S:
771+
; TUNIT-NEXT: call void @sync()
772+
; TUNIT-NEXT: call void @sync()
773+
; TUNIT-NEXT: ret void
774+
;
775+
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
776+
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
777+
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
778+
; CGSCC: L:
779+
; CGSCC-NEXT: call void @sync()
780+
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
781+
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
782+
; CGSCC-NEXT: ret void
783+
; CGSCC: S:
784+
; CGSCC-NEXT: call void @sync()
785+
; CGSCC-NEXT: call void @sync()
786+
; CGSCC-NEXT: ret void
787+
;
788+
br i1 %c, label %S, label %L
789+
L:
790+
call void @sync();
791+
%v = load i32, ptr addrspace(3) @UAA3
792+
call void @use1(i32 %v)
793+
call void @barrier();
794+
ret void
795+
S:
796+
call void @sync();
797+
store i32 2, ptr addrspace(3) @UAA3
798+
call void @sync();
799+
ret void
800+
}
801+
802+
define void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
803+
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
804+
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
805+
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
806+
; TUNIT: L:
807+
; TUNIT-NEXT: call void @sync()
808+
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
809+
; TUNIT-NEXT: ret void
810+
; TUNIT: S:
811+
; TUNIT-NEXT: call void @sync()
812+
; TUNIT-NEXT: call void @sync()
813+
; TUNIT-NEXT: ret void
814+
;
815+
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
816+
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
817+
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
818+
; CGSCC: L:
819+
; CGSCC-NEXT: call void @sync()
820+
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
821+
; CGSCC-NEXT: ret void
822+
; CGSCC: S:
823+
; CGSCC-NEXT: call void @sync()
824+
; CGSCC-NEXT: call void @sync()
825+
; CGSCC-NEXT: ret void
826+
;
827+
br i1 %c, label %S, label %L
828+
L:
829+
call void @sync();
830+
%v = load i32, ptr addrspace(3) @UANA1
831+
call void @use1(i32 %v)
832+
ret void
833+
S:
834+
call void @sync();
835+
store i32 2, ptr addrspace(3) @UANA1
836+
call void @sync();
837+
ret void
838+
}
839+
755840
declare void @sync()
756841
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
757842
declare void @use1(i32) nosync norecurse nounwind nocallback
@@ -760,7 +845,7 @@ declare void @__kmpc_target_deinit(ptr, i8) nocallback
760845
declare void @llvm.assume(i1)
761846

762847
!llvm.module.flags = !{!0, !1}
763-
!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18}
848+
!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20}
764849

765850
!0 = !{i32 7, !"openmp", i32 50}
766851
!1 = !{i32 7, !"openmp-device", i32 50}
@@ -781,6 +866,8 @@ declare void @llvm.assume(i1)
781866
!16 = !{ptr @kernel4d3, !"kernel", i32 1}
782867
!17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
783868
!18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
869+
!19 = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
870+
!20 = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
784871

785872
;.
786873
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
@@ -819,4 +906,6 @@ declare void @llvm.assume(i1)
819906
; CHECK: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
820907
; CHECK: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
821908
; CHECK: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
909+
; CHECK: [[META19:![0-9]+]] = !{ptr @kernel_unknown_and_aligned3, !"kernel", i32 1}
910+
; CHECK: [[META20:![0-9]+]] = !{ptr @kernel_unknown_and_not_aligned1, !"kernel", i32 1}
822911
;.

0 commit comments

Comments
 (0)