Skip to content

Commit ac29405

Browse files
[OpenMPOpt] Fix incorrect end-of-kernel barrier removal (llvm#65670)
Barrier removal in OpenMPOpt normally removes barriers by proving that they are redundant with barriers preceding them. However, it can't do this with the "pseudo-barrier" at the end of kernels because that can't be removed. Instead, it removes the barriers preceding the end of the kernel which that end-of-kernel barrier is redundant with. However, these barriers aren't always redundant with the end-of-kernel barrier when loops are involved, and removing them can lead to incorrect results in compiled code. This change fixes this by requiring that these pre-end-of-kernel barriers also have the kernel end as a unique successor before removing them. It also changes the initialization of `ExitED` for kernels since the kernel end is not an aligned barrier.
1 parent 3903817 commit ac29405

File tree

2 files changed

+199
-7
lines changed

2 files changed

+199
-7
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2623,6 +2623,17 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
26232623
}
26242624
};
26252625

2626+
/// Determines if \p BB exits the function unconditionally itself or reaches a
2627+
/// block that does through only unique successors.
2628+
static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) {
2629+
if (succ_empty(BB))
2630+
return true;
2631+
const BasicBlock *const Successor = BB->getUniqueSuccessor();
2632+
if (!Successor)
2633+
return false;
2634+
return hasFunctionEndAsUniqueSuccessor(Successor);
2635+
}
2636+
26262637
struct AAExecutionDomainFunction : public AAExecutionDomain {
26272638
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
26282639
: AAExecutionDomain(IRP, A) {}
@@ -2678,17 +2689,19 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26782689
if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
26792690
return;
26802691

2681-
// We can remove this barrier, if it is one, or all aligned barriers
2682-
// reaching the kernel end. In the latter case we can transitively work
2683-
// our way back until we find a barrier that guards a side-effect if we
2684-
// are dealing with the kernel end here.
2692+
// We can remove this barrier, if it is one, or aligned barriers reaching
2693+
// the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
2694+
// end should only be removed if the kernel end is their unique successor;
2695+
// otherwise, they may have side-effects that aren't accounted for in the
2696+
// kernel end in their other successors. If those barriers have other
2697+
// barriers reaching them, those can be transitively removed as well as
2698+
// long as the kernel end is also their unique successor.
26852699
if (CB) {
26862700
DeletedBarriers.insert(CB);
26872701
A.deleteAfterManifest(*CB);
26882702
++NumBarriersEliminated;
26892703
Changed = ChangeStatus::CHANGED;
26902704
} else if (!ED.AlignedBarriers.empty()) {
2691-
NumBarriersEliminated += ED.AlignedBarriers.size();
26922705
Changed = ChangeStatus::CHANGED;
26932706
SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
26942707
ED.AlignedBarriers.end());
@@ -2699,7 +2712,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
26992712
continue;
27002713
if (LastCB->getFunction() != getAnchorScope())
27012714
continue;
2715+
if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
2716+
continue;
27022717
if (!DeletedBarriers.count(LastCB)) {
2718+
++NumBarriersEliminated;
27032719
A.deleteAfterManifest(*LastCB);
27042720
continue;
27052721
}
@@ -2980,7 +2996,7 @@ bool AAExecutionDomainFunction::handleCallees(Attributor &A,
29802996
EntryBBED.IsExecutedByInitialThreadOnly = false;
29812997
EntryBBED.IsReachedFromAlignedBarrierOnly = true;
29822998
EntryBBED.EncounteredNonLocalSideEffect = false;
2983-
ExitED.IsReachingAlignedBarrierOnly = true;
2999+
ExitED.IsReachingAlignedBarrierOnly = false;
29843000
} else {
29853001
EntryBBED.IsExecutedByInitialThreadOnly = false;
29863002
EntryBBED.IsReachedFromAlignedBarrierOnly = false;

llvm/test/Transforms/OpenMP/barrier_removal.ll

Lines changed: 177 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1065,8 +1065,174 @@ define void @caller_barrier2() "kernel" {
10651065
ret void
10661066
}
10671067

1068+
define void @loop_barrier() "kernel" {
1069+
; CHECK-LABEL: define {{[^@]+}}@loop_barrier
1070+
; CHECK-SAME: () #[[ATTR4]] {
1071+
; CHECK-NEXT: entry:
1072+
; CHECK-NEXT: br label [[LOOP:%.*]]
1073+
; CHECK: loop:
1074+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1075+
; CHECK-NEXT: call void @unknown()
1076+
; CHECK-NEXT: call void @aligned_barrier()
1077+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1078+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1079+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1080+
; CHECK: exit:
1081+
; CHECK-NEXT: ret void
1082+
;
1083+
entry:
1084+
br label %loop
1085+
1086+
loop:
1087+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1088+
call void @unknown()
1089+
call void @aligned_barrier()
1090+
%i.next = add nuw nsw i32 %i, 1
1091+
%cond = icmp ne i32 %i.next, 128
1092+
br i1 %cond, label %loop, label %exit
1093+
1094+
exit:
1095+
ret void
1096+
}
1097+
1098+
define void @loop_barrier_end_barriers() "kernel" {
1099+
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
1100+
; CHECK-SAME: () #[[ATTR4]] {
1101+
; CHECK-NEXT: entry:
1102+
; CHECK-NEXT: br label [[LOOP:%.*]]
1103+
; CHECK: loop:
1104+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1105+
; CHECK-NEXT: call void @unknown()
1106+
; CHECK-NEXT: call void @aligned_barrier()
1107+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1108+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1109+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1110+
; CHECK: exit:
1111+
; CHECK-NEXT: ret void
1112+
;
1113+
entry:
1114+
br label %loop
1115+
1116+
loop:
1117+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1118+
call void @unknown()
1119+
call void @aligned_barrier()
1120+
%i.next = add nuw nsw i32 %i, 1
1121+
%cond = icmp ne i32 %i.next, 128
1122+
br i1 %cond, label %loop, label %exit
1123+
1124+
exit:
1125+
call void @aligned_barrier()
1126+
call void @aligned_barrier()
1127+
call void @aligned_barrier()
1128+
call void @aligned_barrier()
1129+
ret void
1130+
}
1131+
1132+
define void @loop_barrier_end_barriers_unknown() "kernel" {
1133+
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
1134+
; CHECK-SAME: () #[[ATTR4]] {
1135+
; CHECK-NEXT: entry:
1136+
; CHECK-NEXT: br label [[LOOP:%.*]]
1137+
; CHECK: loop:
1138+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1139+
; CHECK-NEXT: call void @unknown()
1140+
; CHECK-NEXT: call void @aligned_barrier()
1141+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1142+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1143+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1144+
; CHECK: exit:
1145+
; CHECK-NEXT: call void @unknown()
1146+
; CHECK-NEXT: ret void
1147+
;
1148+
entry:
1149+
br label %loop
1150+
1151+
loop:
1152+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1153+
call void @unknown()
1154+
call void @aligned_barrier()
1155+
%i.next = add nuw nsw i32 %i, 1
1156+
%cond = icmp ne i32 %i.next, 128
1157+
br i1 %cond, label %loop, label %exit
1158+
1159+
exit:
1160+
call void @aligned_barrier()
1161+
call void @aligned_barrier()
1162+
call void @unknown()
1163+
call void @aligned_barrier()
1164+
call void @aligned_barrier()
1165+
ret void
1166+
}
1167+
1168+
define void @loop_barrier_store() "kernel" {
1169+
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
1170+
; CHECK-SAME: () #[[ATTR4]] {
1171+
; CHECK-NEXT: entry:
1172+
; CHECK-NEXT: br label [[LOOP:%.*]]
1173+
; CHECK: loop:
1174+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1175+
; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
1176+
; CHECK-NEXT: call void @aligned_barrier()
1177+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1178+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1179+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1180+
; CHECK: exit:
1181+
; CHECK-NEXT: ret void
1182+
;
1183+
entry:
1184+
br label %loop
1185+
1186+
loop:
1187+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1188+
store i32 %i, ptr @G1
1189+
call void @aligned_barrier()
1190+
%i.next = add nuw nsw i32 %i, 1
1191+
%cond = icmp ne i32 %i.next, 128
1192+
br i1 %cond, label %loop, label %exit
1193+
1194+
exit:
1195+
ret void
1196+
}
1197+
1198+
define void @loop_barrier_end_barriers_store() "kernel" {
1199+
; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
1200+
; CHECK-SAME: () #[[ATTR4]] {
1201+
; CHECK-NEXT: entry:
1202+
; CHECK-NEXT: br label [[LOOP:%.*]]
1203+
; CHECK: loop:
1204+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1205+
; CHECK-NEXT: store i32 [[I]], ptr @G1, align 4
1206+
; CHECK-NEXT: call void @aligned_barrier()
1207+
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1208+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1209+
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1210+
; CHECK: exit:
1211+
; CHECK-NEXT: store i32 [[I_NEXT]], ptr @G1, align 4
1212+
; CHECK-NEXT: ret void
1213+
;
1214+
entry:
1215+
br label %loop
1216+
1217+
loop:
1218+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1219+
store i32 %i, ptr @G1
1220+
call void @aligned_barrier()
1221+
%i.next = add nuw nsw i32 %i, 1
1222+
%cond = icmp ne i32 %i.next, 128
1223+
br i1 %cond, label %loop, label %exit
1224+
1225+
exit:
1226+
call void @aligned_barrier()
1227+
call void @aligned_barrier()
1228+
store i32 %i.next, ptr @G1
1229+
call void @aligned_barrier()
1230+
call void @aligned_barrier()
1231+
ret void
1232+
}
1233+
10681234
!llvm.module.flags = !{!16,!15}
1069-
!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25}
1235+
!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25,!26,!27,!28,!29,!30}
10701236

10711237
!0 = !{ptr @pos_empty_1, !"kernel", i32 1}
10721238
!1 = !{ptr @pos_empty_2, !"kernel", i32 1}
@@ -1079,6 +1245,11 @@ define void @caller_barrier2() "kernel" {
10791245
!23 = !{ptr @pos_empty_8, !"kernel", i32 1}
10801246
!24 = !{ptr @caller_barrier1, !"kernel", i32 1}
10811247
!25 = !{ptr @caller_barrier2, !"kernel", i32 1}
1248+
!26 = !{ptr @loop_barrier, !"kernel", i32 1}
1249+
!27 = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
1250+
!28 = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
1251+
!29 = !{ptr @loop_barrier_store, !"kernel", i32 1}
1252+
!30 = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
10821253
!6 = !{ptr @neg_empty_8, !"kernel", i32 1}
10831254
!19 = !{ptr @neg_empty_9, !"kernel", i32 1}
10841255
!20 = !{ptr @pos_empty_10, !"kernel", i32 1}
@@ -1128,4 +1299,9 @@ define void @caller_barrier2() "kernel" {
11281299
; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1}
11291300
; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1}
11301301
; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1}
1302+
; CHECK: [[META26:![0-9]+]] = !{ptr @loop_barrier, !"kernel", i32 1}
1303+
; CHECK: [[META27:![0-9]+]] = !{ptr @loop_barrier_end_barriers, !"kernel", i32 1}
1304+
; CHECK: [[META28:![0-9]+]] = !{ptr @loop_barrier_end_barriers_unknown, !"kernel", i32 1}
1305+
; CHECK: [[META29:![0-9]+]] = !{ptr @loop_barrier_store, !"kernel", i32 1}
1306+
; CHECK: [[META30:![0-9]+]] = !{ptr @loop_barrier_end_barriers_store, !"kernel", i32 1}
11311307
;.

0 commit comments

Comments
 (0)