Skip to content

Commit 4fc52db

Browse files
committed
[InstCombine] Remove weaker fence adjacent to a stronger fence
We have an instCombine rule to remove identical consecutive fences. We can extend this to remove weaker fences when we have consecutive stronger fence. As stated in the LangRef, a fence with a stronger ordering also implies ordering weaker than itself: "A fence which has seq_cst ordering, in addition to having both acquire and release semantics specified above, participates in the global program order of other seq_cst operations and/or fences." Reviewed-By: reames Differential Revision: https://reviews.llvm.org/D118607
1 parent 91fb66c commit 4fc52db

File tree

2 files changed

+75
-8
lines changed

2 files changed

+75
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
24682468

24692469
// Fence instruction simplification
24702470
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
2471-
// Remove identical consecutive fences.
2472-
Instruction *Next = FI.getNextNonDebugInstruction();
2473-
if (auto *NFI = dyn_cast<FenceInst>(Next))
2474-
if (FI.isIdenticalTo(NFI))
2471+
auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
2472+
// This check is solely here to handle arbitrary target-dependent syncscopes.
2473+
// TODO: Can remove if does not matter in practice.
2474+
if (NFI && FI.isIdenticalTo(NFI))
2475+
return eraseInstFromFunction(FI);
2476+
2477+
// Returns true if FI1 is identical or stronger fence than FI2.
2478+
auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
2479+
auto FI1SyncScope = FI1->getSyncScopeID();
2480+
// Consider same scope, where scope is global or single-thread.
2481+
if (FI1SyncScope != FI2->getSyncScopeID() ||
2482+
(FI1SyncScope != SyncScope::System &&
2483+
FI1SyncScope != SyncScope::SingleThread))
2484+
return false;
2485+
2486+
return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
2487+
};
2488+
if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
2489+
return eraseInstFromFunction(FI);
2490+
2491+
if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
2492+
if (isIdenticalOrStrongerFence(PFI, &FI))
24752493
return eraseInstFromFunction(FI);
24762494
return nullptr;
24772495
}

llvm/test/Transforms/InstCombine/consecutive-fences.ll

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; CHECK-LABEL: define void @tinkywinky
66
; CHECK-NEXT: fence seq_cst
7-
; CHECK-NEXT: fence syncscope("singlethread") acquire
7+
; CHECK-NEXT: fence syncscope("singlethread") acquire
88
; CHECK-NEXT: ret void
99
; CHECK-NEXT: }
1010

@@ -18,6 +18,17 @@ define void @tinkywinky() {
1818
ret void
1919
}
2020

21+
; Arbitrary target dependent scope
22+
; Is this transform really needed?
23+
; CHECK-LABEL: test_target_dependent_scope
24+
; CHECK-NEXT: fence syncscope("MSP430") acquire
25+
; CHECK-NEXT: ret void
26+
define void @test_target_dependent_scope() {
27+
fence syncscope("MSP430") acquire
28+
fence syncscope("MSP430") acquire
29+
ret void
30+
}
31+
2132
; CHECK-LABEL: define void @dipsy
2233
; CHECK-NEXT: fence seq_cst
2334
; CHECK-NEXT: fence syncscope("singlethread") seq_cst
@@ -31,9 +42,6 @@ define void @dipsy() {
3142
}
3243

3344
; CHECK-LABEL: define void @patatino
34-
; CHECK-NEXT: fence acquire
35-
; CHECK-NEXT: fence seq_cst
36-
; CHECK-NEXT: fence acquire
3745
; CHECK-NEXT: fence seq_cst
3846
; CHECK-NEXT: ret void
3947
; CHECK-NEXT: }
@@ -46,6 +54,47 @@ define void @patatino() {
4654
ret void
4755
}
4856

57+
; CHECK-LABEL: define void @weaker_fence_1
58+
; CHECK-NEXT: fence seq_cst
59+
; CHECK-NEXT: ret void
60+
define void @weaker_fence_1() {
61+
fence seq_cst
62+
fence release
63+
fence seq_cst
64+
ret void
65+
}
66+
67+
; CHECK-LABEL: define void @weaker_fence_2
68+
; CHECK-NEXT: fence seq_cst
69+
; CHECK-NEXT: ret void
70+
define void @weaker_fence_2() {
71+
fence seq_cst
72+
fence release
73+
fence seq_cst
74+
fence acquire
75+
ret void
76+
}
77+
78+
; Although acquire is a weaker ordering than seq_cst, it has a system scope,
79+
; compare to singlethread scope in seq_cst.
80+
; CHECK-LABEL: acquire_global_neg_test
81+
; CHECK-NEXT: fence acquire
82+
; CHECK-NEXT: fence syncscope("singlethread") seq_cst
83+
define void @acquire_global_neg_test() {
84+
fence acquire
85+
fence acquire
86+
fence syncscope("singlethread") seq_cst
87+
ret void
88+
}
89+
90+
; CHECK-LABEL: acquire_single_thread_scope
91+
; CHECK-NEXT: fence syncscope("singlethread") seq_cst
92+
define void @acquire_single_thread_scope() {
93+
fence syncscope("singlethread") acquire
94+
fence syncscope("singlethread") seq_cst
95+
ret void
96+
}
97+
4998
; CHECK-LABEL: define void @debug
5099
; CHECK-NOT: fence
51100
; CHECK: call void @llvm.dbg.value

0 commit comments

Comments
 (0)