Skip to content

Commit 9ff7181

Browse files
committed
[EarlyCSE] Do not CSE convergent calls with memory effects
D149348 did this for readnone calls, which are handled by SimpleValue. This patch does the same for all other CSEable calls, which are handled by CallValue. Differential Revision: https://reviews.llvm.org/D153151
1 parent c2f8fe7 commit 9ff7181

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,19 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
218218
return true;
219219
}
220220

221+
/// Compute a hash for the call instruction \p CI from its opcode and its
/// operand values. For convergent calls the parent basic block is also mixed
/// into the hash, so the block a convergent call lives in is part of its key.
static unsigned hashCallInst(CallInst *CI) {
222+
// Don't CSE convergent calls in different basic blocks, because they
223+
// implicitly depend on the set of threads that is currently executing.
224+
if (CI->isConvergent()) {
225+
return hash_combine(
226+
CI->getOpcode(), CI->getParent(),
227+
hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
228+
}
229+
// Non-convergent calls: hash only the opcode and the operand values; the
// parent block is deliberately not part of the hash here.
return hash_combine(
230+
CI->getOpcode(),
231+
hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
232+
}
233+
221234
static unsigned getHashValueImpl(SimpleValue Val) {
222235
Instruction *Inst = Val.Inst;
223236
// Hash in all of the operands as pointers.
@@ -320,11 +333,8 @@ static unsigned getHashValueImpl(SimpleValue Val) {
320333

321334
// Don't CSE convergent calls in different basic blocks, because they
322335
// implicitly depend on the set of threads that is currently executing.
323-
if (CallInst *CI = dyn_cast<CallInst>(Inst); CI && CI->isConvergent()) {
324-
return hash_combine(
325-
Inst->getOpcode(), Inst->getParent(),
326-
hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
327-
}
336+
if (CallInst *CI = dyn_cast<CallInst>(Inst))
337+
return hashCallInst(CI);
328338

329339
// Mix in the opcode.
330340
return hash_combine(
@@ -524,15 +534,21 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
524534
Instruction *Inst = Val.Inst;
525535

526536
// Hash all of the operands as pointers and mix in the opcode.
527-
return hash_combine(
528-
Inst->getOpcode(),
529-
hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
537+
return hashCallInst(cast<CallInst>(Inst));
530538
}
531539

532540
bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
533-
Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
534541
if (LHS.isSentinel() || RHS.isSentinel())
535-
return LHSI == RHSI;
542+
return LHS.Inst == RHS.Inst;
543+
544+
CallInst *LHSI = cast<CallInst>(LHS.Inst);
545+
CallInst *RHSI = cast<CallInst>(RHS.Inst);
546+
547+
// Convergent calls implicitly depend on the set of threads that is
548+
// currently executing, so conservatively return false if they are in
549+
// different basic blocks.
550+
if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
551+
return false;
536552

537553
return LHSI->isIdenticalTo(RHSI);
538554
}

llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ define i32 @test_read_register(i32 %cond) {
1111
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COND]], 0
1212
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
1313
; CHECK: if:
14+
; CHECK-NEXT: [[Y1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR2]]
1415
; CHECK-NEXT: br label [[END]]
1516
; CHECK: end:
16-
; CHECK-NEXT: [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
17+
; CHECK-NEXT: [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
1718
; CHECK-NEXT: [[RET:%.*]] = add i32 [[X1]], [[Y2]]
1819
; CHECK-NEXT: ret i32 [[RET]]
1920
;
@@ -57,9 +58,10 @@ define i1 @test_live_mask(i32 %cond) {
5758
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COND]], 0
5859
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
5960
; CHECK: if:
61+
; CHECK-NEXT: [[Y1:%.*]] = call i1 @llvm.amdgcn.live.mask() #[[ATTR2]]
6062
; CHECK-NEXT: br label [[END]]
6163
; CHECK: end:
62-
; CHECK-NEXT: [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
64+
; CHECK-NEXT: [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
6365
; CHECK-NEXT: [[RET:%.*]] = add i1 [[X1]], [[Y2]]
6466
; CHECK-NEXT: ret i1 [[RET]]
6567
;

0 commit comments

Comments
 (0)