Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 5c16c4e

Browse files
author
Robin Morisset
committed
[X86] Use the generic AtomicExpandPass instead of X86AtomicExpandPass
This required a new hook called hasLoadLinkedStoreConditional to know whether to expand atomics to LL/SC (ARM, AArch64, in a future patch Power) or to CmpXchg (X86). Apart from that, the new code in AtomicExpandPass is mostly moved from X86AtomicExpandPass. The main result of this patch is to get rid of that pass, which had lots of code duplicated with AtomicExpandPass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217928 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9fe79b4 commit 5c16c4e

12 files changed

+215
-342
lines changed

include/llvm/Target/TargetLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,10 @@ class TargetLoweringBase {
936936
/// \name Helpers for atomic expansion.
937937
/// @{
938938

939+
/// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
940+
/// and expand AtomicCmpXchgInst.
941+
virtual bool hasLoadLinkedStoreConditional() const { return false; }
942+
939943
/// Perform a load-linked operation on Addr, returning a "Value *" with the
940944
/// corresponding pointee type. This may entail some non-trivial operations to
941945
/// truncate or reconstruct types that will be illegal in the backend. See

lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 133 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
//===----------------------------------------------------------------------===//
99
//
1010
// This file contains a pass (at IR level) to replace atomic instructions with
11-
// appropriate (intrinsic-based) ldrex/strex loops.
11+
// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
1212
//
1313
//===----------------------------------------------------------------------===//
1414

@@ -44,6 +44,8 @@ namespace {
4444
bool expandAtomicLoad(LoadInst *LI);
4545
bool expandAtomicStore(StoreInst *SI);
4646
bool expandAtomicRMW(AtomicRMWInst *AI);
47+
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
48+
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
4749
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
4850
};
4951
}
@@ -88,7 +90,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
8890
MadeChange |= expandAtomicStore(SI);
8991
} else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
9092
MadeChange |= expandAtomicRMW(RMWI);
91-
} else if (CASI) {
93+
} else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
9294
MadeChange |= expandAtomicCmpXchg(CASI);
9395
}
9496
}
@@ -127,9 +129,12 @@ bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
127129
}
128130

129131
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
130-
// The only atomic 64-bit store on ARM is an strexd that succeeds, which means
131-
// we need a loop and the entire instruction is essentially an "atomicrmw
132-
// xchg" that ignores the value loaded.
132+
// This function is only called on atomic stores that are too large to be
133+
// atomic if implemented as a native store. So we replace them by an
134+
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
135+
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
136+
// It is the responsibility of the target to only return true in
137+
// shouldExpandAtomicRMW in cases where this is required and possible.
133138
IRBuilder<> Builder(SI);
134139
AtomicRMWInst *AI =
135140
Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
@@ -141,8 +146,54 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
141146
}
142147

143148
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
  // Dispatch on the target's hook: a load-linked/store-conditional loop when
  // the target reports LL/SC support, a cmpxchg loop otherwise.
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  const bool UseLLSC = TLI->hasLoadLinkedStoreConditional();
  return UseLLSC ? expandAtomicRMWToLLSC(AI) : expandAtomicRMWToCmpXchg(AI);
}
156+
157+
/// Emit IR to implement the given atomicrmw operation on values in registers,
158+
/// returning the new value.
159+
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
160+
Value *Loaded, Value *Inc) {
161+
Value *NewVal;
162+
switch (Op) {
163+
case AtomicRMWInst::Xchg:
164+
return Inc;
165+
case AtomicRMWInst::Add:
166+
return Builder.CreateAdd(Loaded, Inc, "new");
167+
case AtomicRMWInst::Sub:
168+
return Builder.CreateSub(Loaded, Inc, "new");
169+
case AtomicRMWInst::And:
170+
return Builder.CreateAnd(Loaded, Inc, "new");
171+
case AtomicRMWInst::Nand:
172+
return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
173+
case AtomicRMWInst::Or:
174+
return Builder.CreateOr(Loaded, Inc, "new");
175+
case AtomicRMWInst::Xor:
176+
return Builder.CreateXor(Loaded, Inc, "new");
177+
case AtomicRMWInst::Max:
178+
NewVal = Builder.CreateICmpSGT(Loaded, Inc);
179+
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
180+
case AtomicRMWInst::Min:
181+
NewVal = Builder.CreateICmpSLE(Loaded, Inc);
182+
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
183+
case AtomicRMWInst::UMax:
184+
NewVal = Builder.CreateICmpUGT(Loaded, Inc);
185+
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
186+
case AtomicRMWInst::UMin:
187+
NewVal = Builder.CreateICmpULE(Loaded, Inc);
188+
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
189+
default:
190+
llvm_unreachable("Unknown atomic op");
191+
}
192+
}
193+
194+
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
144195
auto TLI = TM->getSubtargetImpl()->getTargetLowering();
145-
AtomicOrdering Order = AI->getOrdering();
196+
AtomicOrdering FenceOrder = AI->getOrdering();
146197
Value *Addr = AI->getPointerOperand();
147198
BasicBlock *BB = AI->getParent();
148199
Function *F = BB->getParent();
@@ -152,7 +203,7 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
152203
// of everything. Otherwise, emitLeading/TrailingFence are no-op and we
153204
// should preserve the ordering.
154205
AtomicOrdering MemOpOrder =
155-
TLI->getInsertFencesForAtomic() ? Monotonic : Order;
206+
TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
156207

157208
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
158209
//
@@ -179,56 +230,15 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
179230
// the branch entirely.
180231
std::prev(BB->end())->eraseFromParent();
181232
Builder.SetInsertPoint(BB);
182-
TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
233+
TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
183234
Builder.CreateBr(LoopBB);
184235

185236
// Start the main loop block now that we've taken care of the preliminaries.
186237
Builder.SetInsertPoint(LoopBB);
187238
Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
188239

189-
Value *NewVal;
190-
switch (AI->getOperation()) {
191-
case AtomicRMWInst::Xchg:
192-
NewVal = AI->getValOperand();
193-
break;
194-
case AtomicRMWInst::Add:
195-
NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
196-
break;
197-
case AtomicRMWInst::Sub:
198-
NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
199-
break;
200-
case AtomicRMWInst::And:
201-
NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
202-
break;
203-
case AtomicRMWInst::Nand:
204-
NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
205-
"new");
206-
break;
207-
case AtomicRMWInst::Or:
208-
NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
209-
break;
210-
case AtomicRMWInst::Xor:
211-
NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
212-
break;
213-
case AtomicRMWInst::Max:
214-
NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
215-
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
216-
break;
217-
case AtomicRMWInst::Min:
218-
NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
219-
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
220-
break;
221-
case AtomicRMWInst::UMax:
222-
NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
223-
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
224-
break;
225-
case AtomicRMWInst::UMin:
226-
NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
227-
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
228-
break;
229-
default:
230-
llvm_unreachable("Unknown atomic op");
231-
}
240+
Value *NewVal =
241+
performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
232242

233243
Value *StoreSuccess =
234244
TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
@@ -237,14 +247,85 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
237247
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
238248

239249
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
240-
TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
250+
TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
241251

242252
AI->replaceAllUsesWith(Loaded);
243253
AI->eraseFromParent();
244254

245255
return true;
246256
}
247257

258+
/// Expand \p AI into a loop built around a cmpxchg instruction: load the
/// current value once, then repeatedly compute the updated value and try to
/// install it with cmpxchg until the exchange succeeds. \p AI is erased and
/// its uses are rewired to the value observed by the successful cmpxchg.
/// Always returns true (the function was modified).
bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  // An Unordered atomicrmw is treated as Monotonic for this expansion.
  AtomicOrdering FenceOrder =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  // When the target asks for explicit fences, the cmpxchg itself is emitted
  // as Monotonic; otherwise it carries the full ordering.
  AtomicOrdering MemOpOrder =
      TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  TLI->emitLeadingFence(Builder, FenceOrder,
                        /*IsStore=*/true, /*IsLoad=*/true);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // Atomics require at least natural alignment. setAlignment() expects a
  // value in bytes while getPrimitiveSizeInBits() reports bits, so convert;
  // passing the bit count would claim an alignment eight times too large.
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
  // Element 0 of the cmpxchg result is the value found in memory; it feeds
  // the next iteration when the exchange failed.
  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Loaded->addIncoming(NewLoaded, LoopBB);

  // Element 1 is the success flag: leave the loop on success, retry otherwise.
  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  TLI->emitTrailingFence(Builder, FenceOrder,
                         /*IsStore=*/true, /*IsLoad=*/true);

  AI->replaceAllUsesWith(NewLoaded);
  AI->eraseFromParent();

  return true;
}
328+
248329
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
249330
auto TLI = TM->getSubtargetImpl()->getTargetLowering();
250331
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8571,6 +8571,10 @@ bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
85718571
return Size <= 128;
85728572
}
85738573

8574+
/// AArch64 opts in to the emitLoadLinked/emitStoreConditional expansion path
/// of AtomicExpandPass.
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
  return true;
}
8577+
85748578
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
85758579
AtomicOrdering Ord) const {
85768580
Module *M = Builder.GetInsertBlock()->getParent()->getParent();

lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ class AArch64TargetLowering : public TargetLowering {
324324
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
325325
Type *Ty) const override;
326326

327+
bool hasLoadLinkedStoreConditional() const override;
327328
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
328329
AtomicOrdering Ord) const override;
329330
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,

lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10982,6 +10982,8 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1098210982
return true;
1098310983
}
1098410984

10985+
/// ARM opts in to the emitLoadLinked/emitStoreConditional expansion path of
/// AtomicExpandPass.
bool ARMTargetLowering::hasLoadLinkedStoreConditional() const {
  return true;
}
10986+
1098510987
static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) {
1098610988
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
1098710989
Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);

lib/Target/ARM/ARMISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ namespace llvm {
392392
bool functionArgumentNeedsConsecutiveRegisters(
393393
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
394394

395+
bool hasLoadLinkedStoreConditional() const override;
395396
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
396397
AtomicOrdering Ord) const override;
397398
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,

lib/Target/X86/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ add_public_tablegen_target(X86CommonTableGen)
1414

1515
set(sources
1616
X86AsmPrinter.cpp
17-
X86AtomicExpandPass.cpp
1817
X86FastISel.cpp
1918
X86FloatingPoint.cpp
2019
X86FrameLowering.cpp

lib/Target/X86/X86.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ class FunctionPass;
2323
class ImmutablePass;
2424
class X86TargetMachine;
2525

26-
/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
27-
/// be handled natively in terms of a loop using cmpxchg.
28-
FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
29-
3026
/// createX86ISelDag - This pass converts a legalized DAG into a
3127
/// X86-specific DAG, ready for instruction scheduling.
3228
///

0 commit comments

Comments
 (0)