8
8
// ===----------------------------------------------------------------------===//
9
9
//
10
10
// This file contains a pass (at IR level) to replace atomic instructions with
11
- // appropriate (intrinsic-based) ldrex/strex loops.
11
+ // either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
12
12
//
13
13
// ===----------------------------------------------------------------------===//
14
14
@@ -44,6 +44,8 @@ namespace {
44
44
bool expandAtomicLoad (LoadInst *LI);
45
45
bool expandAtomicStore (StoreInst *SI);
46
46
bool expandAtomicRMW (AtomicRMWInst *AI);
47
+ bool expandAtomicRMWToLLSC (AtomicRMWInst *AI);
48
+ bool expandAtomicRMWToCmpXchg (AtomicRMWInst *AI);
47
49
bool expandAtomicCmpXchg (AtomicCmpXchgInst *CI);
48
50
};
49
51
}
@@ -88,7 +90,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
88
90
MadeChange |= expandAtomicStore (SI);
89
91
} else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR (RMWI)) {
90
92
MadeChange |= expandAtomicRMW (RMWI);
91
- } else if (CASI) {
93
+ } else if (CASI && TargetLowering-> hasLoadLinkedStoreConditional () ) {
92
94
MadeChange |= expandAtomicCmpXchg (CASI);
93
95
}
94
96
}
@@ -127,9 +129,12 @@ bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
127
129
}
128
130
129
131
bool AtomicExpand::expandAtomicStore (StoreInst *SI) {
130
- // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
131
- // we need a loop and the entire instruction is essentially an "atomicrmw
132
- // xchg" that ignores the value loaded.
132
+ // This function is only called on atomic stores that are too large to be
133
+ // atomic if implemented as a native store. So we replace them by an
134
+ // atomic swap, that can be implemented for example as a ldrex/strex on ARM
135
+ // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
136
+ // It is the responsibility of the target to only return true in
137
+ // shouldExpandAtomicRMW in cases where this is required and possible.
133
138
IRBuilder<> Builder (SI);
134
139
AtomicRMWInst *AI =
135
140
Builder.CreateAtomicRMW (AtomicRMWInst::Xchg, SI->getPointerOperand (),
@@ -141,8 +146,54 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
141
146
}
142
147
143
148
bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
  // Dispatch on the target's capabilities: targets with load-linked /
  // store-conditional instructions get an LL/SC loop; everything else is
  // lowered to a compare-and-swap loop.
  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
  return TLI->hasLoadLinkedStoreConditional() ? expandAtomicRMWToLLSC(AI)
                                              : expandAtomicRMWToCmpXchg(AI);
}
156
+
157
+ // / Emit IR to implement the given atomicrmw operation on values in registers,
158
+ // / returning the new value.
159
+ static Value *performAtomicOp (AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
160
+ Value *Loaded, Value *Inc) {
161
+ Value *NewVal;
162
+ switch (Op) {
163
+ case AtomicRMWInst::Xchg:
164
+ return Inc;
165
+ case AtomicRMWInst::Add:
166
+ return Builder.CreateAdd (Loaded, Inc, " new" );
167
+ case AtomicRMWInst::Sub:
168
+ return Builder.CreateSub (Loaded, Inc, " new" );
169
+ case AtomicRMWInst::And:
170
+ return Builder.CreateAnd (Loaded, Inc, " new" );
171
+ case AtomicRMWInst::Nand:
172
+ return Builder.CreateNot (Builder.CreateAnd (Loaded, Inc), " new" );
173
+ case AtomicRMWInst::Or:
174
+ return Builder.CreateOr (Loaded, Inc, " new" );
175
+ case AtomicRMWInst::Xor:
176
+ return Builder.CreateXor (Loaded, Inc, " new" );
177
+ case AtomicRMWInst::Max:
178
+ NewVal = Builder.CreateICmpSGT (Loaded, Inc);
179
+ return Builder.CreateSelect (NewVal, Loaded, Inc, " new" );
180
+ case AtomicRMWInst::Min:
181
+ NewVal = Builder.CreateICmpSLE (Loaded, Inc);
182
+ return Builder.CreateSelect (NewVal, Loaded, Inc, " new" );
183
+ case AtomicRMWInst::UMax:
184
+ NewVal = Builder.CreateICmpUGT (Loaded, Inc);
185
+ return Builder.CreateSelect (NewVal, Loaded, Inc, " new" );
186
+ case AtomicRMWInst::UMin:
187
+ NewVal = Builder.CreateICmpULE (Loaded, Inc);
188
+ return Builder.CreateSelect (NewVal, Loaded, Inc, " new" );
189
+ default :
190
+ llvm_unreachable (" Unknown atomic op" );
191
+ }
192
+ }
193
+
194
+ bool AtomicExpand::expandAtomicRMWToLLSC (AtomicRMWInst *AI) {
144
195
auto TLI = TM->getSubtargetImpl ()->getTargetLowering ();
145
- AtomicOrdering Order = AI->getOrdering ();
196
+ AtomicOrdering FenceOrder = AI->getOrdering ();
146
197
Value *Addr = AI->getPointerOperand ();
147
198
BasicBlock *BB = AI->getParent ();
148
199
Function *F = BB->getParent ();
@@ -152,7 +203,7 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
152
203
// of everything. Otherwise, emitLeading/TrailingFence are no-op and we
153
204
// should preserve the ordering.
154
205
AtomicOrdering MemOpOrder =
155
- TLI->getInsertFencesForAtomic () ? Monotonic : Order ;
206
+ TLI->getInsertFencesForAtomic () ? Monotonic : FenceOrder ;
156
207
157
208
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
158
209
//
@@ -179,56 +230,15 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
179
230
// the branch entirely.
180
231
std::prev (BB->end ())->eraseFromParent ();
181
232
Builder.SetInsertPoint (BB);
182
- TLI->emitLeadingFence (Builder, Order , /* IsStore=*/ true , /* IsLoad=*/ true );
233
+ TLI->emitLeadingFence (Builder, FenceOrder , /* IsStore=*/ true , /* IsLoad=*/ true );
183
234
Builder.CreateBr (LoopBB);
184
235
185
236
// Start the main loop block now that we've taken care of the preliminaries.
186
237
Builder.SetInsertPoint (LoopBB);
187
238
Value *Loaded = TLI->emitLoadLinked (Builder, Addr, MemOpOrder);
188
239
189
- Value *NewVal;
190
- switch (AI->getOperation ()) {
191
- case AtomicRMWInst::Xchg:
192
- NewVal = AI->getValOperand ();
193
- break ;
194
- case AtomicRMWInst::Add:
195
- NewVal = Builder.CreateAdd (Loaded, AI->getValOperand (), " new" );
196
- break ;
197
- case AtomicRMWInst::Sub:
198
- NewVal = Builder.CreateSub (Loaded, AI->getValOperand (), " new" );
199
- break ;
200
- case AtomicRMWInst::And:
201
- NewVal = Builder.CreateAnd (Loaded, AI->getValOperand (), " new" );
202
- break ;
203
- case AtomicRMWInst::Nand:
204
- NewVal = Builder.CreateNot (Builder.CreateAnd (Loaded, AI->getValOperand ()),
205
- " new" );
206
- break ;
207
- case AtomicRMWInst::Or:
208
- NewVal = Builder.CreateOr (Loaded, AI->getValOperand (), " new" );
209
- break ;
210
- case AtomicRMWInst::Xor:
211
- NewVal = Builder.CreateXor (Loaded, AI->getValOperand (), " new" );
212
- break ;
213
- case AtomicRMWInst::Max:
214
- NewVal = Builder.CreateICmpSGT (Loaded, AI->getValOperand ());
215
- NewVal = Builder.CreateSelect (NewVal, Loaded, AI->getValOperand (), " new" );
216
- break ;
217
- case AtomicRMWInst::Min:
218
- NewVal = Builder.CreateICmpSLE (Loaded, AI->getValOperand ());
219
- NewVal = Builder.CreateSelect (NewVal, Loaded, AI->getValOperand (), " new" );
220
- break ;
221
- case AtomicRMWInst::UMax:
222
- NewVal = Builder.CreateICmpUGT (Loaded, AI->getValOperand ());
223
- NewVal = Builder.CreateSelect (NewVal, Loaded, AI->getValOperand (), " new" );
224
- break ;
225
- case AtomicRMWInst::UMin:
226
- NewVal = Builder.CreateICmpULE (Loaded, AI->getValOperand ());
227
- NewVal = Builder.CreateSelect (NewVal, Loaded, AI->getValOperand (), " new" );
228
- break ;
229
- default :
230
- llvm_unreachable (" Unknown atomic op" );
231
- }
240
+ Value *NewVal =
241
+ performAtomicOp (AI->getOperation (), Builder, Loaded, AI->getValOperand ());
232
242
233
243
Value *StoreSuccess =
234
244
TLI->emitStoreConditional (Builder, NewVal, Addr, MemOpOrder);
@@ -237,14 +247,85 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
237
247
Builder.CreateCondBr (TryAgain, LoopBB, ExitBB);
238
248
239
249
Builder.SetInsertPoint (ExitBB, ExitBB->begin ());
240
- TLI->emitTrailingFence (Builder, Order , /* IsStore=*/ true , /* IsLoad=*/ true );
250
+ TLI->emitTrailingFence (Builder, FenceOrder , /* IsStore=*/ true , /* IsLoad=*/ true );
241
251
242
252
AI->replaceAllUsesWith (Loaded);
243
253
AI->eraseFromParent ();
244
254
245
255
return true ;
246
256
}
247
257
258
+ bool AtomicExpand::expandAtomicRMWToCmpXchg (AtomicRMWInst *AI) {
259
+ auto TargetLowering = TM->getSubtargetImpl ()->getTargetLowering ();
260
+ AtomicOrdering FenceOrder =
261
+ AI->getOrdering () == Unordered ? Monotonic : AI->getOrdering ();
262
+ AtomicOrdering MemOpOrder =
263
+ TargetLowering->getInsertFencesForAtomic () ? Monotonic : FenceOrder;
264
+ Value *Addr = AI->getPointerOperand ();
265
+ BasicBlock *BB = AI->getParent ();
266
+ Function *F = BB->getParent ();
267
+ LLVMContext &Ctx = F->getContext ();
268
+
269
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
270
+ //
271
+ // The standard expansion we produce is:
272
+ // [...]
273
+ // %init_loaded = load atomic iN* %addr
274
+ // br label %loop
275
+ // loop:
276
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
277
+ // %new = some_op iN %loaded, %incr
278
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
279
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
280
+ // %success = extractvalue { iN, i1 } %pair, 1
281
+ // br i1 %success, label %atomicrmw.end, label %loop
282
+ // atomicrmw.end:
283
+ // [...]
284
+ BasicBlock *ExitBB = BB->splitBasicBlock (AI, " atomicrmw.end" );
285
+ BasicBlock *LoopBB = BasicBlock::Create (Ctx, " atomicrmw.start" , F, ExitBB);
286
+
287
+ // This grabs the DebugLoc from AI.
288
+ IRBuilder<> Builder (AI);
289
+
290
+ // The split call above "helpfully" added a branch at the end of BB (to the
291
+ // wrong place), but we want a load. It's easiest to just remove
292
+ // the branch entirely.
293
+ std::prev (BB->end ())->eraseFromParent ();
294
+ Builder.SetInsertPoint (BB);
295
+ TargetLowering->emitLeadingFence (Builder, FenceOrder,
296
+ /* IsStore=*/ true , /* IsLoad=*/ true );
297
+ LoadInst *InitLoaded = Builder.CreateLoad (Addr);
298
+ // Atomics require at least natural alignment.
299
+ InitLoaded->setAlignment (AI->getType ()->getPrimitiveSizeInBits ());
300
+ Builder.CreateBr (LoopBB);
301
+
302
+ // Start the main loop block now that we've taken care of the preliminaries.
303
+ Builder.SetInsertPoint (LoopBB);
304
+ PHINode *Loaded = Builder.CreatePHI (AI->getType (), 2 , " loaded" );
305
+ Loaded->addIncoming (InitLoaded, BB);
306
+
307
+ Value *NewVal =
308
+ performAtomicOp (AI->getOperation (), Builder, Loaded, AI->getValOperand ());
309
+
310
+ Value *Pair = Builder.CreateAtomicCmpXchg (
311
+ Addr, Loaded, NewVal, MemOpOrder,
312
+ AtomicCmpXchgInst::getStrongestFailureOrdering (MemOpOrder));
313
+ Value *NewLoaded = Builder.CreateExtractValue (Pair, 0 , " newloaded" );
314
+ Loaded->addIncoming (NewLoaded, LoopBB);
315
+
316
+ Value *Success = Builder.CreateExtractValue (Pair, 1 , " success" );
317
+ Builder.CreateCondBr (Success, ExitBB, LoopBB);
318
+
319
+ Builder.SetInsertPoint (ExitBB, ExitBB->begin ());
320
+ TargetLowering->emitTrailingFence (Builder, FenceOrder,
321
+ /* IsStore=*/ true , /* IsLoad=*/ true );
322
+
323
+ AI->replaceAllUsesWith (NewLoaded);
324
+ AI->eraseFromParent ();
325
+
326
+ return true ;
327
+ }
328
+
248
329
bool AtomicExpand::expandAtomicCmpXchg (AtomicCmpXchgInst *CI) {
249
330
auto TLI = TM->getSubtargetImpl ()->getTargetLowering ();
250
331
AtomicOrdering SuccessOrder = CI->getSuccessOrdering ();
0 commit comments