
Commit 26c3a84

AArch64: Use consistent atomicrmw expansion for FP operations (llvm#103702)
Use LLSC or cmpxchg in the same cases as for the unsupported integer operations. This required some fixups to the LLSC implementation to deal with the fp128 case. The comment about floating-point exceptions was wrong: floating-point exceptions are not really exceptions at all; in the default untrapped configuration they merely set status flags, so they cannot abort an LL/SC sequence.
1 parent 5048fab commit 26c3a84
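
To make the new decision flow concrete, here is a minimal standalone C++ sketch, not LLVM code: SubtargetInfo, TypeKind, mayLowerToLibcall, and decideFPExpansion are illustrative stand-ins for the real AArch64Subtarget and AtomicRMWInst interfaces, but the branches mirror rmwOpMayLowerToLibcall and the floating-point path of shouldExpandAtomicRMWInIR in the diff below.

#include <cassert>
#include <cstdio>

enum class TypeKind { Half, BFloat, Float, Double, FP128 };
enum class ExpansionKind { LLSC, CmpXChg };

struct SubtargetInfo {
  bool HasFPARMv8 = true; // hardware FP/SIMD available
  bool HasLSE = false;    // LSE atomic instructions available
};

// Mirrors rmwOpMayLowerToLibcall: an FP atomicrmw whose arithmetic becomes a
// library call (soft-float or fp128) is a poor fit for an LL/SC loop, since
// the call could clear the exclusive reservation.
static bool mayLowerToLibcall(const SubtargetInfo &ST, TypeKind Kind) {
  switch (Kind) {
  case TypeKind::Half:
  case TypeKind::BFloat:
  case TypeKind::Float:
  case TypeKind::Double:
    return !ST.HasFPARMv8; // soft-float => libcall
  case TypeKind::FP128:
    return true;           // fp128 arithmetic is always a libcall
  }
  return true;
}

// Simplified shape of the floating-point path in shouldExpandAtomicRMWInIR
// after this patch: prefer a cmpxchg loop at -O0, with LSE, or when a libcall
// is involved; otherwise fall back to an LL/SC loop.
static ExpansionKind decideFPExpansion(const SubtargetInfo &ST, TypeKind Kind,
                                       bool OptNone) {
  if (OptNone || ST.HasLSE || mayLowerToLibcall(ST, Kind))
    return ExpansionKind::CmpXChg;
  return ExpansionKind::LLSC;
}

int main() {
  SubtargetInfo ST; // FPARMv8 present, no LSE
  assert(decideFPExpansion(ST, TypeKind::Float, /*OptNone=*/false) ==
         ExpansionKind::LLSC);     // float fadd: LL/SC loop is fine
  assert(decideFPExpansion(ST, TypeKind::FP128, /*OptNone=*/false) ==
         ExpansionKind::CmpXChg);  // fp128 fadd: libcall => cmpxchg loop
  std::puts("expansion decisions as expected");
  return 0;
}

Compiling and running this sketch on any host should print "expansion decisions as expected"; only the two branches it exercises are meant to match the patch, everything else is simplified away.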

File tree

5 files changed: +376 -1055 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 42 additions & 17 deletions
@@ -27096,21 +27096,37 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
              : AtomicExpansionKind::LLSC;
 }
 
+// Return true if the atomic operation expansion will lower to use a library
+// call, and is thus ineligible to use an LLSC expansion.
+static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget,
+                                   const AtomicRMWInst *RMW) {
+  if (!RMW->isFloatingPointOperation())
+    return false;
+  switch (RMW->getType()->getScalarType()->getTypeID()) {
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::HalfTyID:
+  case Type::BFloatTyID:
+    // Will use soft float
+    return !Subtarget.hasFPARMv8();
+  default:
+    // fp128 will emit library calls.
+    return true;
+  }
+
+  llvm_unreachable("covered type switch");
+}
+
 // The "default" for integer RMW operations is to expand to an LL/SC loop.
 // However, with the LSE instructions (or outline-atomics mode, which provides
 // library routines in place of the LSE-instructions), we can directly emit many
 // operations instead.
-//
-// Floating-point operations are always emitted to a cmpxchg loop, because they
-// may trigger a trap which aborts an LLSC sequence.
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
-  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  Type *Ty = AI->getType();
+  unsigned Size = Ty->getPrimitiveSizeInBits();
   assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
 
-  if (AI->isFloatingPointOperation())
-    return AtomicExpansionKind::CmpXChg;
-
   bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
                       (AI->getOperation() == AtomicRMWInst::Xchg ||
                        AI->getOperation() == AtomicRMWInst::Or ||
@@ -27120,7 +27136,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
-  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
+      !AI->isFloatingPointOperation()) {
     if (Subtarget->hasLSE())
       return AtomicExpansionKind::None;
     if (Subtarget->outlineAtomics()) {
@@ -27146,7 +27163,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
   // we have a single CAS instruction that can replace the loop.
   if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
-      Subtarget->hasLSE())
+      Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
     return AtomicExpansionKind::CmpXChg;
 
   return AtomicExpansionKind::LLSC;
@@ -27193,10 +27210,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
     Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
     Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
-    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
-    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
-    return Builder.CreateOr(
-        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
+
+    auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
+    Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
+    Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
+
+    Value *Or = Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
+    return Builder.CreateBitCast(Or, ValueTy);
   }
 
   Type *Tys[] = { Addr->getType() };
@@ -27207,8 +27228,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
   const DataLayout &DL = M->getDataLayout();
   IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
   CallInst *CI = Builder.CreateCall(Ldxr, Addr);
-  CI->addParamAttr(
-      0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
+  CI->addParamAttr(0, Attribute::get(Builder.getContext(),
+                                     Attribute::ElementType, IntEltTy));
   Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
 
   return Builder.CreateBitCast(Trunc, ValueTy);
@@ -27234,9 +27255,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
         IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
     Function *Stxr = Intrinsic::getDeclaration(M, Int);
     Type *Int64Ty = Type::getInt64Ty(M->getContext());
+    Type *Int128Ty = Type::getInt128Ty(M->getContext());
+
+    Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
 
-    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
-    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+    Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
+    Value *Hi =
+        Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
     return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
   }
 
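
The fp128 fixups to the LL/SC helpers above amount to treating the value as an i128: emitStoreConditional bitcasts to i128 and splits out the two 64-bit halves for stxp/stlxp, and emitLoadLinked reassembles them and bitcasts back. A minimal C++ sketch of that split follows, assuming a GCC/Clang-style unsigned __int128 and an AArch64-style 16-byte long double standing in for fp128; splitToHalves is a hypothetical helper, not LLVM API.

#include <cstdint>
#include <cstdio>
#include <cstring>

struct LoHi {
  std::uint64_t Lo, Hi;
};

// Reinterpret a 16-byte FP value as a 128-bit integer and split it into the
// two 64-bit halves that stxp/stlxp consume; this mirrors the bitcast-to-i128,
// trunc, and lshr-by-64 sequence emitted by emitStoreConditional.
static LoHi splitToHalves(long double V) {
  unsigned __int128 Bits = 0;
  static_assert(sizeof(V) == sizeof(Bits), "expects a 16-byte long double");
  std::memcpy(&Bits, &V, sizeof(Bits)); // the IR "bitcast": raw reinterpretation
  return {static_cast<std::uint64_t>(Bits),        // trunc to i64      -> lo
          static_cast<std::uint64_t>(Bits >> 64)}; // lshr 64, trunc    -> hi
}

int main() {
  LoHi Parts = splitToHalves(1.0L);
  std::printf("lo=0x%016llx hi=0x%016llx\n",
              static_cast<unsigned long long>(Parts.Lo),
              static_cast<unsigned long long>(Parts.Hi));
  return 0;
}

The memcpy plays the role of the IR bitcast: it reinterprets the bytes without any value conversion, which is exactly what the exclusive store pair needs.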

0 commit comments
