@@ -27067,9 +27067,6 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
 
-  if (AI->isFloatingPointOperation())
-    return AtomicExpansionKind::CmpXChg;
-
   bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
                       (AI->getOperation() == AtomicRMWInst::Xchg ||
                        AI->getOperation() == AtomicRMWInst::Or ||
@@ -27079,7 +27076,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
-  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
+      !AI->isFloatingPointOperation()) {
     if (Subtarget->hasLSE())
       return AtomicExpansionKind::None;
     if (Subtarget->outlineAtomics()) {
@@ -27152,10 +27150,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
     Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
     Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
-    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
-    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
-    return Builder.CreateOr(
-        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
+
+    auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
+    Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
+    Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
+
+    Value *Or = Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
+    return Builder.CreateBitCast(Or, ValueTy);
   }
 
   Type *Tys[] = { Addr->getType() };
@@ -27166,8 +27168,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
   const DataLayout &DL = M->getDataLayout();
   IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
   CallInst *CI = Builder.CreateCall(Ldxr, Addr);
-  CI->addParamAttr(
-      0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
+  CI->addParamAttr(0, Attribute::get(Builder.getContext(),
+                                     Attribute::ElementType, IntEltTy));
   Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
 
   return Builder.CreateBitCast(Trunc, ValueTy);
@@ -27193,9 +27195,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
         IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
     Function *Stxr = Intrinsic::getDeclaration(M, Int);
     Type *Int64Ty = Type::getInt64Ty(M->getContext());
+    Type *Int128Ty = Type::getInt128Ty(M->getContext());
+
+    Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
 
-    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
-    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+    Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
+    Value *Hi =
+        Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
     return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
   }
 
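
The two emitLoadLinked/emitStoreConditional hunks above boil down to marshalling a 128-bit FP or vector value through i128 so it can travel as the two i64 halves that ldxp/stxp expect. The standalone sketch below (not part of the patch) illustrates that marshalling with the same IRBuilder calls, but on a plain fp128 round-trip function instead of the exclusive-access intrinsics; the file name, function name, and variable names are made up for illustration.

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("marshal128", Ctx);
  IRBuilder<> B(Ctx);

  // fp128 @roundtrip(fp128 %v) stands in for a 128-bit FP atomic operand.
  Type *FP128Ty = Type::getFP128Ty(Ctx);
  Type *Int64Ty = Type::getInt64Ty(Ctx);
  Type *Int128Ty = Type::getInt128Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(FP128Ty, {FP128Ty}, false),
                                 Function::ExternalLinkage, "roundtrip", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // Store-conditional side: bitcast the FP value to i128, then split it into
  // lo/hi i64 halves (what the stxp path now does via CastVal).
  Value *CastVal = B.CreateBitCast(F->getArg(0), Int128Ty);
  Value *Lo = B.CreateTrunc(CastVal, Int64Ty, "lo");
  Value *Hi = B.CreateTrunc(B.CreateLShr(CastVal, 64), Int64Ty, "hi");

  // Load-linked side: zero-extend both halves to i128, reassemble them with
  // shl/or, and bitcast back to the original FP type (mirroring the ldxp path).
  Value *Lo128 = B.CreateZExt(Lo, Int128Ty, "lo64");
  Value *Hi128 = B.CreateZExt(Hi, Int128Ty, "hi64");
  Value *Or = B.CreateOr(
      Lo128, B.CreateShl(Hi128, ConstantInt::get(Int128Ty, 64)), "val128");
  B.CreateRet(B.CreateBitCast(Or, FP128Ty));

  verifyFunction(*F, &errs());
  M.print(outs(), nullptr);
}
```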