[ClangFE] Improve handling of casting of atomic memory operations. #86691
clang/lib/CodeGen/CGAtomic.cpp
@@ -197,11 +197,11 @@ namespace {
     llvm::Value *getScalarRValValueOrNull(RValue RVal) const;

     /// Converts an rvalue to integer value if needed.
-    llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
+    llvm::Value *convertRValueToInt(RValue RVal, bool CmpXchg = false) const;

     RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
                                   SourceLocation Loc, bool AsValue,
-                                  bool CastFP = true) const;
+                                  bool CmpXchg = false) const;

     /// Copy an atomic r-value into atomic-layout memory.
     void emitCopyIntoMemory(RValue rvalue) const;
@@ -264,7 +264,7 @@ namespace {
                               llvm::AtomicOrdering AO, bool IsVolatile);
     /// Emits atomic load as LLVM instruction.
     llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
-                                  bool CastFP = true);
+                                  bool CmpXchg = false);
     /// Emits atomic compare-and-exchange op as a libcall.
     llvm::Value *EmitAtomicCompareExchangeLibcall(
         llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@@ -1401,13 +1401,26 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
                             LVal.getBaseInfo(), TBAAAccessInfo()));
 }

+/// Return true if \param ValTy is a type that should be casted to integer
+/// around the atomic memory operation. If \param CmpXchg is true, then the
+/// cast of a floating point type is made as that instruction can not have
+/// floating point operands. TODO: Allow compare-and-exchange and FP - see
+/// comment in AtomicExpandPass.cpp.
+static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) {
+  if (ValTy->isFloatingPointTy())
+    return ValTy->isX86_FP80Ty() || CmpXchg;
+  return !ValTy->isIntegerTy() && !ValTy->isPointerTy();
+}
+
 RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
                                           AggValueSlot ResultSlot,
                                           SourceLocation Loc, bool AsValue,
-                                          bool CastFP) const {
+                                          bool CmpXchg) const {
   // Try not to in some easy cases.
-  assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
-         "Expected integer or floating point value");
+  assert((Val->getType()->isIntegerTy() || Val->getType()->isPointerTy() ||
+          Val->getType()->isIEEELikeFPTy()) &&
+         "Expected integer, pointer or floating point value when converting "
+         "result.");
   if (getEvaluationKind() == TEK_Scalar &&
       (((!LVal.isBitField() ||
          LVal.getBitFieldInfo().Size == ValueSizeInBits) &&

Review comment on shouldCastToInt(): I don't understand the CmpXchg parameter. Also needs a docstring.

Author reply: I thought it was a bit more clear than 'CastFP', as the reason for it now is the compare-and-exchange instruction only.
@@ -1416,13 +1429,12 @@ RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
     auto *ValTy = AsValue
                       ? CGF.ConvertTypeForMem(ValueTy)
                       : getAtomicAddress().getElementType();
-    if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
+    if (!shouldCastToInt(ValTy, CmpXchg)) {
       assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
              "Different integer types.");
       return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
-    } else if (ValTy->isPointerTy())
-      return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
-    else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
+    }
+    if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
       return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
   }
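To make the new predicate concrete, here is a minimal, self-contained sketch of its decision table (the enum and names are illustrative stand-ins, not the Clang API, which queries llvm::Type directly):

```cpp
#include <cassert>

// Illustrative stand-in for llvm::Type's kind queries.
enum class Kind { Integer, Pointer, Float, Double, X86_FP80, Aggregate };

// Mirrors shouldCastToInt(): FP types are cast only for cmpxchg, except the
// padded 80-bit type, which is always cast; integers and pointers never are;
// everything else (aggregates) always is.
static bool shouldCastToIntSketch(Kind K, bool CmpXchg) {
  if (K == Kind::Float || K == Kind::Double || K == Kind::X86_FP80)
    return K == Kind::X86_FP80 || CmpXchg;
  return K != Kind::Integer && K != Kind::Pointer;
}

int main() {
  assert(!shouldCastToIntSketch(Kind::Float, /*CmpXchg=*/false));   // plain FP load/store stays FP
  assert(shouldCastToIntSketch(Kind::Float, /*CmpXchg=*/true));     // cmpxchg cannot take FP operands
  assert(shouldCastToIntSketch(Kind::X86_FP80, /*CmpXchg=*/false)); // padded fp80 always goes via iN
  assert(!shouldCastToIntSketch(Kind::Pointer, /*CmpXchg=*/false)); // pointers now stay pointers
  assert(shouldCastToIntSketch(Kind::Aggregate, /*CmpXchg=*/false)); // structs still go via iN
  return 0;
}
```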
@@ -1459,10 +1471,10 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
 }

 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
-                                          bool IsVolatile, bool CastFP) {
+                                          bool IsVolatile, bool CmpXchg) {
   // Okay, we're doing this natively.
   Address Addr = getAtomicAddress();
-  if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP))
+  if (shouldCastToInt(Addr.getElementType(), CmpXchg))
     Addr = castToAtomicIntPointer(Addr);
   llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
   Load->setAtomic(AO);
@@ -1523,16 +1535,15 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
   }

   // Okay, we're doing this natively.
-  auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false);
+  auto *Load = EmitAtomicLoadOp(AO, IsVolatile);

   // If we're ignoring an aggregate return, don't do anything.
   if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
     return RValue::getAggregate(Address::invalid(), false);

   // Okay, turn that back into the original value or atomic (for non-simple
   // lvalues) type.
-  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue,
-                                /*CastFP=*/false);
+  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
 }

 /// Emit a load from an l-value of atomic type. Note that the r-value
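As a hypothetical source-level illustration (not part of the patch) of what the simplified load path means for pointers, compare the test updates further down, where an integer load plus inttoptr becomes a direct pointer load:

```cpp
// With shouldCastToInt() returning false for pointer types, clang emits
//   %0 = load atomic ptr, ptr %P seq_cst
// here, instead of loading a pointer-sized integer and converting it with
// inttoptr. (Hypothetical helper; the name is illustrative.)
int *load_ptr(int **P) {
  int *R;
  __atomic_load(P, &R, __ATOMIC_SEQ_CST);
  return R;
}
```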
@@ -1601,20 +1612,17 @@ llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const {
   return nullptr;
 }

-llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const {
+llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CmpXchg) const {
   // If we've got a scalar value of the right size, try to avoid going
   // through memory. Floats get casted if needed by AtomicExpandPass.
   if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) {
-    if (isa<llvm::IntegerType>(Value->getType()) ||
-        (!CastFP && Value->getType()->isIEEELikeFPTy()))
+    if (!shouldCastToInt(Value->getType(), CmpXchg))
       return CGF.EmitToMemory(Value, ValueTy);
     else {
       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
           CGF.getLLVMContext(),
           LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
-      if (isa<llvm::PointerType>(Value->getType()))
-        return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
-      else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
+      if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
         return CGF.Builder.CreateBitCast(Value, InputIntTy);
     }
   }
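Where a scalar still must be cast, convertRValueToInt() reinterprets the value bit-for-bit as an integer of the same width. A minimal sketch of that semantics in standard C++ (illustrative only, not the Clang code path):

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  float F = 1.0f;
  // Equivalent of the "bitcast float %F to i32" the builder emits: same bits,
  // different type, no value conversion.
  std::uint32_t Bits = std::bit_cast<std::uint32_t>(F);
  std::printf("%08x\n", Bits); // prints 3f800000
  return 0;
}
```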
@@ -1687,13 +1695,14 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(

   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  auto *ExpectedVal = convertRValueToInt(Expected);
-  auto *DesiredVal = convertRValueToInt(Desired);
+  auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
+  auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
   auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
                                          Failure, IsWeak);
   return std::make_pair(
       ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
-                             SourceLocation(), /*AsValue=*/false),
+                             SourceLocation(), /*AsValue=*/false,
+                             /*CmpXchg=*/true),
       Res.second);
 }
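The hard-coded /*CmpXchg=*/true here reflects an LLVM IR rule: the cmpxchg instruction only accepts integer and pointer operands, so FP values are round-tripped through a same-width integer. A hypothetical example of the shape this produces (the helper name is illustrative):

```cpp
// Compare-exchange on a float: codegen bitcasts the expected and desired
// values to i32, emits "cmpxchg ptr %P, i32 ..., i32 ...", and bitcasts the
// loaded result back to float.
bool cas_float(float *P, float Expected, float Desired) {
  return __atomic_compare_exchange(P, &Expected, &Desired,
                                   /*weak=*/false,
                                   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
```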
@@ -1787,7 +1796,7 @@ void AtomicInfo::EmitAtomicUpdateOp(
   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

   // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
   // For non-simple lvalues perform compare-and-swap procedure.
   auto *ContBB = CGF.createBasicBlock("atomic_cont");
   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -1803,7 +1812,8 @@ void AtomicInfo::EmitAtomicUpdateOp(
     CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
   }
   auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(),
-                                        SourceLocation(), /*AsValue=*/false);
+                                        SourceLocation(), /*AsValue=*/false,
+                                        /*CmpXchg=*/true);
   EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
   auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
   // Try to write new value using cmpxchg operation.
@@ -1869,7 +1879,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

   // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
   // For non-simple lvalues perform compare-and-swap procedure.
   auto *ContBB = CGF.createBasicBlock("atomic_cont");
   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -1969,21 +1979,16 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
   }

   // Okay, we're doing this natively.
-  llvm::Value *ValToStore =
-      atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
+  llvm::Value *ValToStore = atomics.convertRValueToInt(rvalue);

   // Do the atomic store.
   Address Addr = atomics.getAtomicAddress();
-  bool ShouldCastToInt = true;
   if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
-    if (isa<llvm::IntegerType>(Value->getType()) ||
-        Value->getType()->isIEEELikeFPTy())
-      ShouldCastToInt = false;
-  if (ShouldCastToInt) {
-    Addr = atomics.castToAtomicIntPointer(Addr);
-    ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
-                                       /*isSigned=*/false);
-  }
+    if (shouldCastToInt(Value->getType(), /*CmpXchg=*/false)) {
+      Addr = atomics.castToAtomicIntPointer(Addr);
+      ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
+                                         /*isSigned=*/false);
+    }
   llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr);

   if (AO == llvm::AtomicOrdering::Acquire)
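On the store side the predicate is queried with CmpXchg=false, so scalar FP and pointer stores keep their own type. A hypothetical example (not from the patch):

```cpp
// An atomic pointer store now lowers to
//   store atomic ptr %V, ptr %P seq_cst
// rather than ptrtoint plus an integer store. (Illustrative helper name.)
void store_ptr(int **P, int *V) {
  __atomic_store(P, &V, __ATOMIC_SEQ_CST);
}
```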
clang/test/CodeGen/atomic.c
@@ -1,10 +1,14 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,SYSTEMZ

 // CHECK: @[[NONSTATIC_GLOB_POINTER_FROM_INT:.+]] = global ptr null
 // CHECK: @[[GLOB_POINTER:.+]] = internal global ptr null
 // CHECK: @[[GLOB_POINTER_FROM_INT:.+]] = internal global ptr null
 // CHECK: @[[GLOB_INT:.+]] = internal global i32 0
 // CHECK: @[[GLOB_FLT:.+]] = internal global float {{[0e\+-\.]+}}, align
+// CHECK: @[[GLOB_DBL:.+]] = internal global double {{[0e\+-\.]+}}, align
+// X86: @[[GLOB_LONGDBL:.+]] = internal global x86_fp80 {{[0xK]+}}, align
+// SYSTEMZ: @[[GLOB_LONGDBL:.+]] = internal global fp128 {{[0xL]+}}, align

 int atomic(void) {
   // non-sensical test for sync functions
@@ -79,8 +83,10 @@ int atomic(void) {
   // CHECK: atomicrmw nand ptr %valc, i8 6 seq_cst, align 1

   __sync_val_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
-  // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
+  // X86: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86-NEXT: extractvalue { i32, i1 } [[PAIR]], 0
+  // SYSTEMZ: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
+  // SYSTEMZ-NEXT: extractvalue { i64, i1 } [[PAIR]], 0

   if ( __sync_val_compare_and_swap(&valb, 0, 1)) {
   // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr %valb, i8 0, i8 1 seq_cst seq_cst, align 1
@@ -90,13 +96,15 @@ int atomic(void) {
   }

   __sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // SYSTEMZ: cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8

   __sync_lock_release(&val);
   // CHECK: store atomic i32 0, {{.*}} release, align 4

   __sync_lock_release(&ptrval);
-  // CHECK: store atomic i32 0, {{.*}} release, align 4
+  // X86: store atomic i32 0, {{.*}} release, align 4
+  // SYSTEMZ: store atomic i64 0, {{.*}} release, align 8

   __sync_synchronize ();
   // CHECK: fence seq_cst
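The X86/SYSTEMZ split in these checks is purely about pointer width: the __sync builtins on a void** operate on a pointer-sized integer, i32 on i686-apple-darwin9 versus i64 on s390x-linux-gnu, hence also align 4 versus align 8. A distilled version of what the test exercises (hypothetical helper, not from the test file):

```cpp
// __sync CAS on a pointer slot lowers to a cmpxchg on the target's
// pointer-sized integer type.
void *cas_void_ptr(void **Slot, void *Expected, void *Desired) {
  return __sync_val_compare_and_swap(Slot, Expected, Desired);
}
```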
@@ -131,19 +139,25 @@ static _Atomic(int *) glob_pointer_from_int = 0;
 _Atomic(int *) nonstatic_glob_pointer_from_int = 0LL;
 static _Atomic int glob_int = 0;
 static _Atomic float glob_flt = 0.0f;
+static _Atomic double glob_dbl = 0.0f;
+static _Atomic long double glob_longdbl = 0.0f;

 void force_global_uses(void) {
+  // X86: %atomic-temp = alloca x86_fp80, align 16
   (void)glob_pointer;
-  // CHECK: %[[LOCAL_INT:.+]] = load atomic i32, ptr @[[GLOB_POINTER]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT]] to ptr
+  // CHECK: load atomic ptr, ptr @[[GLOB_POINTER]] seq_cst
   (void)glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_2:.+]] = load atomic i32, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_2]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
   (void)nonstatic_glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_3:.+]] = load atomic i32, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_3]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
   (void)glob_int;
-  // CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst
+  // CHECK-NEXT: load atomic i32, ptr @[[GLOB_INT]] seq_cst
   (void)glob_flt;
-  // CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  // CHECK-NEXT: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  (void)glob_dbl;
+  // CHECK-NEXT: load atomic double, ptr @[[GLOB_DBL]] seq_cst
+  (void)glob_longdbl;
+  // X86: call void @__atomic_load(i32 noundef 16, ptr noundef @glob_longdbl, ptr noundef %atomic-temp
+  // X86-NEXT: %0 = load x86_fp80, ptr %atomic-temp, align 16
+  // SYSTEMZ: load atomic fp128, ptr @[[GLOB_LONGDBL]] seq_cst
 }

Review comment: Missing tests for all the FP cases.

Author reply: Added tests for double and long double, and also a run for systemz, as the long double type is fp80 on x86, evidently.
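A distilled version of the new long double coverage (hypothetical helper, mirroring the glob_longdbl checks above): on i686 the padded 80-bit type is larger than the target's inline atomic width, so clang falls back to the __atomic_load libcall, while on s390x long double is IEEE fp128 and the load stays a native atomic load.

```cpp
long double load_longdbl(long double *P) {
  long double R;
  __atomic_load(P, &R, __ATOMIC_SEQ_CST); // X86: call to the @__atomic_load libcall
  return R;                               // SYSTEMZ: load atomic fp128 ... seq_cst
}
```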
Review comment: The answer should just be false. I see no reason to maintain any of this logic in clang.

Author reply: This is a bit messy...

For one thing, there currently can't be an atomic CmpXchg of float type - per the spec - so that must be casted to int. Changing that would be a later patch on its own, including changes to the spec document and AtomicExpand.

An atomic struct needs to be casted to integer in EmitAtomicLoadOp().

In ConvertToValueOrAtomic() and convertRValueToInt(), scalars are treated specially. But FP80 and float/CmpXchg scalars currently still need casting.

FP80 also seems like a separate patch, if there's a need. I guess maybe in that case it does make sense to cast it to i128 here, as there is only one target using it, and it's a special non-power-of-2 case..?

So it still looks to me that there are these two changes that would first need handling, and then this could probably be improved further.