Skip to content

AMDGPU: Do not bitcast atomic load in IR #90060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5575,6 +5575,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(NewAtomic.getValue(1));
break;
}
// Promote an atomic load whose result type OVT is illegal to the same-width
// legal type NVT (e.g. f32 -> i32): perform the load in NVT, then bitcast
// the loaded value back to the original type.
case ISD::ATOMIC_LOAD: {
AtomicSDNode *AM = cast<AtomicSDNode>(Node);
SDLoc SL(Node);
// Promotion here is a pure re-typing: both asserts require that the new
// result type and the in-memory type have exactly the original bit width.
assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
"unexpected promotion type");
assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
"unexpected atomic_load with illegal type");

// Re-issue the atomic load with the promoted result type, reusing the
// original chain, pointer, and memory operand unchanged.
SDValue NewAtomic =
DAG.getAtomic(ISD::ATOMIC_LOAD, SL, NVT, DAG.getVTList(NVT, MVT::Other),
{AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
// Result 0: the loaded value, cast back to the original (illegal) type.
Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
// Result 1: the output chain of the new atomic node.
Results.push_back(NewAtomic.getValue(1));
break;
}
case ISD::SPLAT_VECTOR: {
SDValue Scalar = Node->getOperand(0);
MVT ScalarType = Scalar.getSimpleValueType();
Expand Down
39 changes: 39 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2449,6 +2449,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
R = PromoteFloatRes_STRICT_FP_ROUND(N);
break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
case ISD::ATOMIC_LOAD:
R = PromoteFloatRes_ATOMIC_LOAD(N);
break;
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;

Expand Down Expand Up @@ -2695,6 +2698,25 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
}

/// Promote the result of an atomic FP load: perform the load as the
/// same-width integer type, then convert the loaded bits to the promoted
/// FP type. Mirrors PromoteFloatRes_LOAD for the atomic case.
SDValue DAGTypeLegalizer::PromoteFloatRes_ATOMIC_LOAD(SDNode *N) {
  AtomicSDNode *ALoad = cast<AtomicSDNode>(N);
  SDLoc DL(N);
  EVT OrigVT = ALoad->getValueType(0);
  EVT PromotedVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);

  // Perform the atomic load as an integer with the same number of bits,
  // keeping the original chain, pointer, and memory operand.
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), OrigVT.getSizeInBits());
  SDValue IntLoad = DAG.getAtomic(
      ISD::ATOMIC_LOAD, DL, IntVT, DAG.getVTList(IntVT, MVT::Other),
      {ALoad->getChain(), ALoad->getBasePtr()}, ALoad->getMemOperand());

  // Legalize the chain result: redirect users of the old value chain to the
  // chain of the new atomic node.
  ReplaceValueWith(SDValue(N, 1), IntLoad.getValue(1));

  // Convert the raw integer bits to the promoted FP type.
  return DAG.getNode(GetPromotionOpcode(OrigVT, IntVT), DL, PromotedVT,
                     IntLoad);
}

// Construct a new SELECT node with the promoted true- and false- values.
SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
Expand Down Expand Up @@ -2855,6 +2877,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;

case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::ATOMIC_LOAD:
R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
Expand Down Expand Up @@ -3039,6 +3064,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
return NewL;
}

/// Soft-promote the result of a 16-bit atomic FP load: perform the load as
/// i16 and return the raw bits directly, since a soft-promoted half is
/// carried as its 16-bit integer representation. Mirrors
/// SoftPromoteHalfRes_LOAD for the atomic case.
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N) {
  AtomicSDNode *ALoad = cast<AtomicSDNode>(N);
  SDLoc DL(N);

  // Re-issue the atomic load with an i16 result, keeping the original
  // chain, pointer, and memory operand.
  SDValue IntLoad = DAG.getAtomic(
      ISD::ATOMIC_LOAD, DL, MVT::i16, DAG.getVTList(MVT::i16, MVT::Other),
      {ALoad->getChain(), ALoad->getBasePtr()}, ALoad->getMemOperand());

  // Legalize the chain result: redirect users of the old value chain to the
  // chain of the new atomic node.
  ReplaceValueWith(SDValue(N, 1), IntLoad.getValue(1));
  return IntLoad;
}

SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_ATOMIC_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
Expand Down Expand Up @@ -734,6 +735,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i128, Promote);
AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);

// TODO: Would be better to consume as directly legal
// FP atomic loads are promoted to same-width integer atomic loads; the
// legalizer inserts the conversion back to the FP result type.
setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);

setOperationAction(ISD::ATOMIC_LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);

// Both 16-bit FP formats share the i16 container type.
setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);

setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16);

// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
for (MVT VT : MVT::integer_valuetypes())
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,12 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isCheapToSpeculateCtlz(Type *Ty) const override;

bool isSDNodeAlwaysUniform(const SDNode *N) const override;

// FIXME: This hook should not exist
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you planning any follow-up work here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'm most of the way through getting amdgpu to only bitcast in the DAG. Then theoretically it should be easy to remove it. The atomicrmw case is giving me a bit of trouble, since it seems there's some value in keeping bitcasts out of the cmpxchg loop

// Keep atomic FP loads on their original type in IR: returning None opts
// out of AtomicExpand's integer-bitcast rewrite, so the conversion (if any)
// is handled during SelectionDAG legalization instead.
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
return AtomicExpansionKind::None;
}

static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

Expand Down
54 changes: 20 additions & 34 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define float @load_atomic_f32_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0:![0-9]+]]
; CHECK-NEXT: ret float [[TMP2]]
;
%ld = load atomic float, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0
Expand All @@ -17,8 +16,7 @@ define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) {
define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define float @load_atomic_f32_global_agent(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret float [[TMP2]]
;
%ld = load atomic float, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
Expand All @@ -28,8 +26,7 @@ define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) {
define float @load_atomic_f32_local(ptr addrspace(3) %ptr) {
; CHECK-LABEL: define float @load_atomic_f32_local(
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(3) [[PTR]] seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(3) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret float [[TMP2]]
;
%ld = load atomic float, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0
Expand All @@ -39,8 +36,7 @@ define float @load_atomic_f32_local(ptr addrspace(3) %ptr) {
define float @load_atomic_f32_flat_system(ptr %ptr) {
; CHECK-LABEL: define float @load_atomic_f32_flat_system(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret float [[TMP2]]
;
%ld = load atomic float, ptr %ptr seq_cst, align 4, !some.unknown.md !0
Expand All @@ -50,8 +46,7 @@ define float @load_atomic_f32_flat_system(ptr %ptr) {
define float @load_atomic_f32_flat_agent(ptr %ptr) {
; CHECK-LABEL: define float @load_atomic_f32_flat_agent(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret float [[TMP2]]
;
%ld = load atomic float, ptr %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
Expand All @@ -61,8 +56,7 @@ define float @load_atomic_f32_flat_agent(ptr %ptr) {
define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define half @load_atomic_f16_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret half [[TMP2]]
;
%ld = load atomic half, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0
Expand All @@ -72,8 +66,7 @@ define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) {
define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define half @load_atomic_f16_global_agent(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
; CHECK-NEXT: ret half [[TMP2]]
;
%ld = load atomic half, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
Expand All @@ -83,8 +76,7 @@ define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) {
define half @load_atomic_f16_local(ptr addrspace(3) %ptr) {
; CHECK-LABEL: define half @load_atomic_f16_local(
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(3) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
; CHECK-NEXT: ret half [[TMP2]]
;
%ld = load atomic half, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0
Expand All @@ -94,8 +86,7 @@ define half @load_atomic_f16_local(ptr addrspace(3) %ptr) {
define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define bfloat @load_atomic_bf16_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(1) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
; CHECK-NEXT: ret bfloat [[TMP2]]
;
%ld = load atomic bfloat, ptr addrspace(1) %ptr seq_cst, align 2, !some.unknown.md !0
Expand All @@ -105,8 +96,7 @@ define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) {
define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define bfloat @load_atomic_bf16_global_agent(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2, !some.unknown.md [[META0]]
; CHECK-NEXT: ret bfloat [[TMP2]]
;
%ld = load atomic bfloat, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 2, !some.unknown.md !0
Expand All @@ -116,8 +106,7 @@ define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) {
define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) {
; CHECK-LABEL: define bfloat @load_atomic_bf16_local(
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(3) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
; CHECK-NEXT: ret bfloat [[TMP2]]
;
%ld = load atomic bfloat, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0
Expand All @@ -127,8 +116,7 @@ define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) {
define bfloat @load_atomic_bf16_flat(ptr %ptr) {
; CHECK-LABEL: define bfloat @load_atomic_bf16_flat(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr [[PTR]] seq_cst, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
; CHECK-NEXT: ret bfloat [[TMP2]]
;
%ld = load atomic bfloat, ptr %ptr seq_cst, align 2, !some.unknown.md !0
Expand All @@ -138,8 +126,7 @@ define bfloat @load_atomic_bf16_flat(ptr %ptr) {
define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define double @load_atomic_f64_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] seq_cst, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(1) [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
; CHECK-NEXT: ret double [[TMP2]]
;
%ld = load atomic double, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0
Expand All @@ -149,8 +136,7 @@ define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) {
define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define double @load_atomic_f64_global_agent(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8, !some.unknown.md [[META0]]
; CHECK-NEXT: ret double [[TMP2]]
;
%ld = load atomic double, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0
Expand All @@ -160,8 +146,7 @@ define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) {
define double @load_atomic_f64_local(ptr addrspace(3) %ptr) {
; CHECK-LABEL: define double @load_atomic_f64_local(
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(3) [[PTR]] seq_cst, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(3) [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
; CHECK-NEXT: ret double [[TMP2]]
;
%ld = load atomic double, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0
Expand All @@ -171,8 +156,7 @@ define double @load_atomic_f64_local(ptr addrspace(3) %ptr) {
define double @load_atomic_f64_flat_system(ptr %ptr) {
; CHECK-LABEL: define double @load_atomic_f64_flat_system(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] seq_cst, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
; CHECK-NEXT: ret double [[TMP2]]
;
%ld = load atomic double, ptr %ptr seq_cst, align 8, !some.unknown.md !0
Expand All @@ -182,8 +166,7 @@ define double @load_atomic_f64_flat_system(ptr %ptr) {
define double @load_atomic_f64_flat_agent(ptr %ptr) {
; CHECK-LABEL: define double @load_atomic_f64_flat_agent(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] syncscope("agent") seq_cst, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] syncscope("agent") seq_cst, align 8, !some.unknown.md [[META0]]
; CHECK-NEXT: ret double [[TMP2]]
;
%ld = load atomic double, ptr %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0
Expand All @@ -193,3 +176,6 @@ define double @load_atomic_f64_flat_agent(ptr %ptr) {
!0 = !{}


;.
; CHECK: [[META0]] = !{}
;.