Skip to content

Commit f1112eb

Browse files
authored
AMDGPU: Do not bitcast atomic load in IR (#90060)
These hooks should be removed. This is a trivial legalization transform the legalizer needs to support. The IR just complicates things, and it was losing metadata. Implement the DAG promotion support, and switch AMDGPU over to using it. Really we'd be a lot better off merging ATOMIC_LOAD and LOAD like GlobalISel does.
1 parent 1ed1ec9 commit f1112eb

File tree

6 files changed

+95
-34
lines changed

6 files changed

+95
-34
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5575,6 +5575,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
55755575
Results.push_back(NewAtomic.getValue(1));
55765576
break;
55775577
}
5578+
case ISD::ATOMIC_LOAD: {
5579+
AtomicSDNode *AM = cast<AtomicSDNode>(Node);
5580+
SDLoc SL(Node);
5581+
assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
5582+
"unexpected promotion type");
5583+
assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
5584+
"unexpected atomic_load with illegal type");
5585+
5586+
SDValue NewAtomic =
5587+
DAG.getAtomic(ISD::ATOMIC_LOAD, SL, NVT, DAG.getVTList(NVT, MVT::Other),
5588+
{AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
5589+
Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
5590+
Results.push_back(NewAtomic.getValue(1));
5591+
break;
5592+
}
55785593
case ISD::SPLAT_VECTOR: {
55795594
SDValue Scalar = Node->getOperand(0);
55805595
MVT ScalarType = Scalar.getSimpleValueType();

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,6 +2449,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
24492449
R = PromoteFloatRes_STRICT_FP_ROUND(N);
24502450
break;
24512451
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
2452+
case ISD::ATOMIC_LOAD:
2453+
R = PromoteFloatRes_ATOMIC_LOAD(N);
2454+
break;
24522455
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
24532456
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
24542457

@@ -2695,6 +2698,25 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
26952698
return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
26962699
}
26972700

2701+
// Promote the floating-point value result of an ATOMIC_LOAD node. A new
// ATOMIC_LOAD producing an integer of the same bit width is created from N's
// chain, base pointer, and memory operand; users of N's chain result are
// redirected to the new node's chain; and the returned value is a conversion
// node (opcode chosen by GetPromotionOpcode) from that integer to the type
// the target reports VT should be transformed to.
SDValue DAGTypeLegalizer::PromoteFloatRes_ATOMIC_LOAD(SDNode *N) {
2702+
AtomicSDNode *AM = cast<AtomicSDNode>(N);
2703+
// VT is the type of N's value result (result 0); result 1 is the chain.
EVT VT = AM->getValueType(0);
2704+
2705+
// Load the value as an integer value with the same number of bits.
2706+
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
2707+
// The replacement node reuses N's chain, base pointer, and memory operand
// unchanged; only the type switches from VT to IVT.
SDValue newL = DAG.getAtomic(
2708+
ISD::ATOMIC_LOAD, SDLoc(N), IVT, DAG.getVTList(IVT, MVT::Other),
2709+
{AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
2710+
2711+
// Legalize the chain result by replacing uses of the old value chain with the
2712+
// new one
2713+
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
2714+
2715+
// Convert the integer value to the desired FP type
2716+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
2717+
// NOTE(review): PromoteFloatRes_LOAD calls GetPromotionOpcode(VT, NVT), while
// this call passes (VT, IVT). Confirm the second argument is intentional.
return DAG.getNode(GetPromotionOpcode(VT, IVT), SDLoc(N), NVT, newL);
2718+
}
2719+
26982720
// Construct a new SELECT node with the promoted true- and false- values.
26992721
SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
27002722
SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
@@ -2855,6 +2877,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
28552877
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
28562878

28572879
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
2880+
case ISD::ATOMIC_LOAD:
2881+
R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
2882+
break;
28582883
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
28592884
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
28602885
case ISD::SINT_TO_FP:
@@ -3039,6 +3064,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
30393064
return NewL;
30403065
}
30413066

3067+
// Soft-promote the value result of an ATOMIC_LOAD node: rebuild the load as an
// ATOMIC_LOAD of MVT::i16 from N's chain, base pointer, and memory operand,
// redirect users of N's chain result to the new node's chain, and return the
// new i16 load itself (no conversion node is created here).
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N) {
3068+
AtomicSDNode *AM = cast<AtomicSDNode>(N);
3069+
3070+
// Load the value as an integer value with the same number of bits.
3071+
// NOTE(review): the width is hard-coded to i16 rather than derived from N's
// value type; presumably every type handled here is 16 bits wide -- confirm.
SDValue NewL = DAG.getAtomic(
3072+
ISD::ATOMIC_LOAD, SDLoc(N), MVT::i16, DAG.getVTList(MVT::i16, MVT::Other),
3073+
{AM->getChain(), AM->getBasePtr()}, AM->getMemOperand());
3074+
3075+
// Legalize the chain result by replacing uses of the old value chain with the
3076+
// new one
3077+
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
3078+
// Result 0 of the new node is returned as the replacement value.
return NewL;
3079+
}
3080+
30423081
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
30433082
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
30443083
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
691691
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
692692
SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
693693
SDValue PromoteFloatRes_LOAD(SDNode *N);
694+
SDValue PromoteFloatRes_ATOMIC_LOAD(SDNode *N);
694695
SDValue PromoteFloatRes_SELECT(SDNode *N);
695696
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
696697
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
@@ -734,6 +735,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
734735
SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
735736
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
736737
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
738+
SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
737739
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
738740
SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
739741
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
148148
setOperationAction(ISD::LOAD, MVT::i128, Promote);
149149
AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);
150150

151+
// TODO: Would be better to consume as directly legal
152+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote);
153+
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
154+
155+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f64, Promote);
156+
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
157+
158+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote);
159+
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
160+
161+
setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote);
162+
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16);
163+
151164
// There are no 64-bit extloads. These should be done as a 32-bit extload and
152165
// an extension to 64-bit.
153166
for (MVT VT : MVT::integer_valuetypes())

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,12 @@ class AMDGPUTargetLowering : public TargetLowering {
230230
bool isCheapToSpeculateCtlz(Type *Ty) const override;
231231

232232
bool isSDNodeAlwaysUniform(const SDNode *N) const override;
233+
234+
// FIXME: This hook should not exist
// Override that ignores LI and unconditionally answers
// AtomicExpansionKind::None.
// NOTE(review): presumably this is what stops atomic loads from being
// rewritten to integer load + bitcast at the IR level -- confirm against the
// hook's documentation in TargetLowering.
235+
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
236+
return AtomicExpansionKind::None;
237+
}
238+
233239
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
234240
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
235241

llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) {
77
; CHECK-LABEL: define float @load_atomic_f32_global_system(
88
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
9-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] seq_cst, align 4
10-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
9+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0:![0-9]+]]
1110
; CHECK-NEXT: ret float [[TMP2]]
1211
;
1312
%ld = load atomic float, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0
@@ -17,8 +16,7 @@ define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) {
1716
define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) {
1817
; CHECK-LABEL: define float @load_atomic_f32_global_agent(
1918
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
20-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4
21-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
19+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
2220
; CHECK-NEXT: ret float [[TMP2]]
2321
;
2422
%ld = load atomic float, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
@@ -28,8 +26,7 @@ define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) {
2826
define float @load_atomic_f32_local(ptr addrspace(3) %ptr) {
2927
; CHECK-LABEL: define float @load_atomic_f32_local(
3028
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
31-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(3) [[PTR]] seq_cst, align 4
32-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
29+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr addrspace(3) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
3330
; CHECK-NEXT: ret float [[TMP2]]
3431
;
3532
%ld = load atomic float, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0
@@ -39,8 +36,7 @@ define float @load_atomic_f32_local(ptr addrspace(3) %ptr) {
3936
define float @load_atomic_f32_flat_system(ptr %ptr) {
4037
; CHECK-LABEL: define float @load_atomic_f32_flat_system(
4138
; CHECK-SAME: ptr [[PTR:%.*]]) {
42-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4
43-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
39+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
4440
; CHECK-NEXT: ret float [[TMP2]]
4541
;
4642
%ld = load atomic float, ptr %ptr seq_cst, align 4, !some.unknown.md !0
@@ -50,8 +46,7 @@ define float @load_atomic_f32_flat_system(ptr %ptr) {
5046
define float @load_atomic_f32_flat_agent(ptr %ptr) {
5147
; CHECK-LABEL: define float @load_atomic_f32_flat_agent(
5248
; CHECK-SAME: ptr [[PTR:%.*]]) {
53-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] syncscope("agent") seq_cst, align 4
54-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
49+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic float, ptr [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
5550
; CHECK-NEXT: ret float [[TMP2]]
5651
;
5752
%ld = load atomic float, ptr %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
@@ -61,8 +56,7 @@ define float @load_atomic_f32_flat_agent(ptr %ptr) {
6156
define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) {
6257
; CHECK-LABEL: define half @load_atomic_f16_global_system(
6358
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
64-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 4
65-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
59+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(1) [[PTR]] seq_cst, align 4, !some.unknown.md [[META0]]
6660
; CHECK-NEXT: ret half [[TMP2]]
6761
;
6862
%ld = load atomic half, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0
@@ -72,8 +66,7 @@ define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) {
7266
define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) {
7367
; CHECK-LABEL: define half @load_atomic_f16_global_agent(
7468
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
75-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4
76-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
69+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4, !some.unknown.md [[META0]]
7770
; CHECK-NEXT: ret half [[TMP2]]
7871
;
7972
%ld = load atomic half, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0
@@ -83,8 +76,7 @@ define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) {
8376
define half @load_atomic_f16_local(ptr addrspace(3) %ptr) {
8477
; CHECK-LABEL: define half @load_atomic_f16_local(
8578
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
86-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2
87-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half
79+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic half, ptr addrspace(3) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
8880
; CHECK-NEXT: ret half [[TMP2]]
8981
;
9082
%ld = load atomic half, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0
@@ -94,8 +86,7 @@ define half @load_atomic_f16_local(ptr addrspace(3) %ptr) {
9486
define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) {
9587
; CHECK-LABEL: define bfloat @load_atomic_bf16_global_system(
9688
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
97-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 2
98-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
89+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(1) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
9990
; CHECK-NEXT: ret bfloat [[TMP2]]
10091
;
10192
%ld = load atomic bfloat, ptr addrspace(1) %ptr seq_cst, align 2, !some.unknown.md !0
@@ -105,8 +96,7 @@ define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) {
10596
define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) {
10697
; CHECK-LABEL: define bfloat @load_atomic_bf16_global_agent(
10798
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
108-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2
109-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
99+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2, !some.unknown.md [[META0]]
110100
; CHECK-NEXT: ret bfloat [[TMP2]]
111101
;
112102
%ld = load atomic bfloat, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 2, !some.unknown.md !0
@@ -116,8 +106,7 @@ define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) {
116106
define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) {
117107
; CHECK-LABEL: define bfloat @load_atomic_bf16_local(
118108
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
119-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2
120-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
109+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr addrspace(3) [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
121110
; CHECK-NEXT: ret bfloat [[TMP2]]
122111
;
123112
%ld = load atomic bfloat, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0
@@ -127,8 +116,7 @@ define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) {
127116
define bfloat @load_atomic_bf16_flat(ptr %ptr) {
128117
; CHECK-LABEL: define bfloat @load_atomic_bf16_flat(
129118
; CHECK-SAME: ptr [[PTR:%.*]]) {
130-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr [[PTR]] seq_cst, align 2
131-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat
119+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic bfloat, ptr [[PTR]] seq_cst, align 2, !some.unknown.md [[META0]]
132120
; CHECK-NEXT: ret bfloat [[TMP2]]
133121
;
134122
%ld = load atomic bfloat, ptr %ptr seq_cst, align 2, !some.unknown.md !0
@@ -138,8 +126,7 @@ define bfloat @load_atomic_bf16_flat(ptr %ptr) {
138126
define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) {
139127
; CHECK-LABEL: define double @load_atomic_f64_global_system(
140128
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
141-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] seq_cst, align 8
142-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
129+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(1) [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
143130
; CHECK-NEXT: ret double [[TMP2]]
144131
;
145132
%ld = load atomic double, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0
@@ -149,8 +136,7 @@ define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) {
149136
define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) {
150137
; CHECK-LABEL: define double @load_atomic_f64_global_agent(
151138
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
152-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8
153-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
139+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8, !some.unknown.md [[META0]]
154140
; CHECK-NEXT: ret double [[TMP2]]
155141
;
156142
%ld = load atomic double, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0
@@ -160,8 +146,7 @@ define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) {
160146
define double @load_atomic_f64_local(ptr addrspace(3) %ptr) {
161147
; CHECK-LABEL: define double @load_atomic_f64_local(
162148
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) {
163-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(3) [[PTR]] seq_cst, align 8
164-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
149+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr addrspace(3) [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
165150
; CHECK-NEXT: ret double [[TMP2]]
166151
;
167152
%ld = load atomic double, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0
@@ -171,8 +156,7 @@ define double @load_atomic_f64_local(ptr addrspace(3) %ptr) {
171156
define double @load_atomic_f64_flat_system(ptr %ptr) {
172157
; CHECK-LABEL: define double @load_atomic_f64_flat_system(
173158
; CHECK-SAME: ptr [[PTR:%.*]]) {
174-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] seq_cst, align 8
175-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
159+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] seq_cst, align 8, !some.unknown.md [[META0]]
176160
; CHECK-NEXT: ret double [[TMP2]]
177161
;
178162
%ld = load atomic double, ptr %ptr seq_cst, align 8, !some.unknown.md !0
@@ -182,8 +166,7 @@ define double @load_atomic_f64_flat_system(ptr %ptr) {
182166
define double @load_atomic_f64_flat_agent(ptr %ptr) {
183167
; CHECK-LABEL: define double @load_atomic_f64_flat_agent(
184168
; CHECK-SAME: ptr [[PTR:%.*]]) {
185-
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] syncscope("agent") seq_cst, align 8
186-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
169+
; CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] syncscope("agent") seq_cst, align 8, !some.unknown.md [[META0]]
187170
; CHECK-NEXT: ret double [[TMP2]]
188171
;
189172
%ld = load atomic double, ptr %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0
@@ -193,3 +176,6 @@ define double @load_atomic_f64_flat_agent(ptr %ptr) {
193176
!0 = !{}
194177

195178

179+
;.
180+
; CHECK: [[META0]] = !{}
181+
;.

0 commit comments

Comments
 (0)