Skip to content

Commit fe893c9

Browse files
authored
[FPEnv][AtomicExpand] Correct strictfp attribute handling in AtomicExpandPass (#87082)
The AtomicExpand pass was lowering function calls with the strictfp attribute to sequences that included function calls incorrectly lacking the attribute. This patch corrects that. The pass now also emits the correct constrained fp call instead of normal FP instructions when in a function with the strictfp attribute. Test changes verified with D146845.
1 parent 2684a09 commit fe893c9

File tree

2 files changed

+15
-12
lines changed

2 files changed

+15
-12
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,13 @@ class AtomicExpandLegacy : public FunctionPass {
135135
// IRBuilder to be used for replacement atomic instructions.
136136
struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
137137
// Preserves the DebugLoc from I, and preserves still valid metadata.
138+
// Enable StrictFP builder mode when appropriate.
138139
explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
139140
: IRBuilder(I->getContext(), DL) {
140141
SetInsertPoint(I);
141142
this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
143+
if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
144+
this->setIsFPConstrained(true);
142145
}
143146
};
144147

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3028,7 +3028,7 @@ define bfloat @test_atomicrmw_fadd_bf16_global_system_align4(ptr addrspace(1) %p
30283028

30293029
define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bfloat %value) #2 {
30303030
; CI-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3031-
; CI-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3031+
; CI-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
30323032
; CI-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
30333033
; CI-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
30343034
; CI-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3041,7 +3041,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
30413041
; CI-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
30423042
; CI-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
30433043
; CI-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3044-
; CI-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3044+
; CI-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
30453045
; CI-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
30463046
; CI-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
30473047
; CI-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3058,7 +3058,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
30583058
; CI-NEXT: ret bfloat [[TMP7]]
30593059
;
30603060
; GFX9-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3061-
; GFX9-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3061+
; GFX9-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
30623062
; GFX9-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
30633063
; GFX9-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
30643064
; GFX9-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3071,7 +3071,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
30713071
; GFX9-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
30723072
; GFX9-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
30733073
; GFX9-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3074-
; GFX9-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3074+
; GFX9-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
30753075
; GFX9-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
30763076
; GFX9-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
30773077
; GFX9-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3088,7 +3088,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
30883088
; GFX9-NEXT: ret bfloat [[TMP7]]
30893089
;
30903090
; GFX908-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3091-
; GFX908-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3091+
; GFX908-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
30923092
; GFX908-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
30933093
; GFX908-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
30943094
; GFX908-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3101,7 +3101,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31013101
; GFX908-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
31023102
; GFX908-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
31033103
; GFX908-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3104-
; GFX908-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3104+
; GFX908-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
31053105
; GFX908-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
31063106
; GFX908-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
31073107
; GFX908-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3118,7 +3118,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31183118
; GFX908-NEXT: ret bfloat [[TMP7]]
31193119
;
31203120
; GFX90A-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3121-
; GFX90A-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3121+
; GFX90A-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6:[0-9]+]]
31223122
; GFX90A-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
31233123
; GFX90A-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
31243124
; GFX90A-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3131,7 +3131,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31313131
; GFX90A-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
31323132
; GFX90A-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
31333133
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3134-
; GFX90A-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3134+
; GFX90A-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
31353135
; GFX90A-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
31363136
; GFX90A-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
31373137
; GFX90A-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3148,7 +3148,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31483148
; GFX90A-NEXT: ret bfloat [[TMP7]]
31493149
;
31503150
; GFX940-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3151-
; GFX940-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3151+
; GFX940-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6:[0-9]+]]
31523152
; GFX940-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
31533153
; GFX940-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
31543154
; GFX940-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3161,7 +3161,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31613161
; GFX940-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
31623162
; GFX940-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
31633163
; GFX940-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3164-
; GFX940-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3164+
; GFX940-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
31653165
; GFX940-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
31663166
; GFX940-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
31673167
; GFX940-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3178,7 +3178,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31783178
; GFX940-NEXT: ret bfloat [[TMP7]]
31793179
;
31803180
; GFX11-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3181-
; GFX11-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3181+
; GFX11-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
31823182
; GFX11-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
31833183
; GFX11-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
31843184
; GFX11-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3191,7 +3191,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
31913191
; GFX11-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
31923192
; GFX11-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
31933193
; GFX11-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3194-
; GFX11-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3194+
; GFX11-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
31953195
; GFX11-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
31963196
; GFX11-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
31973197
; GFX11-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]

0 commit comments

Comments
 (0)