@@ -3028,7 +3028,7 @@ define bfloat @test_atomicrmw_fadd_bf16_global_system_align4(ptr addrspace(1) %p
3028
3028
3029
3029
define bfloat @test_atomicrmw_fadd_bf16_local_strictfp (ptr addrspace (3 ) %ptr , bfloat %value ) #2 {
3030
3030
; CI-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3031
- ; CI-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3031
+ ; CI-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
3032
3032
; CI-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3033
3033
; CI-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3034
3034
; CI-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3041,7 +3041,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3041
3041
; CI-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3042
3042
; CI-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3043
3043
; CI-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3044
- ; CI-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3044
+ ; CI-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3045
3045
; CI-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3046
3046
; CI-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3047
3047
; CI-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3058,7 +3058,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3058
3058
; CI-NEXT: ret bfloat [[TMP7]]
3059
3059
;
3060
3060
; GFX9-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3061
- ; GFX9-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3061
+ ; GFX9-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
3062
3062
; GFX9-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3063
3063
; GFX9-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3064
3064
; GFX9-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3071,7 +3071,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3071
3071
; GFX9-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3072
3072
; GFX9-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3073
3073
; GFX9-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3074
- ; GFX9-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3074
+ ; GFX9-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3075
3075
; GFX9-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3076
3076
; GFX9-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3077
3077
; GFX9-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3088,7 +3088,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3088
3088
; GFX9-NEXT: ret bfloat [[TMP7]]
3089
3089
;
3090
3090
; GFX908-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3091
- ; GFX908-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3091
+ ; GFX908-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
3092
3092
; GFX908-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3093
3093
; GFX908-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3094
3094
; GFX908-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3101,7 +3101,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3101
3101
; GFX908-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3102
3102
; GFX908-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3103
3103
; GFX908-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3104
- ; GFX908-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3104
+ ; GFX908-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3105
3105
; GFX908-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3106
3106
; GFX908-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3107
3107
; GFX908-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3118,7 +3118,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3118
3118
; GFX908-NEXT: ret bfloat [[TMP7]]
3119
3119
;
3120
3120
; GFX90A-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3121
- ; GFX90A-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3121
+ ; GFX90A-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6:[0-9]+]]
3122
3122
; GFX90A-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3123
3123
; GFX90A-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3124
3124
; GFX90A-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3131,7 +3131,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3131
3131
; GFX90A-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3132
3132
; GFX90A-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3133
3133
; GFX90A-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3134
- ; GFX90A-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3134
+ ; GFX90A-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3135
3135
; GFX90A-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3136
3136
; GFX90A-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3137
3137
; GFX90A-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3148,7 +3148,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3148
3148
; GFX90A-NEXT: ret bfloat [[TMP7]]
3149
3149
;
3150
3150
; GFX940-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3151
- ; GFX940-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3151
+ ; GFX940-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6:[0-9]+]]
3152
3152
; GFX940-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3153
3153
; GFX940-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3154
3154
; GFX940-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3161,7 +3161,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3161
3161
; GFX940-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3162
3162
; GFX940-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3163
3163
; GFX940-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3164
- ; GFX940-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3164
+ ; GFX940-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3165
3165
; GFX940-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3166
3166
; GFX940-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3167
3167
; GFX940-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
@@ -3178,7 +3178,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3178
3178
; GFX940-NEXT: ret bfloat [[TMP7]]
3179
3179
;
3180
3180
; GFX11-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
3181
- ; GFX11-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
3181
+ ; GFX11-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) #[[ATTR6]]
3182
3182
; GFX11-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
3183
3183
; GFX11-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
3184
3184
; GFX11-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
@@ -3191,7 +3191,7 @@ define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bf
3191
3191
; GFX11-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
3192
3192
; GFX11-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
3193
3193
; GFX11-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
3194
- ; GFX11-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
3194
+ ; GFX11-NEXT: [[NEW:%.*]] = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat [[TMP4]], bfloat [[VALUE:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR6]]
3195
3195
; GFX11-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
3196
3196
; GFX11-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
3197
3197
; GFX11-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
0 commit comments