@@ -439,7 +439,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
439
439
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
440
440
; IR-ITERATIVE-NEXT: ret float [[TMP19]]
441
441
; IR-ITERATIVE: ComputeLoop:
442
- ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
442
+ ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ - 0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
443
443
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
444
444
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
445
445
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
@@ -451,7 +451,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
451
451
; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
452
452
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]]
453
453
; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
454
- ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fsub .f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
454
+ ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd .f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
455
455
; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
456
456
; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
457
457
; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
@@ -472,22 +472,22 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
472
472
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
473
473
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
474
474
; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
475
- ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 0 ) #[[ATTR8]]
475
+ ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648 ) #[[ATTR8]]
476
476
; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
477
477
; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
478
- ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
479
- ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
480
- ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
481
- ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
482
- ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
483
- ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
484
- ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
485
- ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
486
- ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
487
- ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
488
- ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
489
- ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fsub .f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
490
- ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
478
+ ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
479
+ ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
480
+ ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
481
+ ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
482
+ ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
483
+ ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
484
+ ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
485
+ ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
486
+ ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
487
+ ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
488
+ ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
489
+ ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd .f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
490
+ ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float - 0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
491
491
; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
492
492
; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63) #[[ATTR8]]
493
493
; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
0 commit comments