@@ -428,9 +428,9 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
428
428
; GFX90A_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
429
429
; GFX90A_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
430
430
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
431
- ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
431
+ ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
432
432
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
433
- ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
433
+ ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
434
434
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
435
435
; GFX90A_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
436
436
; GFX90A_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
@@ -529,8 +529,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
529
529
; GFX90A_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
530
530
; GFX90A_DPP-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
531
531
; GFX90A_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
532
- ; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
533
- ; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
532
+ ; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
533
+ ; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
534
534
; GFX90A_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
535
535
; GFX90A_DPP-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
536
536
; GFX90A_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
@@ -609,9 +609,9 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
609
609
; GFX940_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
610
610
; GFX940_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
611
611
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
612
- ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
612
+ ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_]]
613
613
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
614
- ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
614
+ ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_1]]
615
615
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
616
616
; GFX940_ITERATIVE-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
617
617
; GFX940_ITERATIVE-NEXT: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, [[PHI]], 0, [[COPY7]], 0, 0, implicit $mode, implicit $exec
@@ -710,8 +710,8 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
710
710
; GFX940_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
711
711
; GFX940_DPP-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
712
712
; GFX940_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
713
- ; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
714
- ; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
713
+ ; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY19]], [[S_MOV_B32_2]]
714
+ ; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
715
715
; GFX940_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
716
716
; GFX940_DPP-NEXT: [[COPY21:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
717
717
; GFX940_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY21]], implicit $exec
@@ -818,9 +818,9 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
818
818
; GFX90A_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
819
819
; GFX90A_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
820
820
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
821
- ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
821
+ ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
822
822
; GFX90A_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
823
- ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
823
+ ; GFX90A_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
824
824
; GFX90A_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
825
825
; GFX90A_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
826
826
; GFX90A_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
@@ -936,8 +936,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
936
936
; GFX90A_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
937
937
; GFX90A_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
938
938
; GFX90A_DPP-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
939
- ; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
940
- ; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
939
+ ; GFX90A_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
940
+ ; GFX90A_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
941
941
; GFX90A_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
942
942
; GFX90A_DPP-NEXT: [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
943
943
; GFX90A_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
@@ -1070,9 +1070,9 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
1070
1070
; GFX940_ITERATIVE-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FFBL_B32_e64_1]], [[V_MOV_B32_e32_1]], 0, implicit $exec
1071
1071
; GFX940_ITERATIVE-NEXT: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_FFBL_B32_e64_]], [[V_ADD_U32_e64_]], implicit $exec
1072
1072
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
1073
- ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
1073
+ ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY2]], [[V_READFIRSTLANE_B32_4]]
1074
1074
; GFX940_ITERATIVE-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[V_MIN_U32_e64_]], implicit $exec
1075
- ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
1075
+ ; GFX940_ITERATIVE-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY3]], [[V_READFIRSTLANE_B32_5]]
1076
1076
; GFX940_ITERATIVE-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
1077
1077
; GFX940_ITERATIVE-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub0
1078
1078
; GFX940_ITERATIVE-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[PHI2]].sub1
@@ -1188,8 +1188,8 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
1188
1188
; GFX940_DPP-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63
1189
1189
; GFX940_DPP-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub0
1190
1190
; GFX940_DPP-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_F64_e64_5]].sub1
1191
- ; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
1192
- ; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
1191
+ ; GFX940_DPP-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY20]], [[S_MOV_B32_2]]
1192
+ ; GFX940_DPP-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READLANE_B32 [[COPY21]], [[S_MOV_B32_2]]
1193
1193
; GFX940_DPP-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READLANE_B32_]], %subreg.sub0, [[V_READLANE_B32_1]], %subreg.sub1
1194
1194
; GFX940_DPP-NEXT: [[COPY22:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
1195
1195
; GFX940_DPP-NEXT: [[STRICT_WWM:%[0-9]+]]:vreg_64_align2 = STRICT_WWM [[COPY22]], implicit $exec
0 commit comments