@@ -120,17 +120,10 @@ stack:
120
120
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
121
121
body : |
122
122
bb.0:
123
- ; GFX8-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
124
- ; GFX8: $vgpr0 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
125
- ; GFX8-NEXT: SI_RETURN implicit $vgpr0
126
- ;
127
- ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
128
- ; GFX9: $vgpr0 = V_ADD_U32_e32 128, %stack.0, implicit $exec
129
- ; GFX9-NEXT: SI_RETURN implicit $vgpr0
130
- ;
131
- ; GFX10-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
132
- ; GFX10: $vgpr0 = V_ADD_U32_e32 128, %stack.0, implicit $exec
133
- ; GFX10-NEXT: SI_RETURN implicit $vgpr0
123
+ ; CHECK-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
124
+ ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def dead $scc
125
+ ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
126
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
134
127
%0:sreg_32 = S_MOV_B32 %stack.0
135
128
%1:sreg_32 = S_ADD_I32 %0, 128, implicit-def dead $scc
136
129
$vgpr0 = COPY %1
@@ -535,3 +528,68 @@ body: |
535
528
%2:vgpr_32 = COPY %1
536
529
SI_RETURN implicit %2
537
530
...
531
+
532
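+ # The copy of %2 into the physical register $vgpr0 feeds the SI_CALL (implicit $vgpr0 use) and must not be erased when %stack.0 is folded into the S_ADD_I32.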
533
+ ---
534
+ name : fold_fi_into_s_or_b32_user_is_physreg_copy
535
+ tracksRegLiveness : true
536
+ stack :
537
+ - { id: 0, size: 16, alignment: 16 }
538
+ machineFunctionInfo :
539
+ scratchRSrcReg : ' $sgpr0_sgpr1_sgpr2_sgpr3'
540
+ frameOffsetReg : ' $sgpr33'
541
+ stackPtrOffsetReg : ' $sgpr32'
542
+ body : |
543
+ ; CHECK-LABEL: name: fold_fi_into_s_or_b32_user_is_physreg_copy
544
+ ; CHECK: bb.0:
545
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
546
+ ; CHECK-NEXT: liveins: $vgpr0_vgpr1
547
+ ; CHECK-NEXT: {{ $}}
548
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
549
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
550
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
551
+ ; CHECK-NEXT: {{ $}}
552
+ ; CHECK-NEXT: bb.1:
553
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
554
+ ; CHECK-NEXT: {{ $}}
555
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
556
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]].sub1, implicit $exec
557
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
558
+ ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE]], [[COPY]], implicit $exec
559
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
560
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
561
+ ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
562
+ ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
563
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
564
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
565
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
566
+ ; CHECK-NEXT: {{ $}}
567
+ ; CHECK-NEXT: bb.2:
568
+ ; CHECK-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
569
+ ; CHECK-NEXT: SI_RETURN
570
+ bb.0:
571
+ liveins: $vgpr0_vgpr1
572
+
573
+ %0:vreg_64 = COPY $vgpr0_vgpr1
574
+ %1:sreg_32 = S_MOV_B32 %stack.0
575
+ %2:sreg_32 = S_ADD_I32 killed %1, 4, implicit-def dead $scc
576
+ %3:sreg_64_xexec = S_MOV_B64 $exec
577
+
578
+ bb.1:
579
+ %4:sgpr_32 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
580
+ %5:sgpr_32 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
581
+ %6:sgpr_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
582
+ %7:sreg_64_xexec = V_CMP_EQ_U64_e64 %6, %0, implicit $exec
583
+ %8:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed %7, implicit-def $exec, implicit-def $scc, implicit $exec
584
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
585
+ $vgpr0 = COPY %2
586
+ $sgpr30_sgpr31 = SI_CALL %6, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
587
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
588
+ $exec = S_XOR_B64_term $exec, %8, implicit-def $scc
589
+ SI_WATERFALL_LOOP %bb.1, implicit $exec
590
+
591
+ bb.2:
592
+ $exec = S_MOV_B64 %3
593
+ SI_RETURN
594
+
595
+ ...
0 commit comments