@@ -465,7 +465,6 @@ body: |
465
465
; GFX90A-NEXT: bb.2:
466
466
; GFX90A-NEXT: S_ENDPGM 0
467
467
bb.0:
468
- ; Tests that tryOptimizeAGPRPhis kicks in for GFX908.
469
468
liveins: $sgpr0, $scc
470
469
successors: %bb.1
471
470
@@ -715,3 +714,85 @@ body: |
715
714
bb.3:
716
715
S_ENDPGM 0
717
716
...
717
+
718
+ ---
719
+ name : skip_optimize_agpr_phi_without_subreg_use
720
+ tracksRegLiveness : true
721
+ body : |
+ ; Loop-carried 128-bit AGPR accumulator: bb.0 zero-initializes it, bb.1 feeds
+ ; it through a PHI into an MFMA and copies the result back for the next trip.
+ ; The expectations below (same for both check prefixes) keep the PHI as
+ ; areg_128_align2 and build its bb.0 input from four V_ACCVGPR_WRITE_B32_e64 0
+ ; joined by a REG_SEQUENCE.
+ ; NOTE(review): going by the test name, this guards the case where the AGPR
+ ; PHI rewrite must be skipped because no PHI input is a subregister use;
+ ; confirm against the fold pass source.
722
+ ; GFX908-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
723
+ ; GFX908: bb.0:
724
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
725
+ ; GFX908-NEXT: liveins: $scc
726
+ ; GFX908-NEXT: {{ $}}
727
+ ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
728
+ ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
729
+ ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
730
+ ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
731
+ ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
732
+ ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
733
+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
734
+ ; GFX908-NEXT: {{ $}}
735
+ ; GFX908-NEXT: bb.1:
736
+ ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
737
+ ; GFX908-NEXT: liveins: $scc
738
+ ; GFX908-NEXT: {{ $}}
739
+ ; GFX908-NEXT: [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1
740
+ ; GFX908-NEXT: [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec
741
+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
742
+ ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
743
+ ; GFX908-NEXT: {{ $}}
744
+ ; GFX908-NEXT: bb.2:
745
+ ; GFX908-NEXT: S_ENDPGM 0
746
+ ;
747
+ ; GFX90A-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
748
+ ; GFX90A: bb.0:
749
+ ; GFX90A-NEXT: successors: %bb.1(0x80000000)
750
+ ; GFX90A-NEXT: liveins: $scc
751
+ ; GFX90A-NEXT: {{ $}}
752
+ ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
753
+ ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
754
+ ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
755
+ ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
756
+ ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
757
+ ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
758
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
759
+ ; GFX90A-NEXT: {{ $}}
760
+ ; GFX90A-NEXT: bb.1:
761
+ ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
762
+ ; GFX90A-NEXT: liveins: $scc
763
+ ; GFX90A-NEXT: {{ $}}
764
+ ; GFX90A-NEXT: [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1
765
+ ; GFX90A-NEXT: [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec
766
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
767
+ ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
768
+ ; GFX90A-NEXT: {{ $}}
769
+ ; GFX90A-NEXT: bb.2:
770
+ ; GFX90A-NEXT: S_ENDPGM 0
771
+ bb.0:
772
+ liveins: $scc
773
+ successors: %bb.1
774
+
775
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
776
+ %1:sgpr_32 = S_MOV_B32 0
777
+ %2:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3
778
+ %3:vreg_128 = COPY %2
779
+ %4:sreg_64 = S_MOV_B64 0
780
+ %5:areg_128_align2 = COPY %3, implicit $exec
781
+
782
+ bb.1:
783
+ liveins: $scc
784
+ successors: %bb.1, %bb.2
785
+
786
+ ; Both PHI inputs (%5 from bb.0, %10 from the back edge) are full 128-bit
+ ; AGPR values; neither operand carries a subregister index.
+ %9:areg_128_align2 = PHI %5, %bb.0, %10, %bb.1
787
+ %11:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 %0:vgpr_32, %0:vgpr_32, %9:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
788
+ ; %12-%15 read the MFMA result one 32-bit lane at a time but have no users
+ ; in this function; none of these copies appear in the expected output.
+ %12:vgpr_32 = COPY %11.sub3
789
+ %13:vgpr_32 = COPY %11.sub2
790
+ %14:vgpr_32 = COPY %11.sub1
791
+ %15:vgpr_32 = COPY %11.sub0
792
+ %10:areg_128_align2 = COPY %11, implicit $exec
793
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
794
+
795
+ bb.2:
796
+ S_ENDPGM 0
797
+
798
+ ...
0 commit comments