|
1 | 1 | # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
2 | 2 | # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-after=phi-node-elimination -stop-before=greedy -o - %s | FileCheck -check-prefix=GFX9 %s
|
3 |
| -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-after=phi-node-elimination -stop-before=greedy -early-live-intervals -o - %s | FileCheck -check-prefix=GFX9 %s |
| 3 | +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-after=phi-node-elimination -stop-before=greedy -early-live-intervals -o - %s | FileCheck -check-prefix=GFX9_ELI %s |
4 | 4 |
|
5 | 5 | # Make sure that the V_MOV_B32 isn't rematerialized out of the loop. This was also breaking RenameIndependentSubregisters which missed the use of all subregisters.
|
6 | 6 |
|
@@ -29,41 +29,90 @@ liveins:
|
29 | 29 | body: |
|
30 | 30 | ; GFX9-LABEL: name: index_vgpr_waterfall_loop
|
31 | 31 | ; GFX9: bb.0:
|
32 |
| - ; GFX9: successors: %bb.1(0x80000000) |
33 |
| - ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 |
34 |
| - ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 |
35 |
| - ; GFX9: undef %18.sub15:vreg_512 = COPY $vgpr15 |
36 |
| - ; GFX9: %18.sub14:vreg_512 = COPY $vgpr14 |
37 |
| - ; GFX9: %18.sub13:vreg_512 = COPY $vgpr13 |
38 |
| - ; GFX9: %18.sub12:vreg_512 = COPY $vgpr12 |
39 |
| - ; GFX9: %18.sub11:vreg_512 = COPY $vgpr11 |
40 |
| - ; GFX9: %18.sub10:vreg_512 = COPY $vgpr10 |
41 |
| - ; GFX9: %18.sub9:vreg_512 = COPY $vgpr9 |
42 |
| - ; GFX9: %18.sub8:vreg_512 = COPY $vgpr8 |
43 |
| - ; GFX9: %18.sub7:vreg_512 = COPY $vgpr7 |
44 |
| - ; GFX9: %18.sub6:vreg_512 = COPY $vgpr6 |
45 |
| - ; GFX9: %18.sub5:vreg_512 = COPY $vgpr5 |
46 |
| - ; GFX9: %18.sub4:vreg_512 = COPY $vgpr4 |
47 |
| - ; GFX9: %18.sub3:vreg_512 = COPY $vgpr3 |
48 |
| - ; GFX9: %18.sub2:vreg_512 = COPY $vgpr2 |
49 |
| - ; GFX9: %18.sub1:vreg_512 = COPY $vgpr1 |
50 |
| - ; GFX9: %18.sub0:vreg_512 = COPY $vgpr0 |
51 |
| - ; GFX9: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[COPY1]], implicit $exec |
52 |
| - ; GFX9: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
53 |
| - ; GFX9: bb.1: |
54 |
| - ; GFX9: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
55 |
| - ; GFX9: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_AND_B32_e32_]], implicit $exec |
56 |
| - ; GFX9: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[V_AND_B32_e32_]], implicit $exec |
57 |
| - ; GFX9: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec |
58 |
| - ; GFX9: S_SET_GPR_IDX_ON [[V_READFIRSTLANE_B32_]], 1, implicit-def $m0, implicit-def $mode, implicit undef $m0, implicit $mode |
59 |
| - ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef %18.sub0, implicit $exec, implicit %18, implicit $m0 |
60 |
| - ; GFX9: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode |
61 |
| - ; GFX9: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def dead $scc |
62 |
| - ; GFX9: S_CBRANCH_EXECNZ %bb.1, implicit $exec |
63 |
| - ; GFX9: bb.2: |
64 |
| - ; GFX9: $exec = S_MOV_B64 [[S_MOV_B64_]] |
65 |
| - ; GFX9: $vgpr0 = COPY [[V_MOV_B32_e32_]] |
66 |
| - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 |
| 32 | + ; GFX9-NEXT: successors: %bb.1(0x80000000) |
| 33 | + ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 |
| 34 | + ; GFX9-NEXT: {{ $}} |
| 35 | + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 |
| 36 | + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 |
| 37 | + ; GFX9-NEXT: undef %18.sub15:vreg_512 = COPY $vgpr15 |
| 38 | + ; GFX9-NEXT: %18.sub14:vreg_512 = COPY $vgpr14 |
| 39 | + ; GFX9-NEXT: %18.sub13:vreg_512 = COPY $vgpr13 |
| 40 | + ; GFX9-NEXT: %18.sub12:vreg_512 = COPY $vgpr12 |
| 41 | + ; GFX9-NEXT: %18.sub11:vreg_512 = COPY $vgpr11 |
| 42 | + ; GFX9-NEXT: %18.sub10:vreg_512 = COPY $vgpr10 |
| 43 | + ; GFX9-NEXT: %18.sub9:vreg_512 = COPY $vgpr9 |
| 44 | + ; GFX9-NEXT: %18.sub8:vreg_512 = COPY $vgpr8 |
| 45 | + ; GFX9-NEXT: %18.sub7:vreg_512 = COPY $vgpr7 |
| 46 | + ; GFX9-NEXT: %18.sub6:vreg_512 = COPY $vgpr6 |
| 47 | + ; GFX9-NEXT: %18.sub5:vreg_512 = COPY $vgpr5 |
| 48 | + ; GFX9-NEXT: %18.sub4:vreg_512 = COPY $vgpr4 |
| 49 | + ; GFX9-NEXT: %18.sub3:vreg_512 = COPY $vgpr3 |
| 50 | + ; GFX9-NEXT: %18.sub2:vreg_512 = COPY $vgpr2 |
| 51 | + ; GFX9-NEXT: %18.sub1:vreg_512 = COPY $vgpr1 |
| 52 | + ; GFX9-NEXT: %18.sub0:vreg_512 = COPY $vgpr0 |
| 53 | + ; GFX9-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[COPY1]], implicit $exec |
| 54 | + ; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| 55 | + ; GFX9-NEXT: {{ $}} |
| 56 | + ; GFX9-NEXT: bb.1: |
| 57 | + ; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| 58 | + ; GFX9-NEXT: {{ $}} |
| 59 | + ; GFX9-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_AND_B32_e32_]], implicit $exec |
| 60 | + ; GFX9-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[V_AND_B32_e32_]], implicit $exec |
| 61 | + ; GFX9-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec |
| 62 | + ; GFX9-NEXT: S_SET_GPR_IDX_ON [[V_READFIRSTLANE_B32_]], 1, implicit-def $m0, implicit-def $mode, implicit undef $m0, implicit $mode |
| 63 | + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef %18.sub0, implicit $exec, implicit %18, implicit $m0 |
| 64 | + ; GFX9-NEXT: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode |
| 65 | + ; GFX9-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def dead $scc |
| 66 | + ; GFX9-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec |
| 67 | + ; GFX9-NEXT: {{ $}} |
| 68 | + ; GFX9-NEXT: bb.2: |
| 69 | + ; GFX9-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] |
| 70 | + ; GFX9-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] |
| 71 | + ; GFX9-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] |
| 72 | + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 |
| 73 | + ; GFX9_ELI-LABEL: name: index_vgpr_waterfall_loop |
| 74 | + ; GFX9_ELI: bb.0: |
| 75 | + ; GFX9_ELI-NEXT: successors: %bb.1(0x80000000) |
| 76 | + ; GFX9_ELI-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 |
| 77 | + ; GFX9_ELI-NEXT: {{ $}} |
| 78 | + ; GFX9_ELI-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 |
| 79 | + ; GFX9_ELI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 |
| 80 | + ; GFX9_ELI-NEXT: undef %18.sub15:vreg_512 = COPY $vgpr15 |
| 81 | + ; GFX9_ELI-NEXT: %18.sub14:vreg_512 = COPY $vgpr14 |
| 82 | + ; GFX9_ELI-NEXT: %18.sub13:vreg_512 = COPY $vgpr13 |
| 83 | + ; GFX9_ELI-NEXT: %18.sub12:vreg_512 = COPY $vgpr12 |
| 84 | + ; GFX9_ELI-NEXT: %18.sub11:vreg_512 = COPY $vgpr11 |
| 85 | + ; GFX9_ELI-NEXT: %18.sub10:vreg_512 = COPY $vgpr10 |
| 86 | + ; GFX9_ELI-NEXT: %18.sub9:vreg_512 = COPY $vgpr9 |
| 87 | + ; GFX9_ELI-NEXT: %18.sub8:vreg_512 = COPY $vgpr8 |
| 88 | + ; GFX9_ELI-NEXT: %18.sub7:vreg_512 = COPY $vgpr7 |
| 89 | + ; GFX9_ELI-NEXT: %18.sub6:vreg_512 = COPY $vgpr6 |
| 90 | + ; GFX9_ELI-NEXT: %18.sub5:vreg_512 = COPY $vgpr5 |
| 91 | + ; GFX9_ELI-NEXT: %18.sub4:vreg_512 = COPY $vgpr4 |
| 92 | + ; GFX9_ELI-NEXT: %18.sub3:vreg_512 = COPY $vgpr3 |
| 93 | + ; GFX9_ELI-NEXT: %18.sub2:vreg_512 = COPY $vgpr2 |
| 94 | + ; GFX9_ELI-NEXT: %18.sub1:vreg_512 = COPY $vgpr1 |
| 95 | + ; GFX9_ELI-NEXT: %18.sub0:vreg_512 = COPY $vgpr0 |
| 96 | + ; GFX9_ELI-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[COPY1]], implicit $exec |
| 97 | + ; GFX9_ELI-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec |
| 98 | + ; GFX9_ELI-NEXT: {{ $}} |
| 99 | + ; GFX9_ELI-NEXT: bb.1: |
| 100 | + ; GFX9_ELI-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| 101 | + ; GFX9_ELI-NEXT: {{ $}} |
| 102 | + ; GFX9_ELI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_AND_B32_e32_]], implicit $exec |
| 103 | + ; GFX9_ELI-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[V_AND_B32_e32_]], implicit $exec |
| 104 | + ; GFX9_ELI-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec |
| 105 | + ; GFX9_ELI-NEXT: S_SET_GPR_IDX_ON [[V_READFIRSTLANE_B32_]], 1, implicit-def $m0, implicit-def $mode, implicit undef $m0, implicit $mode |
| 106 | + ; GFX9_ELI-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef %18.sub0, implicit $exec, implicit %18, implicit $m0 |
| 107 | + ; GFX9_ELI-NEXT: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode |
| 108 | + ; GFX9_ELI-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def dead $scc |
| 109 | + ; GFX9_ELI-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec |
| 110 | + ; GFX9_ELI-NEXT: {{ $}} |
| 111 | + ; GFX9_ELI-NEXT: bb.2: |
| 112 | + ; GFX9_ELI-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] |
| 113 | + ; GFX9_ELI-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] |
| 114 | + ; GFX9_ELI-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]] |
| 115 | + ; GFX9_ELI-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 |
67 | 116 | bb.0:
|
68 | 117 | successors: %bb.1
|
69 | 118 | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31
|
|
0 commit comments