# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -simplify-mir -start-before=greedy,2 -stress-regalloc=4 -stop-before=virtregrewriter,2 -o - -verify-regalloc %s 2> %t.err | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err

# To allocate the vreg_512_align2, the allocator will attempt to
# inflate the register class to av_512_align2. This will ultimately
# not work, and the allocation will fail. There is an unproductive
# live range split, and we end up with a snippet copy of an
# unspillable register. Recursive assignment of interfering ranges
# during last chance recoloring would delete the unspillable snippet
# live range. Make sure there's no use after free when rolling back
# the last chance assignment.
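#
# As a rough illustration of the bug pattern (a plain C++ sketch with
# hypothetical names, not the actual RegAllocGreedy code): the rollback
# path holds a pointer to a live range that the recursive recoloring
# step may have already freed.
#
#   #include <vector>
#
#   struct LiveRangeStub { bool Unspillable = true; };
#
#   // Hypothetical sketch: the recursive assignment deletes the snippet
#   // range while the caller still holds a pointer to it for rollback.
#   void lastChanceRecolorSketch(std::vector<LiveRangeStub *> &Ranges) {
#     LiveRangeStub *Snippet = Ranges.back();
#     delete Ranges.back();          // recursive step deletes the range
#     Ranges.pop_back();
#     bool B = Snippet->Unspillable; // rollback reads it: use after free
#     (void)B;
#   }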

# ERR: error: <unknown>:0:0: ran out of registers during register allocation in function 'inflated_reg_class_copy_use_after_free'
# ERR: error: <unknown>:0:0: ran out of registers during register allocation in function 'inflated_reg_class_copy_use_after_free_lane_subset'

--- |
  define amdgpu_kernel void @inflated_reg_class_copy_use_after_free() {
    ret void
  }

  define amdgpu_kernel void @inflated_reg_class_copy_use_after_free_lane_subset() {
    ret void
  }

...

# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT2:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT2]].sub0:av_512_align2 = COPY [[SPLIT1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[SPLIT2]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE1:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub0_sub1:av_512_align2 = COPY [[RESTORE1]].sub0_sub1
# CHECK-NEXT: [[RESTORE2:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[RESTORE2]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT3]].sub0:av_512_align2 = COPY [[RESTORE2]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT4:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT3]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT4]].sub0:av_512_align2 = COPY [[SPLIT3]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT5:%[0-9]+]].sub2:av_512_align2 = COPY [[SPLIT4]].sub3
# CHECK-NEXT: undef [[SPLIT6:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT5]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT7:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT6]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT8:%[0-9]+]].sub0:av_512_align2 = COPY [[SPLIT4]].sub0 {
# CHECK-NEXT: internal [[SPLIT8]].sub2:av_512_align2 = COPY [[SPLIT4]].sub2
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT9:%[0-9]+]].sub3:av_512_align2 = COPY [[SPLIT8]].sub2
# CHECK-NEXT: undef [[SPLIT10:%[0-9]+]].sub0_sub1_sub2_sub3:av_512_align2 = COPY [[SPLIT9]].sub0_sub1_sub2_sub3
# CHECK-NEXT: undef [[SPLIT13:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_512_align2 = COPY [[SPLIT10]].sub0_sub1_sub2_sub3
# CHECK-NEXT: [[MFMA_USE1:%[0-9]+]].sub4:vreg_512_align2 = COPY [[SPLIT8]].sub0
# CHECK-NEXT: [[MFMA_USE1]].sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]].sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[MFMA_USE1]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_mac_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[MFMA_USE1]], 0, 0, 0, implicit $mode, implicit $exec

---
name: inflated_reg_class_copy_use_after_free
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 7
  vgprForAGPRCopy: '$vgpr255'
  sgprForEXECCopy: '$sgpr74_sgpr75'
body: |
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5

    %0:vgpr_32 = IMPLICIT_DEF
    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed undef renamable $sgpr4_sgpr5, 0, 0 :: (load (s64), addrspace 4)
    S_NOP 0, implicit-def undef %1.sub12_sub13_sub14_sub15:vreg_512_align2
    S_NOP 0, implicit-def %1.sub8_sub9_sub10_sub11:vreg_512_align2
    S_NOP 0, implicit-def %1.sub4_sub5_sub6_sub7:vreg_512_align2
    S_NOP 0, implicit-def %1.sub0_sub1_sub2_sub3:vreg_512_align2
    early-clobber %2:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, %1, 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
    %1.sub2:vreg_512_align2 = COPY %2.sub3
    %1.sub3:vreg_512_align2 = COPY %2.sub2
    %1.sub4:vreg_512_align2 = COPY %2.sub0
    %1.sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1:vreg_512_align2 = V_MFMA_F32_16X16X1F32_mac_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, %1, 0, 0, 0, implicit $mode, implicit $exec
    GLOBAL_STORE_DWORDX4_SADDR undef %3:vgpr_32, %1.sub12_sub13_sub14_sub15, undef renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0

...

# This test is similar to the previous one, except it is still broken
# when the use instruction does not read the full set of lanes after
# one attempted fix.

# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT2:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT1]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT2]].sub0:av_512_align2 = COPY [[SPLIT1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[SPLIT2]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
# CHECK-NEXT: [[RESTORE_1:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT3:%[0-9]+]].sub0_sub1:av_512_align2 = COPY [[RESTORE_1]].sub0_sub1
# CHECK-NEXT: [[RESTORE_2:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
# CHECK-NEXT: undef [[SPLIT4:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[RESTORE_2]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT4]].sub0:av_512_align2 = COPY [[RESTORE_2]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT5:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT4]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT5]].sub0:av_512_align2 = COPY [[SPLIT4]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT3]].sub2:av_512_align2 = COPY [[SPLIT5]].sub3
# CHECK-NEXT: undef [[SPLIT6:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT3]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT7:%[0-9]+]].sub0_sub1_sub2:av_512_align2 = COPY [[SPLIT6]].sub0_sub1_sub2
# CHECK-NEXT: undef [[SPLIT8:%[0-9]+]].sub0:av_512_align2 = COPY [[SPLIT5]].sub0 {
# CHECK-NEXT: internal [[SPLIT8]].sub2:av_512_align2 = COPY [[SPLIT5]].sub2
# CHECK-NEXT: }
# CHECK-NEXT: [[SPLIT7]].sub3:av_512_align2 = COPY [[SPLIT8]].sub2
# CHECK-NEXT: undef [[SPLIT9:%[0-9]+]].sub0_sub1_sub2_sub3:av_512_align2 = COPY [[SPLIT7]].sub0_sub1_sub2_sub3
# CHECK-NEXT: undef [[LAST_USE:%[0-9]+]].sub0_sub1_sub2_sub3:vreg_512_align2 = COPY [[SPLIT9]].sub0_sub1_sub2_sub3
# CHECK-NEXT: [[LAST_USE]].sub4:vreg_512_align2 = COPY [[SPLIT8]].sub0
# CHECK-NEXT: [[LAST_USE]].sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: [[LAST_USE]].sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
# CHECK-NEXT: S_NOP 0, implicit-def [[LAST_USE]], implicit [[LAST_USE]].sub0_sub1_sub2_sub3, implicit [[LAST_USE]].sub4_sub5_sub6_sub7, implicit [[LAST_USE]].sub8_sub9_sub10_sub11

---
name: inflated_reg_class_copy_use_after_free_lane_subset
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr72_sgpr73_sgpr74_sgpr75'
  stackPtrOffsetReg: '$sgpr32'
  occupancy: 7
  vgprForAGPRCopy: '$vgpr255'
  sgprForEXECCopy: '$sgpr74_sgpr75'
body: |
  bb.0:
    liveins: $vgpr0, $sgpr4_sgpr5

    %0:vgpr_32 = IMPLICIT_DEF
    renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed undef renamable $sgpr4_sgpr5, 0, 0 :: (load (s64), addrspace 4)
    S_NOP 0, implicit-def undef %1.sub12_sub13_sub14_sub15:vreg_512_align2
    S_NOP 0, implicit-def %1.sub8_sub9_sub10_sub11:vreg_512_align2
    S_NOP 0, implicit-def %1.sub4_sub5_sub6_sub7:vreg_512_align2
    S_NOP 0, implicit-def %1.sub0_sub1_sub2_sub3:vreg_512_align2
    S_NOP 0, implicit-def early-clobber %2:vreg_512_align2, implicit %1.sub0_sub1_sub2_sub3, implicit %1.sub4_sub5_sub6_sub7
    %1.sub2:vreg_512_align2 = COPY %2.sub3
    %1.sub3:vreg_512_align2 = COPY %2.sub2
    %1.sub4:vreg_512_align2 = COPY %2.sub0
    %1.sub5:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub6:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub7:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub8:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub9:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub10:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub11:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub12:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub13:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub14:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    %1.sub15:vreg_512_align2 = V_MOV_B32_e32 0, implicit $exec
    S_NOP 0, implicit-def %1:vreg_512_align2, implicit %1.sub0_sub1_sub2_sub3, implicit %1.sub4_sub5_sub6_sub7, implicit %1.sub8_sub9_sub10_sub11
    GLOBAL_STORE_DWORDX4_SADDR undef %3:vgpr_32, %1.sub12_sub13_sub14_sub15, undef renamable $sgpr0_sgpr1, 96, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0

...