Skip to content

Commit c3c97ea

Browse files
authored
PeepholeOpt: Do not skip reg_sequence sources with subregs (#125667)
Contrary to the comment, this particular code is not responsible for handling any sub-register index composition that may be required, and unhandled cases are already rejected later. Lift this restriction so that sub-register composition and reg_sequence sources with sub-registers can be permitted later.
1 parent 6ff33ed commit c3c97ea

File tree

2 files changed

+50
-6
lines changed

2 files changed

+50
-6
lines changed

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,9 +406,7 @@ class RegSequenceRewriter : public Rewriter {
406406

407407
const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
408408
Src.Reg = MOInsertedReg.getReg();
409-
// If we have to compose sub-register indices, bail out.
410-
if ((Src.SubReg = MOInsertedReg.getSubReg()))
411-
return false;
409+
Src.SubReg = MOInsertedReg.getSubReg();
412410

413411
// We want to track something that is compatible with the related
414412
// partial definition.

llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ body: |
2222
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2323
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
2424
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
25-
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub0, %subreg.sub1
26-
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
27-
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
25+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
26+
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
27+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
2828
; GCN-NEXT: KILL [[COPY3]], implicit [[COPY2]]
2929
%0:vgpr_32 = COPY $vgpr0
3030
%1:vgpr_32 = COPY $vgpr1
@@ -34,3 +34,49 @@ body: |
3434
%5:vgpr_32 = COPY %3.sub1
3535
KILL implicit %4, %5
3636
...
37+
38+
---
39+
name: reg_sequence_removal_2
40+
body: |
41+
bb.0:
42+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
43+
44+
; GCN-LABEL: name: reg_sequence_removal_2
45+
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
46+
; GCN-NEXT: {{ $}}
47+
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
48+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
49+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1
50+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub0, %subreg.sub3
51+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub2, %subreg.sub1, [[REG_SEQUENCE]].sub3, %subreg.sub2, [[COPY1]].sub0, %subreg.sub3
52+
; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]]
53+
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
54+
%1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
55+
%2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1
56+
%3:vreg_128 = REG_SEQUENCE %2.sub1_sub2_sub3, %subreg.sub0_sub1_sub2, %1.sub0, %subreg.sub3
57+
%4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3
58+
KILL implicit %4
59+
...
60+
61+
---
62+
name: reg_sequence_removal_3
63+
body: |
64+
bb.0:
65+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
66+
67+
; GCN-LABEL: name: reg_sequence_removal_3
68+
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
69+
; GCN-NEXT: {{ $}}
70+
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
71+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
72+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1
73+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub2_sub3, %subreg.sub2_sub3, [[COPY]].sub0, %subreg.sub1, [[COPY]].sub1, %subreg.sub0
74+
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1, %subreg.sub0, [[COPY]].sub0, %subreg.sub1, [[COPY1]].sub2, %subreg.sub2, [[COPY1]].sub3, %subreg.sub3
75+
; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]]
76+
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
77+
%1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
78+
%2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1
79+
%3:vreg_128 = REG_SEQUENCE %2.sub0_sub1, %subreg.sub2_sub3, %2.sub2, %subreg.sub1, %2.sub3, %subreg.sub0
80+
%4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3
81+
KILL implicit %4
82+
...

0 commit comments

Comments
 (0)