Skip to content

Commit a0841df

Browse files
committed
[BPF] Fix a bug in peephole optimization
One of current peephole optimiations is to remove SLL/SRL if the sub register has been zero extended. This phase has two bugs and one limitations. First, for the physical subregister used in pseudo insn COPY like below, it permits incorrect optimization. %0:gpr32 = COPY $w0 ... %4:gpr = MOV_32_64 %0:gpr32 %5:gpr = SLL_ri %4:gpr(tied-def 0), 32 %6:gpr = SRA_ri %5:gpr(tied-def 0), 32 The $w0 could be from the return value of a previous function call and its upper 32-bit value might contain some non-zero values. The same applies to function arguments. Second, the current code may permits removing SLL/SRA like below: %0:gpr32 = COPY $w0 %1:gpr32 = COPY %0:gpr32 ... %4:gpr = MOV_32_64 %1:gpr32 %5:gpr = SLL_ri %4:gpr(tied-def 0), 32 %6:gpr = SRA_ri %5:gpr(tied-def 0), 32 The reason is that it did not follow def-use chain to skip all intermediate 32bit-to-32bit COPY instructions. The current implementation is also very conservative for PHI instructions. If any PHI insn component is another PHI or COPY insn, it will just permit SLL/SRA. This patch fixed the issue as follows: - During def/use chain traversal, if any physical register is read, SLL/SRA will be preserved as these physical registers are mostly from function return values or current function arguments. - Recursively visit all COPY and PHI instructions.
1 parent cd8748a commit a0841df

File tree

5 files changed

+163
-29
lines changed

5 files changed

+163
-29
lines changed

llvm/lib/Target/BPF/BPFMIPeephole.cpp

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ struct BPFMIPeephole : public MachineFunctionPass {
5151
// Initialize class variables.
5252
void initialize(MachineFunction &MFParm);
5353

54+
bool isCopyFrom32Def(MachineInstr *CopyMI);
55+
bool isInsnFrom32Def(MachineInstr *DefInsn);
56+
bool isPhiFrom32Def(MachineInstr *MovMI);
5457
bool isMovFrom32Def(MachineInstr *MovMI);
5558
bool eliminateZExtSeq(void);
5659

@@ -75,42 +78,77 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
7578
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
7679
}
7780

78-
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
81+
bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
7982
{
80-
MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
83+
MachineOperand &opnd = CopyMI->getOperand(1);
8184

82-
LLVM_DEBUG(dbgs() << " Def of Mov Src:");
83-
LLVM_DEBUG(DefInsn->dump());
85+
if (!opnd.isReg())
86+
return false;
8487

85-
if (!DefInsn)
88+
// Return false if getting value from a 32bit physical register.
89+
// Most likely, this physical register is aliased to
90+
// function call return value or current function parameters.
91+
Register Reg = opnd.getReg();
92+
if (!Register::isVirtualRegister(Reg))
8693
return false;
8794

88-
if (DefInsn->isPHI()) {
89-
for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
90-
MachineOperand &opnd = DefInsn->getOperand(i);
95+
if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
96+
return false;
9197

92-
if (!opnd.isReg())
93-
return false;
98+
MachineInstr *DefInsn = MRI->getVRegDef(Reg);
99+
if (!isInsnFrom32Def(DefInsn))
100+
return false;
94101

95-
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
96-
// quick check on PHI incoming definitions.
97-
if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
98-
return false;
99-
}
100-
}
102+
return true;
103+
}
101104

102-
if (DefInsn->getOpcode() == BPF::COPY) {
103-
MachineOperand &opnd = DefInsn->getOperand(1);
105+
bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
106+
{
107+
for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
108+
MachineOperand &opnd = PhiMI->getOperand(i);
104109

105110
if (!opnd.isReg())
106111
return false;
107112

108-
Register Reg = opnd.getReg();
109-
if ((Register::isVirtualRegister(Reg) &&
110-
MRI->getRegClass(Reg) == &BPF::GPRRegClass))
113+
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
114+
if (!PhiDef)
115+
return false;
116+
if (PhiDef->isPHI() && !isPhiFrom32Def(PhiDef))
117+
return false;
118+
if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
119+
return false;
120+
}
121+
122+
return true;
123+
}
124+
125+
// The \p DefInsn instruction defines a virtual register.
126+
bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
127+
{
128+
if (!DefInsn)
129+
return false;
130+
131+
if (DefInsn->isPHI()) {
132+
if (!isPhiFrom32Def(DefInsn))
133+
return false;
134+
} else if (DefInsn->getOpcode() == BPF::COPY) {
135+
if (!isCopyFrom32Def(DefInsn))
111136
return false;
112137
}
113138

139+
return true;
140+
}
141+
142+
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
143+
{
144+
MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
145+
146+
LLVM_DEBUG(dbgs() << " Def of Mov Src:");
147+
LLVM_DEBUG(DefInsn->dump());
148+
149+
if (!isInsnFrom32Def(DefInsn))
150+
return false;
151+
114152
LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
115153

116154
return true;

llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ entry:
5555
%c.d = select i1 %cmp, i32 %c, i32 %d
5656
ret i32 %c.d
5757
}
58+
; CHECK-LABEL: select_cc_32
59+
; CHECK: r{{[0-9]+}} <<= 32
60+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
5861

5962
; Function Attrs: norecurse nounwind readnone
6063
define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -63,6 +66,9 @@ entry:
6366
%c.d = select i1 %cmp, i64 %c, i64 %d
6467
ret i64 %c.d
6568
}
69+
; CHECK-LABEL: select_cc_32_64
70+
; CHECK: r{{[0-9]+}} <<= 32
71+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
6672

6773
; Function Attrs: norecurse nounwind readnone
6874
define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -71,6 +77,8 @@ entry:
7177
%c.d = select i1 %cmp, i32 %c, i32 %d
7278
ret i32 %c.d
7379
}
80+
; CHECK-LABEL: select_cc_64_32
81+
; CHECK-NOT: r{{[0-9]+}} <<= 32
7482

7583
; Function Attrs: norecurse nounwind readnone
7684
define dso_local i32 @selecti_cc_32(i32 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -79,6 +87,9 @@ entry:
7987
%c.d = select i1 %cmp, i32 %c, i32 %d
8088
ret i32 %c.d
8189
}
90+
; CHECK-LABEL: selecti_cc_32
91+
; CHECK: r{{[0-9]+}} <<= 32
92+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
8293

8394
; Function Attrs: norecurse nounwind readnone
8495
define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -87,6 +98,9 @@ entry:
8798
%c.d = select i1 %cmp, i64 %c, i64 %d
8899
ret i64 %c.d
89100
}
101+
; CHECK-LABEL: selecti_cc_32_64
102+
; CHECK: r{{[0-9]+}} <<= 32
103+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
90104

91105
; Function Attrs: norecurse nounwind readnone
92106
define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -95,6 +109,5 @@ entry:
95109
%c.d = select i1 %cmp, i32 %c, i32 %d
96110
ret i32 %c.d
97111
}
98-
; There shouldn't be any type promotion, all of them are expected to be
99-
; eliminated by peephole optimization.
112+
; CHECK-LABEL: selecti_cc_64_32
100113
; CHECK-NOT: r{{[0-9]+}} <<= 32
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
2+
;
3+
; For the below test case, 'b' in 'ret == b' needs SLL/SLR.
4+
; 'ret' in 'ret == b' does not need SLL/SLR as all 'ret' values
5+
; are assigned through 'w<reg> = <value>' alu32 operations.
6+
;
7+
; extern int helper(int);
8+
; int test(int a, int b, int c, int d) {
9+
; int ret;
10+
; if (a < b)
11+
; ret = (c < d) ? -1 : 0;
12+
; else
13+
; ret = (c < a) ? 1 : 2;
14+
; return helper(ret == b);
15+
; }
16+
17+
define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
18+
entry:
19+
%cmp = icmp slt i32 %a, %b
20+
%cmp1 = icmp slt i32 %c, %d
21+
%cond = sext i1 %cmp1 to i32
22+
%cmp2 = icmp slt i32 %c, %a
23+
%cond3 = select i1 %cmp2, i32 1, i32 2
24+
%ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
25+
%cmp4 = icmp eq i32 %ret.0, %b
26+
%conv = zext i1 %cmp4 to i32
27+
%call = tail call i32 @helper(i32 %conv)
28+
ret i32 %call
29+
}
30+
; CHECK: r{{[0-9]+}} >>= 32
31+
; CHECK-NOT: r{{[0-9]+}} >>= 32
32+
; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
33+
34+
declare dso_local i32 @helper(i32) local_unnamed_addr
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
2+
;
3+
; For the below test case, both 'ret' and 'b' at 'ret == b'
4+
; need SLL/SLR. For 'ret', 'ret = a' may receive the value
5+
; from argument with high 32-bit invalid data.
6+
;
7+
; extern int helper(int);
8+
; int test(int a, int b, int c, int d) {
9+
; int ret;
10+
; if (a < b)
11+
; ret = (c < d) ? a : 0;
12+
; else
13+
; ret = (c < a) ? 1 : 2;
14+
; return helper(ret == b);
15+
; }
16+
17+
define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
18+
entry:
19+
%cmp = icmp slt i32 %a, %b
20+
%cmp1 = icmp slt i32 %c, %d
21+
%cond = select i1 %cmp1, i32 %a, i32 0
22+
%cmp2 = icmp slt i32 %c, %a
23+
%cond3 = select i1 %cmp2, i32 1, i32 2
24+
%ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
25+
%cmp4 = icmp eq i32 %ret.0, %b
26+
%conv = zext i1 %cmp4 to i32
27+
%call = tail call i32 @helper(i32 %conv)
28+
ret i32 %call
29+
}
30+
; CHECK: r{{[0-9]+}} >>= 32
31+
; CHECK: r{{[0-9]+}} >>= 32
32+
; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
33+
34+
declare dso_local i32 @helper(i32) local_unnamed_addr

llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_add
4747
entry:
4848
%cmp = icmp ugt i32 %a, %b
4949
%c.d = select i1 %cmp, i64 %c, i64 %d
50-
; CHECK-NOT: r{{[0-9]+}} <<= 32
51-
; CHECK-NOT: r{{[0-9]+}} >>= 32
50+
; CHECK: r{{[0-9]+}} <<= 32
51+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
5252
; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto
5353
ret i64 %c.d
5454
}
@@ -58,8 +58,8 @@ define dso_local i64 @select_u_2(i32 %a, i64 %b, i64 %c, i64 %d) local_unnamed_a
5858
; CHECK-LABEL: select_u_2:
5959
entry:
6060
%conv = zext i32 %a to i64
61-
; CHECK-NOT: r{{[0-9]+}} <<= 32
62-
; CHECK-NOT: r{{[0-9]+}} >>= 32
61+
; CHECK: r{{[0-9]+}} <<= 32
62+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
6363
%cmp = icmp ugt i64 %conv, %b
6464
%c.d = select i1 %cmp, i64 %c, i64 %d
6565
ret i64 %c.d
@@ -100,8 +100,23 @@ define dso_local i32* @inc_p(i32* readnone %p, i32 %a) local_unnamed_addr #0 {
100100
; CHECK-LABEL: inc_p:
101101
entry:
102102
%idx.ext = zext i32 %a to i64
103-
; CHECK-NOT: r{{[0-9]+}} <<= 32
104-
; CHECK-NOT: r{{[0-9]+}} >>= 32
103+
; CHECK: r{{[0-9]+}} <<= 32
104+
; CHECK-NEXT: r{{[0-9]+}} >>= 32
105105
%add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext
106106
ret i32* %add.ptr
107107
}
108+
109+
define dso_local i32 @test() local_unnamed_addr {
110+
; CHECK-LABEL: test:
111+
entry:
112+
%call = tail call i32 bitcast (i32 (...)* @helper to i32 ()*)()
113+
%cmp = icmp sgt i32 %call, 6
114+
; The shifts can't be optimized out because %call comes from function call
115+
; return i32 so the high bits might be invalid.
116+
; CHECK: r{{[0-9]+}} <<= 32
117+
; CHECK-NEXT: r{{[0-9]+}} s>>= 32
118+
%cond = zext i1 %cmp to i32
119+
; CHECK: if r{{[0-9]+}} s{{<|>}} {{[0-9]+}} goto
120+
ret i32 %cond
121+
}
122+
declare dso_local i32 @helper(...) local_unnamed_addr

0 commit comments

Comments
 (0)