Skip to content

Commit 8d69e95

Browse files
[RISCV] Add combine for shadd family of instructions. (#130829)
For example, in the following situation: `%6:gpr = SLLI %2:gpr, 2`; `%7:gpr = ADDI killed %6:gpr, 24`; `%8:gpr = ADD %0:gpr, %7:gpr`. If we swap the two add instructions, we can merge the shift and the add. The final code will look something like this: `%7 = SH2ADD %0, %2`; `%8 = ADDI %7, 24`.
1 parent 96efb21 commit 8d69e95

File tree

2 files changed

+261
-1
lines changed

2 files changed

+261
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "llvm/CodeGen/MachineInstrBuilder.h"
3030
#include "llvm/CodeGen/MachineJumpTableInfo.h"
3131
#include "llvm/CodeGen/MachineRegisterInfo.h"
32+
#include "llvm/CodeGen/SDPatternMatch.h"
3233
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
3334
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
3435
#include "llvm/CodeGen/ValueTypes.h"
@@ -79,6 +80,12 @@ static cl::opt<int>
7980
"use for creating a floating-point immediate value"),
8081
cl::init(2));
8182

83+
// Hidden command-line option, enabled by default, that gates the
// combineShlAddIAdd reassociation (swapping an add and an addi so that the
// add can be combined with a shift).
static cl::opt<bool>
84+
ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
85+
cl::desc("Swap add and addi in cases where the add may "
86+
"be combined with a shift"),
87+
cl::init(true));
88+
8289
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
8390
const RISCVSubtarget &STI)
8491
: TargetLowering(TM), Subtarget(STI) {
@@ -14441,6 +14448,67 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
1444114448
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
1444214449
}
1444314450

14451+
// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
14452+
// or 3.
14453+
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
14454+
SelectionDAG &DAG) {
14455+
using namespace llvm::SDPatternMatch;
14456+
14457+
// Looking for a reg-reg add and not an addi.
14458+
if (isa<ConstantSDNode>(N->getOperand(1)))
14459+
return SDValue();
14460+
14461+
// Based on testing it seems that performance degrades if the ADDI has
14462+
// more than 2 uses.
14463+
if (AddI->use_size() > 2)
14464+
return SDValue();
14465+
14466+
APInt AddVal;
14467+
SDValue SHLVal;
14468+
if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
14469+
return SDValue();
14470+
14471+
APInt VShift;
14472+
if (!sd_match(SHLVal, m_BinOp(ISD::SHL, m_Value(), m_ConstInt(VShift))))
14473+
return SDValue();
14474+
14475+
if (VShift.slt(1) || VShift.sgt(3))
14476+
return SDValue();
14477+
14478+
SDLoc DL(N);
14479+
EVT VT = N->getValueType(0);
14480+
// The shift must be positive but the add can be signed.
14481+
uint64_t ShlConst = VShift.getZExtValue();
14482+
int64_t AddConst = AddVal.getSExtValue();
14483+
14484+
SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
14485+
DAG.getConstant(ShlConst, DL, VT), Other);
14486+
return DAG.getNode(ISD::ADD, DL, VT, SHADD,
14487+
DAG.getSignedConstant(AddConst, DL, VT));
14488+
}
14489+
14490+
// Optimize (add (add (shl x, c0), c1), y) ->
14491+
// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
14492+
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
14493+
const RISCVSubtarget &Subtarget) {
14494+
// Perform this optimization only in the zba extension.
14495+
if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
14496+
return SDValue();
14497+
14498+
// Skip for vector types and larger types.
14499+
EVT VT = N->getValueType(0);
14500+
if (VT != Subtarget.getXLenVT())
14501+
return SDValue();
14502+
14503+
SDValue AddI = N->getOperand(0);
14504+
SDValue Other = N->getOperand(1);
14505+
if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
14506+
return V;
14507+
if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
14508+
return V;
14509+
return SDValue();
14510+
}
14511+
1444414512
// Combine a constant select operand into its use:
1444514513
//
1444614514
// (and (select cond, -1, c), x)
@@ -14682,9 +14750,12 @@ static SDValue performADDCombine(SDNode *N,
1468214750
return V;
1468314751
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
1468414752
return V;
14685-
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14753+
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
1468614754
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
1468714755
return V;
14756+
if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
14757+
return V;
14758+
}
1468814759
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1468914760
return V;
1469014761
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32-unknown-elf -mattr=+zba %s -o - | FileCheck %s
3+
4+
declare i32 @callee1(i32 noundef)
5+
declare i32 @callee2(i32 noundef, i32 noundef)
6+
declare i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef)
7+
8+
; t1: %add (shl by 2, plus 45) feeds two reg-reg adds and %shl feeds a third; the expected output uses sh2add + addi for both users of %add.
define void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 {
9+
; CHECK-LABEL: t1:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: sh2add a2, a0, a2
12+
; CHECK-NEXT: sh2add a1, a0, a1
13+
; CHECK-NEXT: addi a1, a1, 45
14+
; CHECK-NEXT: addi a2, a2, 45
15+
; CHECK-NEXT: sh2add a3, a0, a3
16+
; CHECK-NEXT: mv a0, a1
17+
; CHECK-NEXT: tail callee
18+
entry:
19+
%shl = shl i32 %a, 2
20+
%add = add nsw i32 %shl, 45
21+
%add1 = add nsw i32 %add, %b
22+
%add3 = add nsw i32 %add, %c
23+
%add5 = add nsw i32 %shl, %d
24+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
25+
ret void
26+
}
27+
28+
; t2: %add has three users in the IR (the call and two adds); the expected output keeps the slli/addi/add sequence.
define void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 {
29+
; CHECK-LABEL: t2:
30+
; CHECK: # %bb.0: # %entry
31+
; CHECK-NEXT: slli a0, a0, 2
32+
; CHECK-NEXT: addi a5, a0, 42
33+
; CHECK-NEXT: add a4, a5, a1
34+
; CHECK-NEXT: add a3, a5, a2
35+
; CHECK-NEXT: mv a1, a5
36+
; CHECK-NEXT: mv a2, a4
37+
; CHECK-NEXT: tail callee
38+
entry:
39+
%shl = shl i32 %a, 2
40+
%add = add nsw i32 %shl, 42
41+
%add4 = add nsw i32 %add, %b
42+
%add7 = add nsw i32 %add, %c
43+
%call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7)
44+
ret void
45+
}
46+
47+
; t3: %add feeds four reg-reg adds; the expected output keeps a single slli/addi followed by plain adds.
define void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) #0 {
48+
; CHECK-LABEL: t3:
49+
; CHECK: # %bb.0: # %entry
50+
; CHECK-NEXT: slli a0, a0, 2
51+
; CHECK-NEXT: addi a5, a0, 42
52+
; CHECK-NEXT: add a0, a5, a1
53+
; CHECK-NEXT: add a1, a5, a2
54+
; CHECK-NEXT: add a2, a5, a3
55+
; CHECK-NEXT: add a3, a5, a4
56+
; CHECK-NEXT: tail callee
57+
entry:
58+
%shl = shl i32 %a, 2
59+
%add = add nsw i32 %shl, 42
60+
%add1 = add nsw i32 %add, %b
61+
%add2 = add nsw i32 %add, %c
62+
%add3 = add nsw i32 %add, %d
63+
%add4 = add nsw i32 %add, %e
64+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4)
65+
ret void
66+
}
67+
68+
; t4: minimal case, %add has a single user; the expected output is sh2add followed by addi.
define void @t4(i32 noundef %a, i32 noundef %b) #0 {
69+
; CHECK-LABEL: t4:
70+
; CHECK: # %bb.0: # %entry
71+
; CHECK-NEXT: sh2add a0, a0, a1
72+
; CHECK-NEXT: addi a0, a0, 42
73+
; CHECK-NEXT: tail callee1
74+
entry:
75+
%shl = shl i32 %a, 2
76+
%add = add nsw i32 %shl, 42
77+
%add1 = add nsw i32 %add, %b
78+
%call = tail call i32 @callee1(i32 noundef %add1)
79+
ret void
80+
}
81+
82+
; t5: %add feeds exactly two reg-reg adds; the expected output uses sh2add + addi for each.
define void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 {
83+
; CHECK-LABEL: t5:
84+
; CHECK: # %bb.0: # %entry
85+
; CHECK-NEXT: sh2add a2, a0, a2
86+
; CHECK-NEXT: sh2add a0, a0, a1
87+
; CHECK-NEXT: addi a0, a0, 42
88+
; CHECK-NEXT: addi a1, a2, 42
89+
; CHECK-NEXT: tail callee2
90+
entry:
91+
%shl = shl i32 %a, 2
92+
%add = add nsw i32 %shl, 42
93+
%add1 = add nsw i32 %add, %b
94+
%add2 = add nsw i32 %add, %c
95+
%call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2)
96+
ret void
97+
}
98+
99+
; t6: %add has a single user but %shl is also passed to the call; the expected output uses sh2add + addi and still emits the slli for the other uses.
define void @t6(i32 noundef %a, i32 noundef %b) #0 {
100+
; CHECK-LABEL: t6:
101+
; CHECK: # %bb.0: # %entry
102+
; CHECK-NEXT: slli a2, a0, 2
103+
; CHECK-NEXT: sh2add a0, a0, a1
104+
; CHECK-NEXT: addi a0, a0, 42
105+
; CHECK-NEXT: mv a1, a2
106+
; CHECK-NEXT: mv a3, a2
107+
; CHECK-NEXT: tail callee
108+
entry:
109+
%shl = shl i32 %a, 2
110+
%add = add nsw i32 %shl, 42
111+
%add1 = add nsw i32 %add, %b
112+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl)
113+
ret void
114+
}
115+
116+
; t7: %add feeds one reg-reg add and is also passed three times to the call; the expected output keeps the slli/addi/add sequence.
define void @t7(i32 noundef %a, i32 noundef %b) #0 {
117+
; CHECK-LABEL: t7:
118+
; CHECK: # %bb.0: # %entry
119+
; CHECK-NEXT: slli a0, a0, 2
120+
; CHECK-NEXT: addi a2, a0, 42
121+
; CHECK-NEXT: add a0, a2, a1
122+
; CHECK-NEXT: mv a1, a2
123+
; CHECK-NEXT: mv a3, a2
124+
; CHECK-NEXT: tail callee
125+
entry:
126+
%shl = shl i32 %a, 2
127+
%add = add nsw i32 %shl, 42
128+
%add1 = add nsw i32 %add, %b
129+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add)
130+
ret void
131+
}
132+
133+
; t8: shift by 3 with the constant 5403; the expected output uses sh3add and adds a constant built with lui + addi.
define void @t8(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 {
134+
; CHECK-LABEL: t8:
135+
; CHECK: # %bb.0: # %entry
136+
; CHECK-NEXT: sh3add a2, a0, a2
137+
; CHECK-NEXT: sh3add a1, a0, a1
138+
; CHECK-NEXT: lui a4, 1
139+
; CHECK-NEXT: addi a4, a4, 1307
140+
; CHECK-NEXT: add a1, a1, a4
141+
; CHECK-NEXT: add a2, a2, a4
142+
; CHECK-NEXT: sh3add a3, a0, a3
143+
; CHECK-NEXT: mv a0, a1
144+
; CHECK-NEXT: tail callee
145+
entry:
146+
%shl = shl i32 %a, 3
147+
%add = add nsw i32 %shl, 5403
148+
%add1 = add nsw i32 %add, %b
149+
%add3 = add nsw i32 %add, %c
150+
%add5 = add nsw i32 %shl, %d
151+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
152+
ret void
153+
}
154+
155+
; t9: same shape as t1 but with a negative add constant (-42); the expected output uses sh2add followed by addi with -42.
define void @t9(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 {
156+
; CHECK-LABEL: t9:
157+
; CHECK: # %bb.0: # %entry
158+
; CHECK-NEXT: sh2add a2, a0, a2
159+
; CHECK-NEXT: sh2add a1, a0, a1
160+
; CHECK-NEXT: addi a1, a1, -42
161+
; CHECK-NEXT: addi a2, a2, -42
162+
; CHECK-NEXT: sh2add a3, a0, a3
163+
; CHECK-NEXT: mv a0, a1
164+
; CHECK-NEXT: tail callee
165+
entry:
166+
%shl = shl i32 %a, 2
167+
%add = add nsw i32 %shl, -42
168+
%add1 = add nsw i32 %add, %b
169+
%add3 = add nsw i32 %add, %c
170+
%add5 = add nsw i32 %shl, %d
171+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
172+
ret void
173+
}
174+
175+
; t10: the shift amount is -2, outside the 1..3 range handled by the combine; the expected output is only the tail call.
define void @t10(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 {
176+
; CHECK-LABEL: t10:
177+
; CHECK: # %bb.0: # %entry
178+
; CHECK-NEXT: tail callee
179+
entry:
180+
%shl = shl i32 %a, -2
181+
%add = add nsw i32 %shl, 42
182+
%add1 = add nsw i32 %add, %b
183+
%add3 = add nsw i32 %add, %c
184+
%add5 = add nsw i32 %shl, %d
185+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
186+
ret void
187+
}
188+
189+
attributes #0 = { nounwind optsize }

0 commit comments

Comments
 (0)