-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Add combine for shadd family of instructions. #130829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
df5bcbf
38cfa79
b60ccb8
d452f5d
107d3d0
30e5845
373f2a8
c76c64e
a335458
c54ae87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
#include "llvm/CodeGen/MachineInstrBuilder.h" | ||
#include "llvm/CodeGen/MachineJumpTableInfo.h" | ||
#include "llvm/CodeGen/MachineRegisterInfo.h" | ||
#include "llvm/CodeGen/SDPatternMatch.h" | ||
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" | ||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | ||
#include "llvm/CodeGen/ValueTypes.h" | ||
|
@@ -79,6 +80,12 @@ static cl::opt<int> | |
"use for creating a floating-point immediate value"), | ||
cl::init(2)); | ||
|
||
static cl::opt<bool> | ||
ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, | ||
cl::desc("Swap add and addi in cases where the add may " | ||
"be combined with a shift"), | ||
cl::init(true)); | ||
|
||
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, | ||
const RISCVSubtarget &STI) | ||
: TargetLowering(TM), Subtarget(STI) { | ||
|
@@ -14306,6 +14313,67 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, | |
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); | ||
} | ||
|
||
// Check if this SDValue is an add immediate that is fed by a shift of 1, 2, | ||
// or 3. | ||
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, | ||
SelectionDAG &DAG) { | ||
using namespace llvm::SDPatternMatch; | ||
|
||
// Looking for a reg-reg add and not an addi. | ||
if (isa<ConstantSDNode>(N->getOperand(1))) | ||
return SDValue(); | ||
|
||
// Based on testing it seems that performance degrades if the ADDI has | ||
// more than 2 uses. | ||
if (AddI->use_size() > 2) | ||
return SDValue(); | ||
|
||
APInt AddVal; | ||
SDValue SHLVal; | ||
if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal)))) | ||
return SDValue(); | ||
|
||
APInt VShift; | ||
if (!sd_match(SHLVal, m_BinOp(ISD::SHL, m_Value(), m_ConstInt(VShift)))) | ||
return SDValue(); | ||
|
||
if (VShift.slt(1) || VShift.sgt(3)) | ||
return SDValue(); | ||
|
||
SDLoc DL(N); | ||
EVT VT = N->getValueType(0); | ||
// The shift must be positive but the add can be signed. | ||
uint64_t ShlConst = VShift.getZExtValue(); | ||
int64_t AddConst = AddVal.getSExtValue(); | ||
|
||
SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0), | ||
DAG.getConstant(ShlConst, DL, VT), Other); | ||
return DAG.getNode(ISD::ADD, DL, VT, SHADD, | ||
DAG.getSignedConstant(AddConst, DL, VT)); | ||
} | ||
|
||
// Optimize (add (add (shl x, c0), c1), y) -> | ||
// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3]. | ||
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, | ||
const RISCVSubtarget &Subtarget) { | ||
// Perform this optimization only in the zba extension. | ||
if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba()) | ||
return SDValue(); | ||
|
||
// Skip for vector types and larger types. | ||
EVT VT = N->getValueType(0); | ||
if (VT != Subtarget.getXLenVT()) | ||
return SDValue(); | ||
|
||
SDValue AddI = N->getOperand(0); | ||
SDValue Other = N->getOperand(1); | ||
if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG)) | ||
return V; | ||
if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG)) | ||
return V; | ||
return SDValue(); | ||
} | ||
|
||
// Combine a constant select operand into its use: | ||
// | ||
// (and (select cond, -1, c), x) | ||
|
@@ -14547,9 +14615,12 @@ static SDValue performADDCombine(SDNode *N, | |
return V; | ||
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) | ||
return V; | ||
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) | ||
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) { | ||
if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't look at what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good point.
Running I do believe that this is the order in which they run. |
||
return V; | ||
} | ||
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) | ||
return V; | ||
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=riscv32-unknown-elf -mattr=+zba %s -o - | FileCheck %s | ||
|
||
declare i32 @callee1(i32 noundef) | ||
declare i32 @callee2(i32 noundef, i32 noundef) | ||
declare i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) | ||
|
||
define void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { | ||
; CHECK-LABEL: t1: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: sh2add a2, a0, a2 | ||
; CHECK-NEXT: sh2add a1, a0, a1 | ||
; CHECK-NEXT: addi a1, a1, 45 | ||
; CHECK-NEXT: addi a2, a2, 45 | ||
; CHECK-NEXT: sh2add a3, a0, a3 | ||
; CHECK-NEXT: mv a0, a1 | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 45 | ||
%add1 = add nsw i32 %add, %b | ||
%add3 = add nsw i32 %add, %c | ||
%add5 = add nsw i32 %shl, %d | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) | ||
ret void | ||
} | ||
|
||
define void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { | ||
; CHECK-LABEL: t2: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: slli a0, a0, 2 | ||
; CHECK-NEXT: addi a5, a0, 42 | ||
; CHECK-NEXT: add a4, a5, a1 | ||
; CHECK-NEXT: add a3, a5, a2 | ||
; CHECK-NEXT: mv a1, a5 | ||
; CHECK-NEXT: mv a2, a4 | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add4 = add nsw i32 %add, %b | ||
%add7 = add nsw i32 %add, %c | ||
%call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7) | ||
ret void | ||
} | ||
|
||
define void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) #0 { | ||
; CHECK-LABEL: t3: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: slli a0, a0, 2 | ||
; CHECK-NEXT: addi a5, a0, 42 | ||
; CHECK-NEXT: add a0, a5, a1 | ||
; CHECK-NEXT: add a1, a5, a2 | ||
; CHECK-NEXT: add a2, a5, a3 | ||
; CHECK-NEXT: add a3, a5, a4 | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%add2 = add nsw i32 %add, %c | ||
%add3 = add nsw i32 %add, %d | ||
%add4 = add nsw i32 %add, %e | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4) | ||
ret void | ||
} | ||
|
||
define void @t4(i32 noundef %a, i32 noundef %b) #0 { | ||
; CHECK-LABEL: t4: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: sh2add a0, a0, a1 | ||
; CHECK-NEXT: addi a0, a0, 42 | ||
; CHECK-NEXT: tail callee1 | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%call = tail call i32 @callee1(i32 noundef %add1) | ||
ret void | ||
} | ||
|
||
define void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { | ||
; CHECK-LABEL: t5: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: sh2add a2, a0, a2 | ||
; CHECK-NEXT: sh2add a0, a0, a1 | ||
; CHECK-NEXT: addi a0, a0, 42 | ||
; CHECK-NEXT: addi a1, a2, 42 | ||
; CHECK-NEXT: tail callee2 | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%add2 = add nsw i32 %add, %c | ||
%call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2) | ||
ret void | ||
} | ||
|
||
define void @t6(i32 noundef %a, i32 noundef %b) #0 { | ||
; CHECK-LABEL: t6: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: slli a2, a0, 2 | ||
; CHECK-NEXT: sh2add a0, a0, a1 | ||
; CHECK-NEXT: addi a0, a0, 42 | ||
; CHECK-NEXT: mv a1, a2 | ||
; CHECK-NEXT: mv a3, a2 | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl) | ||
ret void | ||
} | ||
|
||
define void @t7(i32 noundef %a, i32 noundef %b) #0 { | ||
; CHECK-LABEL: t7: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: slli a0, a0, 2 | ||
; CHECK-NEXT: addi a2, a0, 42 | ||
; CHECK-NEXT: add a0, a2, a1 | ||
; CHECK-NEXT: mv a1, a2 | ||
; CHECK-NEXT: mv a3, a2 | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add) | ||
ret void | ||
} | ||
|
||
define void @t8(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { | ||
; CHECK-LABEL: t8: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: sh3add a2, a0, a2 | ||
; CHECK-NEXT: sh3add a1, a0, a1 | ||
; CHECK-NEXT: lui a4, 1 | ||
; CHECK-NEXT: addi a4, a4, 1307 | ||
; CHECK-NEXT: add a1, a1, a4 | ||
; CHECK-NEXT: add a2, a2, a4 | ||
; CHECK-NEXT: sh3add a3, a0, a3 | ||
; CHECK-NEXT: mv a0, a1 | ||
; CHECK-NEXT: tail callee | ||
Comment on lines
+135
to
+144
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed that without this patch, this function would be:
which is one instruction shorter than what you proposed here. |
||
entry: | ||
%shl = shl i32 %a, 3 | ||
%add = add nsw i32 %shl, 5403 | ||
%add1 = add nsw i32 %add, %b | ||
%add3 = add nsw i32 %add, %c | ||
%add5 = add nsw i32 %shl, %d | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) | ||
ret void | ||
} | ||
|
||
define void @t9(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { | ||
; CHECK-LABEL: t9: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: sh2add a2, a0, a2 | ||
; CHECK-NEXT: sh2add a1, a0, a1 | ||
; CHECK-NEXT: addi a1, a1, -42 | ||
; CHECK-NEXT: addi a2, a2, -42 | ||
; CHECK-NEXT: sh2add a3, a0, a3 | ||
; CHECK-NEXT: mv a0, a1 | ||
; CHECK-NEXT: tail callee | ||
Comment on lines
+158
to
+164
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly, without this patch it would be:
Although they have the same number of instructions, is it always a good thing to use shXadd over plain ADD? |
||
entry: | ||
%shl = shl i32 %a, 2 | ||
%add = add nsw i32 %shl, -42 | ||
%add1 = add nsw i32 %add, %b | ||
%add3 = add nsw i32 %add, %c | ||
%add5 = add nsw i32 %shl, %d | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) | ||
ret void | ||
} | ||
|
||
define void @t10(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { | ||
; CHECK-LABEL: t10: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: tail callee | ||
entry: | ||
%shl = shl i32 %a, -2 | ||
%add = add nsw i32 %shl, 42 | ||
%add1 = add nsw i32 %add, %b | ||
%add3 = add nsw i32 %add, %c | ||
%add5 = add nsw i32 %shl, %d | ||
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) | ||
ret void | ||
} | ||
|
||
attributes #0 = { nounwind optsize } |
Uh oh!
There was an error while loading. Please reload this page.