Skip to content

Commit e4e7bde

Browse files
[AArch64] Combine BFXIL to ORR with right shift for ISD::OR instruction selection
- This extends the existing helper function 'isWorthFoldingIntoOrrWithLeftShift' into 'isWorthFoldingIntoOrrWithShift', and encodes the right-shift immediate (encoding the left-shift immediate is a no-op). Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D137465
1 parent c333b92 commit e4e7bde

File tree

2 files changed

+35
-30
lines changed

2 files changed

+35
-30
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2803,11 +2803,10 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
28032803
return true;
28042804
}
28052805

2806-
static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst,
2807-
SelectionDAG *CurDAG,
2808-
SDValue &LeftShiftedOperand,
2809-
uint64_t &LeftShiftAmount) {
2810-
// Avoid folding Dst into ORR-with-left-shift if Dst has other uses than ORR.
2806+
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
2807+
SDValue &ShiftedOperand,
2808+
uint64_t &ShiftAmount) {
2809+
// Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
28112810
if (!Dst.hasOneUse())
28122811
return false;
28132812

@@ -2852,23 +2851,32 @@ static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst,
28522851
VT),
28532852
CurDAG->getTargetConstant(
28542853
SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
2855-
LeftShiftedOperand = SDValue(UBFMNode, 0);
2856-
LeftShiftAmount = NumTrailingZeroInShiftedMask;
2854+
ShiftedOperand = SDValue(UBFMNode, 0);
2855+
ShiftAmount = NumTrailingZeroInShiftedMask;
28572856
return true;
28582857
}
28592858
}
2860-
} else if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
2861-
LeftShiftedOperand = Dst.getOperand(0);
2862-
LeftShiftAmount = ShlImm;
2859+
return false;
2860+
}
2861+
2862+
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
2863+
ShiftedOperand = Dst.getOperand(0);
2864+
ShiftAmount = ShlImm;
2865+
return true;
2866+
}
2867+
2868+
uint64_t SrlImm;
2869+
if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
2870+
ShiftedOperand = Dst.getOperand(0);
2871+
ShiftAmount = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
28632872
return true;
28642873
}
2865-
// FIXME: Extend the implementation to optimize if Dst is an SRL node.
28662874
return false;
28672875
}
28682876

2869-
static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
2870-
SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
2871-
const bool BiggerPattern) {
2877+
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
2878+
SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
2879+
const bool BiggerPattern) {
28722880
EVT VT = N->getValueType(0);
28732881
assert((VT == MVT::i32 || VT == MVT::i64) &&
28742882
"Expect result type to be i32 or i64 since N is combinable to BFM");
@@ -2890,13 +2898,13 @@ static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
28902898
// one node (from Rd), ORR is better since it has higher throughput and
28912899
// smaller latency than BFM on many AArch64 processors (and for the rest
28922900
// ORR is at least as good as BFM).
2893-
SDValue LeftShiftedOperand;
2894-
uint64_t LeftShiftAmount;
2895-
if (isWorthFoldingIntoOrrWithLeftShift(Dst, CurDAG, LeftShiftedOperand,
2896-
LeftShiftAmount)) {
2901+
SDValue ShiftedOperand;
2902+
uint64_t ShiftAmount;
2903+
if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
2904+
ShiftAmount)) {
28972905
unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
2898-
SDValue Ops[] = {OrOpd0, LeftShiftedOperand,
2899-
CurDAG->getTargetConstant(LeftShiftAmount, DL, VT)};
2906+
SDValue Ops[] = {OrOpd0, ShiftedOperand,
2907+
CurDAG->getTargetConstant(ShiftAmount, DL, VT)};
29002908
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
29012909
return true;
29022910
}
@@ -2907,7 +2915,6 @@ static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
29072915
assert((!BiggerPattern) && "BiggerPattern should be handled above");
29082916

29092917
uint64_t ShlImm;
2910-
// FIXME: Extend the implementation if OrOpd0 is an SRL node.
29112918
if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) &&
29122919
OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
29132920
unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
@@ -3022,11 +3029,9 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
30223029
Dst = OrOpd1Val;
30233030

30243031
// Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3025-
// with left-shifted operand is more efficient.
3026-
// FIXME: Extend this to compare AArch64::BFM and AArch64::ORR with
3027-
// right-shifted operand as well.
3028-
if (tryOrrWithLeftShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3029-
BiggerPattern))
3032+
// with shifted operand is more efficient.
3033+
if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3034+
BiggerPattern))
30303035
return true;
30313036

30323037
// both parts match

llvm/test/CodeGen/AArch64/fcopysign.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ define fp128@copysign1() {
6363
; CHECK-NEXT: ldr w8, [x8, :lo12:val_float]
6464
; CHECK-NEXT: ldrb w9, [sp, #15]
6565
; CHECK-NEXT: and w8, w8, #0x80000000
66-
; CHECK-NEXT: lsr w8, w8, #24
67-
; CHECK-NEXT: bfxil w8, w9, #0, #7
66+
; CHECK-NEXT: and w9, w9, #0x7f
67+
; CHECK-NEXT: orr w8, w9, w8, lsr #24
6868
; CHECK-NEXT: strb w8, [sp, #15]
6969
; CHECK-NEXT: ldr q0, [sp], #16
7070
; CHECK-NEXT: ret
@@ -79,8 +79,8 @@ define fp128@copysign1() {
7979
; CHECK-NONEON-NEXT: ldr w8, [x8, :lo12:val_float]
8080
; CHECK-NONEON-NEXT: ldrb w9, [sp, #15]
8181
; CHECK-NONEON-NEXT: and w8, w8, #0x80000000
82-
; CHECK-NONEON-NEXT: lsr w8, w8, #24
83-
; CHECK-NONEON-NEXT: bfxil w8, w9, #0, #7
82+
; CHECK-NONEON-NEXT: and w9, w9, #0x7f
83+
; CHECK-NONEON-NEXT: orr w8, w9, w8, lsr #24
8484
; CHECK-NONEON-NEXT: strb w8, [sp, #15]
8585
; CHECK-NONEON-NEXT: ldr q0, [sp], #16
8686
; CHECK-NONEON-NEXT: ret

0 commit comments

Comments
 (0)