Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 65074d6

Browse files
author
Chad Rosier
committed
[AArch64] Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm'.
If and only if the value being inserted sets only known zero bits.

This combine transforms things like:

  and  w8, w0, #0xfffffff0
  movz w9, #5
  orr  w0, w8, w9

into:

  movz  w8, #5
  bfxil w0, w8, #0, #4

The combine is tuned to make sure we always reduce the number of instructions. We avoid churning code for what is expected to be performance-neutral changes (e.g., converting AND+OR to OR+BFI).

Differential Revision: http://reviews.llvm.org/D20387

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@270846 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 22f5417 commit 65074d6

File tree

2 files changed

+180
-1
lines changed

2 files changed

+180
-1
lines changed

lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1981,6 +1981,97 @@ static bool isShiftedMask(uint64_t Mask, EVT VT) {
19811981
return isShiftedMask_64(Mask);
19821982
}
19831983

1984+
// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
1985+
// inserted only sets known zero bits.
1986+
// Returns true and replaces N with a BFMWri/BFMXri node on success; returns
// false (leaving N untouched) when the pattern does not match or the
// transformation is not expected to be profitable.
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
1987+
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
1988+
1989+
// Only i32 and i64 map onto the W/X-register BFM forms.
EVT VT = N->getValueType(0);
1990+
if (VT != MVT::i32 && VT != MVT::i64)
1991+
return false;
1992+
1993+
unsigned BitWidth = VT.getSizeInBits();
1994+
1995+
uint64_t OrImm;
1996+
if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
1997+
return false;
1998+
1999+
// Skip this transformation if the OR immediate can already be encoded
// directly as an ORR logical immediate.
2000+
// Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
2001+
// performance neutral.
2002+
if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2003+
return false;
2004+
2005+
uint64_t MaskImm;
2006+
SDValue And = N->getOperand(0);
2007+
// Must be a single use AND with an immediate operand.
2008+
if (!And.hasOneUse() ||
2009+
!isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2010+
return false;
2011+
2012+
// Compute the Known Zero for the AND as this allows us to catch more general
2013+
// cases than just looking for AND with imm.
2014+
APInt KnownZero, KnownOne;
2015+
CurDAG->computeKnownBits(And, KnownZero, KnownOne);
2016+
2017+
// Non-zero in the sense that they're not provably zero, which is the key
2018+
// point if we want to use this value.
2019+
uint64_t NotKnownZero = (~KnownZero).getZExtValue();
2020+
2021+
// The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2022+
if (!isShiftedMask(KnownZero.getZExtValue(), VT))
2023+
return false;
2024+
2025+
// The bits being inserted must only set those bits that are known to be zero.
2026+
if ((OrImm & NotKnownZero) != 0) {
2027+
// FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2028+
// currently handle this case.
2029+
return false;
2030+
}
2031+
2032+
// BFI/BFXIL dst, src, #lsb, #width.
// Since KnownZero is a shifted mask, the run of trailing not-known-zero bits
// gives the start position of the known-zero field, and the field width is
// the population count of KnownZero (BitWidth minus popcount(NotKnownZero)).
2033+
int LSB = countTrailingOnes(NotKnownZero);
2034+
int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2035+
2036+
// BFI/BFXIL is an alias of BFM, so translate to BFM operands.
// ImmR is the right-rotate amount that places the source field at LSB, and
// ImmS marks the field's top bit (per the BFI/BFXIL alias definitions).
2037+
unsigned ImmR = (BitWidth - LSB) % BitWidth;
2038+
unsigned ImmS = Width - 1;
2039+
2040+
// If we're creating a BFI instruction avoid cases where we need more
2041+
// instructions to materialize the BFI constant as compared to the original
2042+
// ORR. A BFXIL will use the same constant as the original ORR, so the code
2043+
// should be no worse in this case.
2044+
bool IsBFI = LSB != 0;
2045+
uint64_t BFIImm = OrImm >> LSB;
2046+
if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2047+
// We have a BFI instruction and we know the constant can't be materialized
2048+
// with a ORR-immediate with the zero register.
2049+
// Count non-zero 16-bit chunks as a proxy for the number of MOVZ/MOVK
// instructions needed to materialize each constant.
unsigned OrChunks = 0, BFIChunks = 0;
2050+
for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2051+
if (((OrImm >> Shift) & 0xFFFF) != 0)
2052+
++OrChunks;
2053+
if (((BFIImm >> Shift) & 0xFFFF) != 0)
2054+
++BFIChunks;
2055+
}
2056+
if (BFIChunks > OrChunks)
2057+
return false;
2058+
}
2059+
2060+
// Materialize the constant to be inserted.
2061+
SDLoc DL(N);
2062+
unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2063+
SDNode *MOVI = CurDAG->getMachineNode(
2064+
MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2065+
2066+
// Create the BFI/BFXIL instruction.
// The destination operand is the AND's input (X), bypassing the AND itself.
2067+
SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2068+
CurDAG->getTargetConstant(ImmR, DL, VT),
2069+
CurDAG->getTargetConstant(ImmS, DL, VT)};
2070+
unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2071+
CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2072+
return true;
2073+
}
2074+
19842075
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
19852076
SelectionDAG *CurDAG) {
19862077
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
@@ -2159,7 +2250,10 @@ bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
21592250
return true;
21602251
}
21612252

2162-
return tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG);
2253+
if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
2254+
return true;
2255+
2256+
return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
21632257
}
21642258

21652259
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the

test/CodeGen/AArch64/bitfield-insert.ll

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,88 @@ entry:
378378
%or = or i32 %and, %and1
379379
ret i32 %or
380380
}
381+
382+
; Single-use AND whose mask clears the low 4 bits; the OR value 5 fits in the
; known-zero field and LSB == 0, so a BFXIL is selected.
; CHECK-LABEL: @test1
383+
; CHECK: movz [[REG:w[0-9]+]], #5
384+
; CHECK: bfxil w0, [[REG]], #0, #4
385+
define i32 @test1(i32 %a) {
386+
%1 = and i32 %a, -16 ; 0xfffffff0
387+
%2 = or i32 %1, 5 ; 0x00000005
388+
ret i32 %2
389+
}
390+
391+
; Non-zero insertion position (bits 22-25 cleared by the mask) selects a BFI.
; Note: 41943040 is 0x02800000 (0b1010 << 22), matching the inserted #10 at
; lsb 22; the previous hex annotation 0x06400000 was wrong.
; CHECK-LABEL: @test2
392+
; CHECK: movz [[REG:w[0-9]+]], #10
393+
; CHECK: bfi w0, [[REG]], #22, #4
394+
define i32 @test2(i32 %a) {
395+
%1 = and i32 %a, -62914561 ; 0xfc3fffff
396+
%2 = or i32 %1, 41943040 ; 0x02800000
397+
ret i32 %2
398+
}
399+
400+
; 64-bit variant of test1: mask clears the low 3 bits, OR value 5 fits, so a
; BFXIL on the X register is selected.
; CHECK-LABEL: @test3
401+
; CHECK: movz [[REG:x[0-9]+]], #5
402+
; CHECK: bfxil x0, [[REG]], #0, #3
403+
define i64 @test3(i64 %a) {
404+
%1 = and i64 %a, -8 ; 0xfffffffffffffff8
405+
%2 = or i64 %1, 5 ; 0x0000000000000005
406+
ret i64 %2
407+
}
408+
409+
; 64-bit BFI: the mask clears bits 1-7 and 18 == 9 << 1, so #9 is inserted at
; lsb 1 with width 7.
; CHECK-LABEL: @test4
410+
; CHECK: movz [[REG:x[0-9]+]], #9
411+
; CHECK: bfi x0, [[REG]], #1, #7
412+
define i64 @test4(i64 %a) {
413+
%1 = and i64 %a, -255 ; 0xffffffffffffff01
414+
%2 = or i64 %1, 18 ; 0x0000000000000012
415+
ret i64 %2
416+
}
417+
418+
; Don't generate BFI/BFXIL if the immediate can be encoded in the ORR.
; Here 6 (0b110) is a valid ORR logical immediate, so the AND+ORR pair is kept.
419+
; CHECK-LABEL: @test5
420+
; CHECK: and [[REG:w[0-9]+]], w0, #0xfffffff0
421+
; CHECK: orr w0, [[REG]], #0x6
422+
define i32 @test5(i32 %a) {
423+
%1 = and i32 %a, 4294967280 ; 0xfffffff0
424+
%2 = or i32 %1, 6 ; 0x00000006
425+
ret i32 %2
426+
}
427+
428+
; BFXIL will use the same constant as the ORR, so we don't care how the constant
; is materialized (it's an equal cost either way).
; LSB == 0 here, so the BFXIL reuses the original OR constant (0x000b5ad2),
; which needs MOVZ+MOVK in both versions.
429+
430+
; CHECK-LABEL: @test6
431+
; CHECK: movz [[REG:w[0-9]+]], #11, lsl #16
432+
; CHECK: movk [[REG]], #23250
433+
; CHECK: bfxil w0, [[REG]], #0, #20
434+
define i32 @test6(i32 %a) {
435+
%1 = and i32 %a, 4293918720 ; 0xfff00000
436+
%2 = or i32 %1, 744146 ; 0x000b5ad2
437+
ret i32 %2
438+
}
439+
440+
; BFIs that require the same number of instructions to materialize the constant
; as the original ORR are okay.
; The shifted BFI constant (744146 >> 1 == 372073 == 0x5ad69) still has two
; non-zero 16-bit chunks, so the MOVZ+MOVK cost matches the original.
441+
442+
; CHECK-LABEL: @test7
443+
; CHECK: movz [[REG:w[0-9]+]], #5, lsl #16
444+
; CHECK: movk [[REG]], #44393
445+
; CHECK: bfi w0, [[REG]], #1, #19
446+
define i32 @test7(i32 %a) {
447+
%1 = and i32 %a, 4293918721 ; 0xfff00001
448+
%2 = or i32 %1, 744146 ; 0x000b5ad2
449+
ret i32 %2
450+
}
451+
452+
; BFIs that require more instructions to materialize the constant as compared
; to the original ORR are not okay. In this case we would be replacing the
; 'and' with a 'movk', which would decrease ILP while using the same number of
; instructions.
;
; Use CHECK-LABEL (as every other test in this file does) so the CHECK lines
; below are anchored to this function's output and cannot accidentally match
; instructions emitted for a preceding function.
; CHECK-LABEL: @test8
; CHECK: movz [[REG2:x[0-9]+]], #36694, lsl #32
; CHECK: and [[REG1:x[0-9]+]], x0, #0xff000000000000ff
; CHECK: movk [[REG2]], #31059, lsl #16
; CHECK: orr x0, [[REG1]], [[REG2]]
define i64 @test8(i64 %a) {
  %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
  %2 = or i64 %1, 157601565442048     ; 0x00008f5679530000
  ret i64 %2
}

0 commit comments

Comments
 (0)