
Commit ae7c8dd

Author: Chad Rosier
[AArch64] Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)'.

Mask0Imm and ~Mask1Imm must be equivalent, and one of the MaskImms must be a
shifted mask (e.g., 0x000ffff0). Both 'and's must have a single use.

This changes code like:

  and w8, w0, #0xffff000f
  and w9, w1, #0x0000fff0
  orr w0, w9, w8

into:

  lsr w8, w1, #4
  bfi w0, w8, #4, #12

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@270063 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ab4af72 commit ae7c8dd
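To make the equivalence concrete: the patch replaces two ANDs and an ORR with one shift plus one bitfield insert. The following standalone C++ sketch models BFI's semantics and checks the rewrite on the commit-message example; the `bfi` helper is illustrative, not part of the patch.

  #include <cassert>
  #include <cstdint>

  // Model of AArch64 "bfi dst, src, #lsb, #width": insert the low `width`
  // bits of `src` into `dst` at bit `lsb`, preserving all other dst bits.
  static uint32_t bfi(uint32_t dst, uint32_t src, unsigned lsb, unsigned width) {
    uint32_t mask = ((width == 32) ? ~0u : ((1u << width) - 1u)) << lsb;
    return (dst & ~mask) | ((src << lsb) & mask);
  }

  int main() {
    const uint32_t vals[] = {0u, 0xdeadbeefu, 0x0000fff0u, ~0u};
    for (uint32_t a : vals)
      for (uint32_t b : vals) {
        // Before: and w8, w0, #0xffff000f; and w9, w1, #0x0000fff0; orr w0, w9, w8
        uint32_t orr = (a & 0xffff000fu) | (b & 0x0000fff0u);
        // After: lsr w8, w1, #4; bfi w0, w8, #4, #12  (w0 initially holds a)
        uint32_t ins = bfi(a, b >> 4, 4, 12);
        assert(orr == ins);
      }
    return 0;
  }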

File tree

2 files changed: +126 -0 lines changed

lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 59 additions & 0 deletions
@@ -1974,6 +1974,13 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   return true;
 }
 
+static bool isShiftedMask(uint64_t Mask, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return isShiftedMask_32(Mask);
+  return isShiftedMask_64(Mask);
+}
+
 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                       SelectionDAG *CurDAG) {
   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
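`isShiftedMask_32`/`isShiftedMask_64` are LLVM's existing MathExtras helpers; they test whether the set bits of a value form one contiguous, non-empty run. A portable sketch of the same predicate, for readers without the LLVM tree at hand (my own reimplementation, not the library code):

  #include <cstdint>

  // True iff the set bits of Mask form a single contiguous, non-empty run,
  // e.g. 0x000ffff0 (yes) vs. 0xffff000f (no: its run of ones wraps around).
  static bool isShiftedMask32(uint32_t Mask) {
    if (Mask == 0)
      return false;
    uint32_t Filled = Mask | (Mask - 1); // fill zeros below the lowest set bit
    return (Filled & (Filled + 1)) == 0; // result must look like 0b0...01...1
  }

The 64-bit variant is identical with `uint64_t`.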
@@ -2084,6 +2091,58 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
     return true;
   }
+
+  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
+  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
+  // mask (e.g., 0x000ffff0).
+  uint64_t Mask0Imm, Mask1Imm;
+  SDValue And0 = N->getOperand(0);
+  SDValue And1 = N->getOperand(1);
+  if (And0.hasOneUse() && And1.hasOneUse() &&
+      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
+      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
+      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
+      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
+
+    // We should have already caught the case where we extract hi and low parts.
+    // E.g. BFXIL from 'or (and X, 0xffff0000), (and Y, 0x0000ffff)'.
+    assert(!(isShiftedMask(Mask0Imm, VT) && isShiftedMask(Mask1Imm, VT)) &&
+           "BFXIL should have already been optimized.");
+
+    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
+    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
+    // bits to be inserted.
+    if (isShiftedMask(Mask0Imm, VT)) {
+      std::swap(And0, And1);
+      std::swap(Mask0Imm, Mask1Imm);
+    }
+
+    SDValue Src = And1->getOperand(0);
+    SDValue Dst = And0->getOperand(0);
+    unsigned LSB = countTrailingZeros(Mask1Imm);
+    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
+
+    // The BFXIL inserts the low-order bits from a source register, so right
+    // shift the needed bits into place.
+    SDLoc DL(N);
+    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
+    SDNode *LSR = CurDAG->getMachineNode(
+        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
+        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
+
+    // BFXIL is an alias of BFM, so translate to BFM operands.
+    unsigned ImmR = (BitWidth - LSB) % BitWidth;
+    unsigned ImmS = Width - 1;
+
+    // Create the BFXIL instruction.
+    SDValue Ops[] = {Dst, SDValue(LSR, 0),
+                     CurDAG->getTargetConstant(ImmR, DL, VT),
+                     CurDAG->getTargetConstant(ImmS, DL, VT)};
+    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
+    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+    return true;
+  }
+
   return false;
 }
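Working the first test case through the code above: Mask1Imm = 0x0000fff0 gives LSB = countTrailingZeros = 4; Mask0Imm = 0xffff000f has 20 set bits, so Width = 32 - 20 = 12; hence ImmR = (32 - 4) % 32 = 28 and ImmS = 12 - 1 = 11. A standalone C++20 sketch of that arithmetic (illustrative only, not compiler code):

  #include <bit>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const unsigned BitWidth = 32;
    const uint32_t Mask0 = 0xffff000f; // keeps the destination's bits
    const uint32_t Mask1 = 0x0000fff0; // shifted mask over the inserted bits
    unsigned LSB = std::countr_zero(Mask1);           // 4
    unsigned Width = BitWidth - std::popcount(Mask0); // 12
    unsigned ImmR = (BitWidth - LSB) % BitWidth;      // 28
    unsigned ImmS = Width - 1;                        // 11
    printf("BFM wd, wn, #%u, #%u  <=>  bfi wd, wn, #%u, #%u\n",
           ImmR, ImmS, LSB, Width);
    return 0;
  }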

test/CodeGen/AArch64/bitfield-insert.ll

Lines changed: 67 additions & 0 deletions
@@ -311,3 +311,70 @@ entry:
   store i16 %trunc, i16* %gep
   ret void
 }
+
+; The next set of tests generate a BFXIL from 'or (and X, Mask0Imm),
+; (and Y, Mask1Imm)' iff Mask0Imm and ~Mask1Imm are equivalent and one of the
+; MaskImms is a shifted mask (e.g., 0x000ffff0).
+
+; CHECK-LABEL: @test_or_and_and1
+; CHECK: lsr w8, w1, #4
+; CHECK: bfi w0, w8, #4, #12
+define i32 @test_or_and_and1(i32 %a, i32 %b) {
+entry:
+  %and = and i32 %a, -65521 ; 0xffff000f
+  %and1 = and i32 %b, 65520 ; 0x0000fff0
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and2
+; CHECK: lsr w8, w0, #4
+; CHECK: bfi w1, w8, #4, #12
+define i32 @test_or_and_and2(i32 %a, i32 %b) {
+entry:
+  %and = and i32 %a, 65520 ; 0x0000fff0
+  %and1 = and i32 %b, -65521 ; 0xffff000f
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and3
+; CHECK: lsr x8, x1, #16
+; CHECK: bfi x0, x8, #16, #32
+define i64 @test_or_and_and3(i64 %a, i64 %b) {
+entry:
+  %and = and i64 %a, -281474976645121 ; 0xffff00000000ffff
+  %and1 = and i64 %b, 281474976645120 ; 0x0000ffffffff0000
+  %or = or i64 %and1, %and
+  ret i64 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and4
+; CHECK: and w8, w0, #0xffff000f
+; CHECK: and w9, w1, #0xfff0
+; CHECK: orr w0, w9, w8
+; CHECK: str w8, [x2]
+define i32 @test_or_and_and4(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %and = and i32 %a, -65521
+  store i32 %and, i32* %ptr, align 4
+  %and2 = and i32 %b, 65520
+  %or = or i32 %and2, %and
+  ret i32 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and5
+; CHECK: and w8, w1, #0xfff0
+; CHECK: and w9, w0, #0xffff000f
+; CHECK: orr w0, w8, w9
+; CHECK: str w8, [x2]
+define i32 @test_or_and_and5(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %and = and i32 %b, 65520
+  store i32 %and, i32* %ptr, align 4
+  %and1 = and i32 %a, -65521
+  %or = or i32 %and, %and1
+  ret i32 %or
+}
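The 64-bit case (@test_or_and_and3) can be sanity-checked the same way outside the compiler. A minimal sketch, with a hypothetical `bfi64` helper modeling the instruction (not part of the test file):

  #include <cassert>
  #include <cstdint>

  // Model of "bfi xd, xn, #lsb, #width" for 64-bit registers.
  static uint64_t bfi64(uint64_t dst, uint64_t src, unsigned lsb,
                        unsigned width) {
    uint64_t mask = ((width == 64) ? ~0ull : ((1ull << width) - 1)) << lsb;
    return (dst & ~mask) | ((src << lsb) & mask);
  }

  int main() {
    const uint64_t vals[] = {0, 0x0123456789abcdefull, ~0ull};
    for (uint64_t a : vals)
      for (uint64_t b : vals) {
        // %or = (a & 0xffff00000000ffff) | (b & 0x0000ffffffff0000)
        uint64_t orr = (a & 0xffff00000000ffffull) | (b & 0x0000ffffffff0000ull);
        // Expected lowering: lsr x8, x1, #16; bfi x0, x8, #16, #32
        assert(orr == bfi64(a, b >> 16, 16, 32));
      }
    return 0;
  }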
