Commit 100763a

[DAG] Extend SearchForAndLoads with any_extend handling
This extends the code in SearchForAndLoads so it can look through ANY_EXTEND nodes, which can be created from mismatching IR types where the AND node we begin from only demands the low parts of the register. That demand turns zext and sext nodes into any_extends, since only the low bits are needed. To look through ANY_EXTEND nodes we need to handle mismatching types in a few places, potentially truncating the mask to the size of the final load.

Recommitted with a more conservative check for the type of the extend.

Differential Revision: https://reviews.llvm.org/D117457
1 parent 31c0e52 commit 100763a
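
For illustration, the first test updated below (load32_and16_and) exercises this pattern: an i32 load is zero-extended to i64 and masked to 16 bits, so only the low half of the value is demanded, the extend becomes an any_extend, and the load can shrink to a 16-bit ldrh. A rough sketch of that IR follows; the exact function body lives in combine-andintoload.ll, and the two and instructions are reconstructed from the test name and the CHECK lines rather than copied verbatim:

define i64 @load32_and16_and(i32* %p, i64 %y) {
  %x = load i32, i32* %p, align 4
  %xz = zext i32 %x to i64       ; only the low 16 bits end up demanded
  %a = and i64 %xz, %y
  %r = and i64 %a, 65535         ; the mask that SearchForAndLoads starts from
  ret i64 %r
}

With this change the combiner walks through the any_extend created for %xz, truncates the 0xffff mask to the load's type, and narrows the load, which is why the updated CHECK lines expect ldrh w8, [x0] followed by a single and instead of a 32-bit ldr plus two ands.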

File tree

4 files changed: +167 -84 lines changed

4 files changed

+167
-84
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 29 additions & 12 deletions
@@ -5491,6 +5491,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
 
     // Some constants may need fixing up later if they are too large.
     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+      if (Mask->getValueType(0) != C->getValueType(0))
+        return false;
       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
         NodesWithConsts.insert(N);
@@ -5524,16 +5526,25 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
     case ISD::AssertZext: {
       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-      EVT VT = Op.getOpcode() == ISD::AssertZext ?
-        cast<VTSDNode>(Op.getOperand(1))->getVT() :
-        Op.getOperand(0).getValueType();
+      EVT VT = Op.getOpcode() == ISD::AssertZext
+                   ? cast<VTSDNode>(Op.getOperand(1))->getVT()
+                   : Op.getOperand(0).getValueType();
 
       // We can accept extending nodes if the mask is wider or an equal
       // width to the original type.
       if (ExtVT.bitsGE(VT))
         continue;
       break;
     }
+    case ISD::ANY_EXTEND: {
+      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+      EVT VT = Op.getOperand(0).getValueType();
+      if (ExtVT.bitsGE(VT))
+        break;
+      // Fallthrough to searching for nodes from the operands of the extend.
+      LLVM_FALLTHROUGH;
+    }
     case ISD::OR:
     case ISD::XOR:
     case ISD::AND:
@@ -5593,12 +5604,14 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
   // masking.
   if (FixupNode) {
     LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
-    SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
-                              FixupNode->getValueType(0),
-                              SDValue(FixupNode, 0), MaskOp);
+    SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
+                                         FixupNode->getValueType(0));
+    SDValue And =
+        DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
+                    SDValue(FixupNode, 0), MaskOpT);
     DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
     if (And.getOpcode() == ISD ::AND)
-      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
   }
 
   // Narrow any constants that need it.
@@ -5607,23 +5620,27 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
     SDValue Op1 = LogicN->getOperand(1);
 
     if (isa<ConstantSDNode>(Op0))
-        std::swap(Op0, Op1);
+      std::swap(Op0, Op1);
 
-    SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
-                              Op1, MaskOp);
+    SDValue MaskOpT =
+        DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
+    SDValue And =
+        DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
 
     DAG.UpdateNodeOperands(LogicN, Op0, And);
   }
 
   // Create narrow loads.
   for (auto *Load : Loads) {
     LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+    SDValue MaskOpT =
+        DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
     SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
-                              SDValue(Load, 0), MaskOp);
+                              SDValue(Load, 0), MaskOpT);
     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
     if (And.getOpcode() == ISD ::AND)
       And = SDValue(
-          DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
+          DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
     SDValue NewLoad = reduceLoadWidth(And.getNode());
     assert(NewLoad &&
            "Shouldn't be masking the load if it can't be narrowed");

llvm/test/CodeGen/AArch64/combine-andintoload.ll

Lines changed: 98 additions & 68 deletions
@@ -5,16 +5,14 @@
 define i64 @load32_and16_and(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_and:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -26,16 +24,14 @@ define i64 @load32_and16_and(i32* %p, i64 %y) {
 define i64 @load32_and16_andr(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_andr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_andr:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -47,16 +43,14 @@ define i64 @load32_and16_andr(i32* %p, i64 %y) {
 define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_and_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_and_sext:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = sext i32 %x to i64
@@ -68,16 +62,16 @@ define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
 define i64 @load32_and16_or(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_or:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -170,16 +164,14 @@ define i64 @load16_and16(i16* %p, i64 %y) {
 define i64 @load16_and8(i16* %p, i64 %y) {
 ; CHECK-LABEL: load16_and8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load16_and8:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -232,15 +224,13 @@ define i64 @load8_and16_zext(i8* %p, i8 %y) {
 ; CHECK-LABEL: load8_and16_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load8_and16_zext:
 ; CHECKBE:       // %bb.0:
 ; CHECKBE-NEXT:    ldrb w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i8, i8* %p, align 4
   %xz = zext i8 %x to i64
@@ -296,16 +286,14 @@ define i64 @load8_and16_or(i8* %p, i64 %y) {
 define i64 @load16_and8_manyext(i16* %p, i32 %y) {
 ; CHECK-LABEL: load16_and8_manyext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load16_and8_manyext:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i32
@@ -318,18 +306,16 @@ define i64 @load16_and8_manyext(i16* %p, i32 %y) {
 define i64 @multiple_load(i16* %p, i32* %q) {
 ; CHECK-LABEL: multiple_load:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x1]
+; CHECK-NEXT:    and w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: multiple_load:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    ldr w9, [x1]
-; CHECKBE-NEXT:    and w8, w9, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    ldrb w9, [x1, #3]
+; CHECKBE-NEXT:    and w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -343,18 +329,16 @@ define i64 @multiple_load(i16* %p, i32* %q) {
 define i64 @multiple_load_or(i16* %p, i32* %q) {
 ; CHECK-LABEL: multiple_load_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x1]
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: multiple_load_or:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    ldr w9, [x1]
-; CHECKBE-NEXT:    orr w8, w9, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    ldrb w9, [x1, #3]
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -368,16 +352,16 @@ define i64 @multiple_load_or(i16* %p, i32* %q) {
 define i64 @load32_and16_zexty(i32* %p, i32 %y) {
 ; CHECK-LABEL: load32_and16_zexty:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_zexty:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -390,16 +374,16 @@ define i64 @load32_and16_zexty(i32* %p, i32 %y) {
 define i64 @load32_and16_sexty(i32* %p, i32 %y) {
 ; CHECK-LABEL: load32_and16_sexty:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_sexty:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -408,3 +392,49 @@ define i64 @load32_and16_sexty(i32* %p, i32 %y) {
   %r = and i64 %a, 65535
   ret i64 %r
 }
+
+define zeroext i1 @bigger(i8* nocapture readonly %c, i8* nocapture readonly %e, i64 %d, i64 %p1) {
+; CHECK-LABEL: bigger:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldrb w8, [x0, x2]
+; CHECK-NEXT:    and w10, w3, #0x7
+; CHECK-NEXT:    ldrb w9, [x1, x2]
+; CHECK-NEXT:    mov w11, #8
+; CHECK-NEXT:    sub w10, w11, w10
+; CHECK-NEXT:    eor w8, w9, w8
+; CHECK-NEXT:    mov w9, #5
+; CHECK-NEXT:    lsr w8, w8, w10
+; CHECK-NEXT:    tst w8, w9
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+; CHECKBE-LABEL: bigger:
+; CHECKBE:       // %bb.0: // %entry
+; CHECKBE-NEXT:    ldrb w8, [x0, x2]
+; CHECKBE-NEXT:    and w10, w3, #0x7
+; CHECKBE-NEXT:    ldrb w9, [x1, x2]
+; CHECKBE-NEXT:    mov w11, #8
+; CHECKBE-NEXT:    sub w10, w11, w10
+; CHECKBE-NEXT:    eor w8, w9, w8
+; CHECKBE-NEXT:    mov w9, #5
+; CHECKBE-NEXT:    lsr w8, w8, w10
+; CHECKBE-NEXT:    tst w8, w9
+; CHECKBE-NEXT:    cset w0, eq
+; CHECKBE-NEXT:    ret
+entry:
+  %0 = trunc i64 %p1 to i16
+  %1 = and i16 %0, 7
+  %sh_prom = sub nuw nsw i16 8, %1
+  %shl = shl nuw nsw i16 5, %sh_prom
+  %arrayidx = getelementptr inbounds i8, i8* %c, i64 %d
+  %2 = load i8, i8* %arrayidx, align 1
+  %3 = and i16 %shl, 255
+  %conv2 = zext i16 %3 to i32
+  %arrayidx3 = getelementptr inbounds i8, i8* %e, i64 %d
+  %4 = load i8, i8* %arrayidx3, align 1
+  %5 = xor i8 %4, %2
+  %6 = zext i8 %5 to i32
+  %7 = and i32 %6, %conv2
+  %cmp.not = icmp eq i32 %7, 0
+  ret i1 %cmp.not
+}
