[DAGCombiner][ARM] Teach reduceLoadWidth to handle (and (srl (load), C, ShiftedMask)) #80342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 4 commits, Feb 5, 2024
21 changes: 17 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14198,7 +14198,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// away, but using an AND rather than a right shift. HasShiftedOffset is used
// to indicate that the narrowed load should be left-shifted ShAmt bits to get
// the result.
-bool HasShiftedOffset = false;
+unsigned ShiftedOffset = 0;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
// extended to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -14243,7 +14243,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if (Mask.isMask()) {
ActiveBits = Mask.countr_one();
} else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
-HasShiftedOffset = true;
+ShiftedOffset = ShAmt;
} else {
return SDValue();
}
@@ -14307,6 +14307,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
SDNode *Mask = *(SRL->use_begin());
if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
+unsigned Offset, ActiveBits;
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT =
@@ -14315,6 +14316,18 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
ExtVT = MaskedVT;
+} else if (ExtType == ISD::ZEXTLOAD &&
+           ShiftMask.isShiftedMask(Offset, ActiveBits) &&
+           (Offset + ShAmt) < VT.getScalarSizeInBits()) {
+  EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+  // If the mask is shifted we can use a narrower load and a shl to insert
+  // the trailing zeros.
+  if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
+      TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
+    ExtVT = MaskedVT;
+    ShAmt = Offset + ShAmt;
+    ShiftedOffset = Offset;
+  }
}
}

@@ -14400,12 +14413,12 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}

-if (HasShiftedOffset) {
+if (ShiftedOffset != 0) {
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into the lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in the
// register.
-SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
+SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
}
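The new path rests on a simple bit identity: if the mask is a shifted mask with Offset trailing zeros and ActiveBits contiguous ones, then (x >> ShAmt) & Mask is the ActiveBits-wide field of x starting at bit ShAmt + Offset, shifted left by Offset, i.e. a narrower zero-extending load plus a shl. A standalone C++ sketch of that identity (hypothetical helper names, not part of the patch; the real code additionally requires that Offset + ShAmt stays inside VT and that the narrow extload is legal, per the guards above):

#include <cassert>
#include <cstdint>

// Original form: (x >> ShAmt) & Mask, with Mask a shifted mask.
static uint32_t maskedShift(uint32_t x, unsigned ShAmt, uint32_t Mask) {
  return (x >> ShAmt) & Mask;
}

// Combined form: read only the ActiveBits-wide field starting at bit
// ShAmt + Offset (the narrowed zextload), then shl by Offset
// (the ShiftedOffset) to re-insert the trailing zeros.
static uint32_t narrowedForm(uint32_t x, unsigned ShAmt, unsigned Offset,
                             unsigned ActiveBits) {
  uint32_t Field = (x >> (ShAmt + Offset)) & ((1u << ActiveBits) - 1u);
  return Field << Offset;
}

int main() {
  for (uint64_t v = 0; v <= 0xFFFFFFFFull; v += 0xF000Du) {
    uint32_t x = (uint32_t)v;
    // Mirrors the ARM tests below: masks 510/1020/2040 all have 8 active
    // bits, with offsets 1/2/3.
    assert(maskedShift(x, 15, 510) == narrowedForm(x, 15, 1, 8));
    assert(maskedShift(x, 22, 1020) == narrowedForm(x, 22, 2, 8));
    assert(maskedShift(x & 0xFFFF, 5, 2040) ==
           narrowedForm(x & 0xFFFF, 5, 3, 8));
  }
  return 0;
}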
112 changes: 112 additions & 0 deletions llvm/test/CodeGen/ARM/shift-combine.ll
@@ -1278,3 +1278,115 @@ define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32
%r = or <4 x i32> %or.ab, %or.cd
ret <4 x i32> %r
}

define arm_aapcscc i32 @test_shift15_and510(ptr nocapture %p) {
; CHECK-ARM-LABEL: test_shift15_and510:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: ldrb r0, [r0, #2]
; CHECK-ARM-NEXT: lsl r0, r0, #1
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: test_shift15_and510:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: ldrb r0, [r0, #1]
; CHECK-BE-NEXT: lsl r0, r0, #1
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: test_shift15_and510:
; CHECK-THUMB: @ %bb.0: @ %entry
; CHECK-THUMB-NEXT: ldrb r0, [r0, #2]
; CHECK-THUMB-NEXT: lsls r0, r0, #1
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: test_shift15_and510:
; CHECK-ALIGN: @ %bb.0: @ %entry
; CHECK-ALIGN-NEXT: ldrb r0, [r0, #2]
; CHECK-ALIGN-NEXT: lsls r0, r0, #1
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: test_shift15_and510:
; CHECK-V6M: @ %bb.0: @ %entry
; CHECK-V6M-NEXT: ldrb r0, [r0, #2]
; CHECK-V6M-NEXT: lsls r0, r0, #1
; CHECK-V6M-NEXT: bx lr
entry:
%load = load i32, ptr %p, align 4
%lshr = lshr i32 %load, 15
%and = and i32 %lshr, 510
ret i32 %and
}

define arm_aapcscc i32 @test_shift22_and1020(ptr nocapture %p) {
; CHECK-ARM-LABEL: test_shift22_and1020:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: ldrb r0, [r0, #3]
; CHECK-ARM-NEXT: lsl r0, r0, #2
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: test_shift22_and1020:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: ldrb r0, [r0]
; CHECK-BE-NEXT: lsl r0, r0, #2
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: test_shift22_and1020:
; CHECK-THUMB: @ %bb.0: @ %entry
; CHECK-THUMB-NEXT: ldrb r0, [r0, #3]
; CHECK-THUMB-NEXT: lsls r0, r0, #2
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: test_shift22_and1020:
; CHECK-ALIGN: @ %bb.0: @ %entry
; CHECK-ALIGN-NEXT: ldrb r0, [r0, #3]
; CHECK-ALIGN-NEXT: lsls r0, r0, #2
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: test_shift22_and1020:
; CHECK-V6M: @ %bb.0: @ %entry
; CHECK-V6M-NEXT: ldrb r0, [r0, #3]
; CHECK-V6M-NEXT: lsls r0, r0, #2
; CHECK-V6M-NEXT: bx lr
entry:
%load = load i32, ptr %p, align 4
%lshr = lshr i32 %load, 22
%and = and i32 %lshr, 1020
ret i32 %and
}

define arm_aapcscc i32 @test_zext_shift5_and2040(ptr nocapture %p) {
; CHECK-ARM-LABEL: test_zext_shift5_and2040:
; CHECK-ARM: @ %bb.0: @ %entry
; CHECK-ARM-NEXT: ldrb r0, [r0, #1]
; CHECK-ARM-NEXT: lsl r0, r0, #3
; CHECK-ARM-NEXT: bx lr
;
; CHECK-BE-LABEL: test_zext_shift5_and2040:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: ldrb r0, [r0]
; CHECK-BE-NEXT: lsl r0, r0, #3
; CHECK-BE-NEXT: bx lr
;
; CHECK-THUMB-LABEL: test_zext_shift5_and2040:
; CHECK-THUMB: @ %bb.0: @ %entry
; CHECK-THUMB-NEXT: ldrb r0, [r0, #1]
; CHECK-THUMB-NEXT: lsls r0, r0, #3
; CHECK-THUMB-NEXT: bx lr
;
; CHECK-ALIGN-LABEL: test_zext_shift5_and2040:
; CHECK-ALIGN: @ %bb.0: @ %entry
; CHECK-ALIGN-NEXT: ldrb r0, [r0, #1]
; CHECK-ALIGN-NEXT: lsls r0, r0, #3
; CHECK-ALIGN-NEXT: bx lr
;
; CHECK-V6M-LABEL: test_zext_shift5_and2040:
; CHECK-V6M: @ %bb.0: @ %entry
; CHECK-V6M-NEXT: ldrb r0, [r0, #1]
; CHECK-V6M-NEXT: lsls r0, r0, #3
; CHECK-V6M-NEXT: bx lr
entry:
%load = load i16, ptr %p, align 2
%zext = zext i16 %load to i32
%lshr = lshr i32 %zext, 5
%and = and i32 %lshr, 2040
ret i32 %and
}
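Reading the CHECK lines against the combine: 510 = 0x1fe decomposes as Offset = 1 and ActiveBits = 8, so ShAmt becomes 15 + 1 = 16 and the i32 load narrows to the single byte holding bits [16,24) (offset #2 little-endian, #1 big-endian), with lsl #1 restoring the trailing zero. Likewise 1020 = 0x3fc selects bits [24,32) followed by lsl #2, and 2040 = 0x7f8 applied to the zero-extended i16 selects bits [8,16) followed by lsl #3.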
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/X86/h-registers-2.ll
@@ -9,12 +9,14 @@ define i32 @foo(ptr %x, i32 %y) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: imull %eax, %eax
; CHECK-NEXT: movzbl %ah, %eax
; CHECK-NEXT: movb $77, (%ecx,%eax,8)
; CHECK-NEXT: shll $3, %eax
; CHECK-NEXT: retl

-%t0 = lshr i32 %y, 8 ; <i32> [#uses=1]
+%t4 = mul i32 %y, %y
+%t0 = lshr i32 %t4, 8 ; <i32> [#uses=1]
%t1 = and i32 %t0, 255 ; <i32> [#uses=2]
%t2 = shl i32 %t1, 3
%t3 = getelementptr i8, ptr %x, i32 %t2 ; <ptr> [#uses=1]
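The h-registers-2.ll change reads as compensation for the new combine: the shifted-and-masked value previously came straight from a load of the argument, which this patch would presumably now narrow to a byte load, removing the movzbl %ah access the test exists to check. Squaring %y first keeps the shift's operand from being a load while preserving the rest of the pattern.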