[LLVM][AArch64] Correctly lower funnel shifts by constants. #140058

Merged · 4 commits · May 20, 2025

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (6 additions, 1 deletion)
@@ -149,6 +149,10 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
     cl::desc("DAG combiner enable load/<replace bytes>/store with "
              "a narrower store"));
 
+static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
+                                     cl::init(false),
+                                     cl::desc("Disable the DAG combiner"));
+
 namespace {
 
 class DAGCombiner {
@@ -248,7 +252,8 @@ namespace {
         STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
         BatchAA(BatchAA) {
     ForCodeSize = DAG.shouldOptForSize();
-    DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
+    DisableGenericCombines =
+        DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
 
     MaximumLegalStoreInBits = 0;
     // We use the minimum store size here, since that's all we can guarantee
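The new hidden flag is what lets the test below run llc with the generic combiner switched off (llc -combiner-disabled). As a minimal standalone sketch of the cl::opt pattern in play, assuming only LLVM's Support headers; the flag declaration mirrors the patch, the main() driver is ours for illustration:

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

// Hidden flags are left out of -help output but parse normally.
static llvm::cl::opt<bool>
    DisableCombines("combiner-disabled", llvm::cl::Hidden,
                    llvm::cl::init(false),
                    llvm::cl::desc("Disable the DAG combiner"));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // Passing -combiner-disabled flips the flag; call sites read it as a
  // plain bool, just as the combiner ORs it into DisableGenericCombines.
  llvm::outs() << "combines disabled: " << (DisableCombines ? "yes" : "no")
               << '\n';
  return 0;
}

Because the flag is OR'd with the subtarget hook, either the command line or the target can disable generic combines; the default of false leaves normal compilation unchanged.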
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (19 additions, 5 deletions)
@@ -7259,20 +7259,34 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 // FSHL is converted to FSHR before deciding what to do with it
 static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   SDValue Shifts = Op.getOperand(2);
-  // Check if the shift amount is a constant
+  // Check if the shift amount is a constant and normalise to [0, SrcBitLen)
   // If opcode is FSHL, convert it to FSHR
   if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
     SDLoc DL(Op);
     MVT VT = Op.getSimpleValueType();
+    unsigned int NewShiftNo = ShiftNo->getZExtValue() % VT.getFixedSizeInBits();
 
     if (Op.getOpcode() == ISD::FSHL) {
-      unsigned int NewShiftNo =
-          VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+      if (NewShiftNo == 0)
+        return Op.getOperand(0);
+
+      NewShiftNo = VT.getFixedSizeInBits() - NewShiftNo;
+      return DAG.getNode(
+          ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+    }
+
+    if (Op.getOpcode() == ISD::FSHR) {
+      if (NewShiftNo == 0)
+        return Op.getOperand(1);
+
+      if (ShiftNo->getZExtValue() == NewShiftNo)
+        return Op;
+
+      // Rewrite using the normalised shift amount.
       return DAG.getNode(
           ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
-    } else if (Op.getOpcode() == ISD::FSHR) {
-      return Op;
     }
   }
 
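To make the rewrite concrete, here is a small standalone model (ours, not the LLVM source) of the constant-shift arithmetic above, checked against the shift amounts the new tests exercise:

#include <cassert>

// For fshl(a, b, c) on an integer of the given bit width, return the
// equivalent fshr shift amount, or -1 when the normalised amount is zero
// and the funnel shift is just a copy of the first operand.
static int fshrAmountForFshl(unsigned c, unsigned width) {
  unsigned norm = c % width;   // normalise to [0, width)
  if (norm == 0)
    return -1;                 // fshl(a, b, k*width) == a
  return width - norm;         // fshl by c == fshr by width - norm
}

int main() {
  assert(fshrAmountForFshl(0, 32) == -1);  // fshl_i32_by_zero: plain mov
  assert(fshrAmountForFshl(16, 32) == 16); // fshl_i32_by_half_srclen: extr #16
  assert(fshrAmountForFshl(32, 32) == -1); // fshl_i32_by_srclen: plain mov
  assert(fshrAmountForFshl(33, 32) == 31); // fshl_i32_by_srclen_plus1: extr #31
  assert(fshrAmountForFshl(65, 64) == 63); // fshl_i64_by_srclen_plus1: extr #63
  return 0;
}

For FSHR the normalised amount is used directly, so fshr by 33 on i32 becomes extr #1, and amounts that normalise to zero simply return the second operand. The surviving in-range FSHR then selects to a single EXTR instruction.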
llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll (new file, 134 additions)
@@ -0,0 +1,134 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc %s -o - | FileCheck %s
; RUN: llc -combiner-disabled %s -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Verify lowering code in isolation to ensure we can lower shifts that would
; normally be optimised away.
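; For example, fshl on i32 by 33 normalises to 33 % 32 == 1 and is rewritten
; as fshr by 32 - 1 == 31 (a single extr #31), while amounts that normalise
; to zero (0, 32, 64, ...) copy the matching operand with a mov.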

define i32 @fshl_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 0)
ret i32 %r
}

define i32 @fshl_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_half_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #16
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 16)
ret i32 %r
}

define i32 @fshl_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 32)
ret i32 %r
}

define i32 @fshl_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #31
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 33)
ret i32 %r
}

define i64 @fshl_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 0)
ret i64 %r
}

define i64 @fshl_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 64)
ret i64 %r
}

define i64 @fshl_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x1, x2, #63
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 65)
ret i64 %r
}

define i32 @fshr_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 0)
ret i32 %r
}

define i32 @fshr_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 32)
ret i32 %r
}

define i32 @fshr_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_half_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #16
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 16)
ret i32 %r
}

define i32 @fshr_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 33)
ret i32 %r
}

define i64 @fshr_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 0)
ret i64 %r
}

define i64 @fshr_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 64)
ret i64 %r
}

define i64 @fshr_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x1, x2, #1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 65)
ret i64 %r
}