[LLVM][AArch64] Correctly lower funnel shifts by constants. #140058

Merged · 4 commits · May 20, 2025

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (6 additions, 1 deletion)
@@ -149,6 +149,10 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
     cl::desc("DAG combiner enable load/<replace bytes>/store with "
              "a narrower store"));
 
+static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
+                                     cl::init(false),
+                                     cl::desc("Disable the DAG combiner"));
+
 namespace {
 
 class DAGCombiner {
@@ -248,7 +252,8 @@ namespace {
         STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
         BatchAA(BatchAA) {
     ForCodeSize = DAG.shouldOptForSize();
-    DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
+    DisableGenericCombines =
+        DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
 
     MaximumLegalStoreInBits = 0;
     // We use the minimum store size here, since that's all we can guarantee
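The new hidden flag is what lets the test below run llc with the generic combiner switched off (llc -combiner-disabled). As a minimal standalone sketch of the cl::opt pattern in play, assuming only LLVM's Support headers; the flag declaration mirrors the patch, the main() driver is ours for illustration:

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

// Hidden flags are left out of -help output but parse normally.
static llvm::cl::opt<bool>
    DisableCombines("combiner-disabled", llvm::cl::Hidden,
                    llvm::cl::init(false),
                    llvm::cl::desc("Disable the DAG combiner"));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // Passing -combiner-disabled flips the flag; call sites read it as a
  // plain bool, just as the combiner ORs it into DisableGenericCombines.
  llvm::outs() << "combines disabled: " << (DisableCombines ? "yes" : "no")
               << '\n';
  return 0;
}

Because the flag is OR'd with the subtarget hook, either the command line or the target can disable generic combines; the default of false leaves normal compilation unchanged.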
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (19 additions, 5 deletions)
@@ -7259,20 +7259,34 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 // FSHL is converted to FSHR before deciding what to do with it
 static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   SDValue Shifts = Op.getOperand(2);
-  // Check if the shift amount is a constant
+  // Check if the shift amount is a constant and normalise to [0, SrcBitLen)
   // If opcode is FSHL, convert it to FSHR
   if (auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts)) {
     SDLoc DL(Op);
     MVT VT = Op.getSimpleValueType();
+    unsigned int NewShiftNo = ShiftNo->getZExtValue() % VT.getFixedSizeInBits();
 
     if (Op.getOpcode() == ISD::FSHL) {
-      unsigned int NewShiftNo =
-          VT.getFixedSizeInBits() - ShiftNo->getZExtValue();
+      if (NewShiftNo == 0)
+        return Op.getOperand(0);
+
+      NewShiftNo = VT.getFixedSizeInBits() - NewShiftNo;
+      return DAG.getNode(
+          ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+    }
+
+    if (Op.getOpcode() == ISD::FSHR) {
+      if (NewShiftNo == 0)
+        return Op.getOperand(1);
+
+      if (ShiftNo->getZExtValue() == NewShiftNo)
+        return Op;
+
+      // Rewrite using the normalised shift amount.
       return DAG.getNode(
           ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
          DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
-    } else if (Op.getOpcode() == ISD::FSHR) {
-      return Op;
     }
   }
 
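To make the rewrite concrete, here is a small standalone model (ours, not the LLVM source) of the constant-shift arithmetic above, checked against the shift amounts the new tests exercise:

#include <cassert>

// For fshl(a, b, c) on an integer of the given bit width, return the
// equivalent fshr shift amount, or -1 when the normalised amount is zero
// and the funnel shift is just a copy of the first operand.
static int fshrAmountForFshl(unsigned c, unsigned width) {
  unsigned norm = c % width;   // normalise to [0, width)
  if (norm == 0)
    return -1;                 // fshl(a, b, k*width) == a
  return width - norm;         // fshl by c == fshr by width - norm
}

int main() {
  assert(fshrAmountForFshl(0, 32) == -1);  // fshl_i32_by_zero: plain mov
  assert(fshrAmountForFshl(16, 32) == 16); // fshl_i32_by_half_srclen: extr #16
  assert(fshrAmountForFshl(32, 32) == -1); // fshl_i32_by_srclen: plain mov
  assert(fshrAmountForFshl(33, 32) == 31); // fshl_i32_by_srclen_plus1: extr #31
  assert(fshrAmountForFshl(65, 64) == 63); // fshl_i64_by_srclen_plus1: extr #63
  return 0;
}

For FSHR the normalised amount is used directly, so fshr by 33 on i32 becomes extr #1, and amounts that normalise to zero simply return the second operand. The surviving in-range FSHR then selects to a single EXTR instruction.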
llvm/test/CodeGen/AArch64/fsh-combiner-disabled.ll (new file, 134 additions)
@@ -0,0 +1,134 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc %s -o - | FileCheck %s
; RUN: llc -combiner-disabled %s -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Verify lowering code in isolation to ensure we can lower shifts that would
; normally be optimised away.
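; For example, fshl on i32 by 33 normalises to 33 % 32 == 1 and is rewritten
; as fshr by 32 - 1 == 31 (a single extr #31), while amounts that normalise
; to zero (0, 32, 64, ...) copy the matching operand with a mov.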

define i32 @fshl_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 0)
ret i32 %r
}

define i32 @fshl_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_half_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #16
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 16)
ret i32 %r
}

define i32 @fshl_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 32)
ret i32 %r
}

define i32 @fshl_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshl_i32_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #31
; CHECK-NEXT: ret
%r = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 33)
ret i32 %r
}

define i64 @fshl_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 0)
ret i64 %r
}

define i64 @fshl_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 64)
ret i64 %r
}

define i64 @fshl_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshl_i64_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x1, x2, #63
; CHECK-NEXT: ret
%r = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 65)
ret i64 %r
}

define i32 @fshr_i32_by_zero(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 0)
ret i32 %r
}

define i32 @fshr_i32_by_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w2
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 32)
ret i32 %r
}

define i32 @fshr_i32_by_half_srclen(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_half_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #16
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 16)
ret i32 %r
}

define i32 @fshr_i32_by_srclen_plus1(i32 %unused, i32 %a, i32 %b) {
; CHECK-LABEL: fshr_i32_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w1, w2, #1
; CHECK-NEXT: ret
%r = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 33)
ret i32 %r
}

define i64 @fshr_i64_by_zero(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 0)
ret i64 %r
}

define i64 @fshr_i64_by_srclen(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_srclen:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 64)
ret i64 %r
}

define i64 @fshr_i64_by_srclen_plus1(i64 %unused, i64 %a, i64 %b) {
; CHECK-LABEL: fshr_i64_by_srclen_plus1:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x1, x2, #1
; CHECK-NEXT: ret
%r = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 65)
ret i64 %r
}