[AArch64]Combine BFXIL to ORR with right shift for ISD::OR instruction selection

mingmingl-llvm · mingmingl-llvm · commit e4e7bdebb180 · 2022-11-08T11:20:43.000-08:00
- This extends the existing helper function 'isWorthFoldingIntoOrrWithLeftShift' into 'isWorthFoldingIntoOrrWithShift', and encode right-shift imm (the encoding of left-shift imm is no-op). Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D137465
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2803,11 +2803,10 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
   return true;
 }
 
-static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst,
-                                               SelectionDAG *CurDAG,
-                                               SDValue &LeftShiftedOperand,
-                                               uint64_t &LeftShiftAmount) {
-  // Avoid folding Dst into ORR-with-left-shift if Dst has other uses than ORR.
+static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
+                                           SDValue &ShiftedOperand,
+                                           uint64_t &ShiftAmount) {
+  // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
   if (!Dst.hasOneUse())
     return false;
 
@@ -2852,23 +2851,32 @@ static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst,
                                       VT),
             CurDAG->getTargetConstant(
                 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
-        LeftShiftedOperand = SDValue(UBFMNode, 0);
-        LeftShiftAmount = NumTrailingZeroInShiftedMask;
+        ShiftedOperand = SDValue(UBFMNode, 0);
+        ShiftAmount = NumTrailingZeroInShiftedMask;
         return true;
       }
     }
-  } else if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
-    LeftShiftedOperand = Dst.getOperand(0);
-    LeftShiftAmount = ShlImm;
+    return false;
+  }
+
+  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
+    ShiftedOperand = Dst.getOperand(0);
+    ShiftAmount = ShlImm;
+    return true;
+  }
+
+  uint64_t SrlImm;
+  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
+    ShiftedOperand = Dst.getOperand(0);
+    ShiftAmount = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
     return true;
   }
-  // FIXME: Extend the implementation to optimize if Dst is an SRL node.
   return false;
 }
 
-static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
-                                SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
-                                const bool BiggerPattern) {
+static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
+                            SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
+                            const bool BiggerPattern) {
   EVT VT = N->getValueType(0);
   assert((VT == MVT::i32 || VT == MVT::i64) &&
          "Expect result type to be i32 or i64 since N is combinable to BFM");
@@ -2890,13 +2898,13 @@ static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
       // one node (from Rd), ORR is better since it has higher throughput and
       // smaller latency than BFM on many AArch64 processors (and for the rest
       // ORR is at least as good as BFM).
-      SDValue LeftShiftedOperand;
-      uint64_t LeftShiftAmount;
-      if (isWorthFoldingIntoOrrWithLeftShift(Dst, CurDAG, LeftShiftedOperand,
-                                             LeftShiftAmount)) {
+      SDValue ShiftedOperand;
+      uint64_t ShiftAmount;
+      if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
+                                         ShiftAmount)) {
         unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
-        SDValue Ops[] = {OrOpd0, LeftShiftedOperand,
-                         CurDAG->getTargetConstant(LeftShiftAmount, DL, VT)};
+        SDValue Ops[] = {OrOpd0, ShiftedOperand,
+                         CurDAG->getTargetConstant(ShiftAmount, DL, VT)};
         CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
         return true;
       }
@@ -2907,7 +2915,6 @@ static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
   assert((!BiggerPattern) && "BiggerPattern should be handled above");
 
   uint64_t ShlImm;
-  // FIXME: Extend the implementation if OrOpd0 is an SRL node.
   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) &&
       OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
     unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
@@ -3022,11 +3029,9 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
       Dst = OrOpd1Val;
 
     // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
-    // with left-shifted operand is more efficient.
-    // FIXME: Extend this to compare AArch64::BFM and AArch64::ORR with
-    // right-shifted operand as well.
-    if (tryOrrWithLeftShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
-                            BiggerPattern))
+    // with shifted operand is more efficient.
+    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
+                        BiggerPattern))
       return true;
 
     // both parts match
diff --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll
@@ -63,8 +63,8 @@ define fp128@copysign1() {
 ; CHECK-NEXT:    ldr w8, [x8, :lo12:val_float]
 ; CHECK-NEXT:    ldrb w9, [sp, #15]
 ; CHECK-NEXT:    and w8, w8, #0x80000000
-; CHECK-NEXT:    lsr w8, w8, #24
-; CHECK-NEXT:    bfxil w8, w9, #0, #7
+; CHECK-NEXT:    and w9, w9, #0x7f
+; CHECK-NEXT:    orr w8, w9, w8, lsr #24
 ; CHECK-NEXT:    strb w8, [sp, #15]
 ; CHECK-NEXT:    ldr q0, [sp], #16
 ; CHECK-NEXT:    ret
@@ -79,8 +79,8 @@ define fp128@copysign1() {
 ; CHECK-NONEON-NEXT:    ldr w8, [x8, :lo12:val_float]
 ; CHECK-NONEON-NEXT:    ldrb w9, [sp, #15]
 ; CHECK-NONEON-NEXT:    and w8, w8, #0x80000000
-; CHECK-NONEON-NEXT:    lsr w8, w8, #24
-; CHECK-NONEON-NEXT:    bfxil w8, w9, #0, #7
+; CHECK-NONEON-NEXT:    and w9, w9, #0x7f
+; CHECK-NONEON-NEXT:    orr w8, w9, w8, lsr #24
 ; CHECK-NONEON-NEXT:    strb w8, [sp, #15]
 ; CHECK-NONEON-NEXT:    ldr q0, [sp], #16
 ; CHECK-NONEON-NEXT:    ret