[PowerPC] Check value uses in ValueBit tracking

ecnelises · ecnelises · commit a5d5588093af · 2023-09-12T10:49:03.000+08:00
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1629,30 +1629,41 @@ class BitPermutationSelector {
     bool &Interesting = ValueEntry->first;
     SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
     Bits.resize(NumBits);
+    SDValue LHS = V.getNumOperands() > 0 ? V.getOperand(0) : SDValue();
+    SDValue RHS = V.getNumOperands() > 1 ? V.getOperand(1) : SDValue();
 
     switch (V.getOpcode()) {
     default: break;
     case ISD::ROTL:
-      if (isa<ConstantSDNode>(V.getOperand(1))) {
+      if (isa<ConstantSDNode>(RHS)) {
         unsigned RotAmt = V.getConstantOperandVal(1);
 
-        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
-        for (unsigned i = 0; i < NumBits; ++i)
-          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
+        if (LHS.hasOneUse()) {
+          const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+          for (unsigned i = 0; i < NumBits; ++i)
+            Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
+        } else {
+          for (unsigned i = 0; i < NumBits; ++i)
+            Bits[i] =
+                ValueBit(LHS, i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt);
+        }
 
         return std::make_pair(Interesting = true, &Bits);
       }
       break;
     case ISD::SHL:
     case PPCISD::SHL:
-      if (isa<ConstantSDNode>(V.getOperand(1))) {
+      if (isa<ConstantSDNode>(RHS)) {
         unsigned ShiftAmt = V.getConstantOperandVal(1);
 
-        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
-        for (unsigned i = ShiftAmt; i < NumBits; ++i)
-          Bits[i] = LHSBits[i - ShiftAmt];
+        if (LHS.hasOneUse()) {
+          const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+          for (unsigned i = ShiftAmt; i < NumBits; ++i)
+            Bits[i] = LHSBits[i - ShiftAmt];
+        } else {
+          for (unsigned i = ShiftAmt; i < NumBits; ++i)
+            Bits[i] = ValueBit(LHS, i - ShiftAmt);
+        }
 
         for (unsigned i = 0; i < ShiftAmt; ++i)
           Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1662,13 +1673,17 @@ class BitPermutationSelector {
       break;
     case ISD::SRL:
     case PPCISD::SRL:
-      if (isa<ConstantSDNode>(V.getOperand(1))) {
+      if (isa<ConstantSDNode>(RHS)) {
         unsigned ShiftAmt = V.getConstantOperandVal(1);
 
-        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-
-        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
-          Bits[i] = LHSBits[i + ShiftAmt];
+        if (LHS.hasOneUse()) {
+          const auto &LHSBits = *getValueBits(LHS, NumBits).second;
+          for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
+            Bits[i] = LHSBits[i + ShiftAmt];
+        } else {
+          for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
+            Bits[i] = ValueBit(LHS, i + ShiftAmt);
+        }
 
         for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
           Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1677,23 +1692,27 @@ class BitPermutationSelector {
       }
       break;
     case ISD::AND:
-      if (isa<ConstantSDNode>(V.getOperand(1))) {
+      if (isa<ConstantSDNode>(RHS)) {
         uint64_t Mask = V.getConstantOperandVal(1);
 
-        const SmallVector<ValueBit, 64> *LHSBits;
+        const SmallVector<ValueBit, 64> *LHSBits = nullptr;
         // Mark this as interesting, only if the LHS was also interesting. This
         // prevents the overall procedure from matching a single immediate 'and'
         // (which is non-optimal because such an and might be folded with other
         // things if we don't select it here).
-        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
+        if (LHS.hasOneUse())
+          std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
 
         for (unsigned i = 0; i < NumBits; ++i)
-          if (((Mask >> i) & 1) == 1)
-            Bits[i] = (*LHSBits)[i];
-          else {
+          if (((Mask >> i) & 1) == 1) {
+            if (LHS.hasOneUse())
+              Bits[i] = (*LHSBits)[i];
+            else
+              Bits[i] = ValueBit(LHS, i);
+          } else {
             // AND instruction masks this bit. If the input is already zero,
             // we have nothing to do here. Otherwise, make the bit ConstZero.
-            if ((*LHSBits)[i].isZero())
+            if (LHS.hasOneUse() && (*LHSBits)[i].isZero())
               Bits[i] = (*LHSBits)[i];
             else
               Bits[i] = ValueBit(ValueBit::ConstZero);
@@ -1703,44 +1722,54 @@ class BitPermutationSelector {
       }
       break;
     case ISD::OR: {
-      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
-      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
+      const auto *LHSBits =
+          LHS.hasOneUse() ? getValueBits(LHS, NumBits).second : nullptr;
+      const auto *RHSBits =
+          RHS.hasOneUse() ? getValueBits(RHS, NumBits).second : nullptr;
 
       bool AllDisjoint = true;
       SDValue LastVal = SDValue();
       unsigned LastIdx = 0;
       for (unsigned i = 0; i < NumBits; ++i) {
-        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+        if (LHSBits && RHSBits && (*LHSBits)[i].isZero() &&
+            (*RHSBits)[i].isZero()) {
           // If both inputs are known to be zero and one is ConstZero and
           // another is VariableKnownToBeZero, we can select whichever
           // we like. To minimize the number of bit groups, we select
           // VariableKnownToBeZero if this bit is the next bit of the same
           // input variable from the previous bit. Otherwise, we select
           // ConstZero.
-          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
-              LHSBits[i].getValueBitIndex() == LastIdx + 1)
-            Bits[i] = LHSBits[i];
-          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
-                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
-            Bits[i] = RHSBits[i];
+          const auto &LBits = *LHSBits;
+          const auto &RBits = *RHSBits;
+          if (LBits[i].hasValue() && LBits[i].getValue() == LastVal &&
+              LBits[i].getValueBitIndex() == LastIdx + 1)
+            Bits[i] = LBits[i];
+          else if (RBits[i].hasValue() && RBits[i].getValue() == LastVal &&
+                   RBits[i].getValueBitIndex() == LastIdx + 1)
+            Bits[i] = RBits[i];
           else
             Bits[i] = ValueBit(ValueBit::ConstZero);
-        }
-        else if (LHSBits[i].isZero())
-          Bits[i] = RHSBits[i];
-        else if (RHSBits[i].isZero())
-          Bits[i] = LHSBits[i];
-        else {
+        } else if (LHSBits && (*LHSBits)[i].isZero()) {
+          if (RHSBits)
+            Bits[i] = (*RHSBits)[i];
+          else
+            Bits[i] = ValueBit(RHS, i);
+        } else if (RHSBits && (*RHSBits)[i].isZero()) {
+          if (LHSBits)
+            Bits[i] = (*LHSBits)[i];
+          else
+            Bits[i] = ValueBit(LHS, i);
+        } else {
           AllDisjoint = false;
           break;
         }
         // We remember the value and bit index of this bit.
         if (Bits[i].hasValue()) {
           LastVal = Bits[i].getValue();
           LastIdx = Bits[i].getValueBitIndex();
-        }
-        else {
-          if (LastVal) LastVal = SDValue();
+        } else {
+          if (LastVal)
+            LastVal = SDValue();
           LastIdx = 0;
         }
       }
@@ -1752,33 +1781,34 @@ class BitPermutationSelector {
     }
     case ISD::ZERO_EXTEND: {
       // We support only the case with zero extension from i32 to i64 so far.
-      if (V.getValueType() != MVT::i64 ||
-          V.getOperand(0).getValueType() != MVT::i32)
+      if (V.getValueType() != MVT::i64 || LHS.getValueType() != MVT::i32)
         break;
 
-      const SmallVector<ValueBit, 64> *LHSBits;
       const unsigned NumOperandBits = 32;
-      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
-                                                    NumOperandBits);
-
-      for (unsigned i = 0; i < NumOperandBits; ++i)
-        Bits[i] = (*LHSBits)[i];
+      if (LHS.hasOneUse()) {
+        const SmallVector<ValueBit, 64> *LHSBits;
+        std::tie(Interesting, LHSBits) = getValueBits(LHS, NumOperandBits);
+        for (unsigned i = 0; i < NumOperandBits; ++i)
+          Bits[i] = (*LHSBits)[i];
+      } else {
+        for (unsigned i = 0; i < NumOperandBits; ++i)
+          Bits[i] = ValueBit(LHS, i);
+      }
 
       for (unsigned i = NumOperandBits; i < NumBits; ++i)
         Bits[i] = ValueBit(ValueBit::ConstZero);
 
       return std::make_pair(Interesting, &Bits);
     }
     case ISD::TRUNCATE: {
-      EVT FromType = V.getOperand(0).getValueType();
+      EVT FromType = LHS.getValueType();
       EVT ToType = V.getValueType();
       // We support only the case with truncate from i64 to i32.
-      if (FromType != MVT::i64 || ToType != MVT::i32)
+      if (FromType != MVT::i64 || ToType != MVT::i32 || !LHS.hasOneUse())
         break;
       const unsigned NumAllBits = FromType.getSizeInBits();
       SmallVector<ValueBit, 64> *InBits;
-      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
-                                                    NumAllBits);
+      std::tie(Interesting, InBits) = getValueBits(LHS, NumAllBits);
       const unsigned NumValidBits = ToType.getSizeInBits();
 
       // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
@@ -1801,22 +1831,28 @@ class BitPermutationSelector {
       // For AssertZext, we look through the operand and
       // mark the bits known to be zero.
       const SmallVector<ValueBit, 64> *LHSBits;
-      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
-                                                    NumBits);
 
-      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+      EVT FromType = cast<VTSDNode>(RHS)->getVT();
       const unsigned NumValidBits = FromType.getSizeInBits();
-      for (unsigned i = 0; i < NumValidBits; ++i)
-        Bits[i] = (*LHSBits)[i];
 
       // These bits are known to be zero but the AssertZext may be from a value
       // that already has some constant zero bits (i.e. from a masking and).
-      for (unsigned i = NumValidBits; i < NumBits; ++i)
-        Bits[i] = (*LHSBits)[i].hasValue()
-                      ? ValueBit((*LHSBits)[i].getValue(),
-                                 (*LHSBits)[i].getValueBitIndex(),
-                                 ValueBit::VariableKnownToBeZero)
-                      : ValueBit(ValueBit::ConstZero);
+      if (LHS.hasOneUse()) {
+        std::tie(Interesting, LHSBits) = getValueBits(LHS, NumBits);
+        for (unsigned i = 0; i < NumValidBits; ++i)
+          Bits[i] = (*LHSBits)[i];
+        for (unsigned i = NumValidBits; i < NumBits; ++i)
+          Bits[i] = (*LHSBits)[i].hasValue()
+                        ? ValueBit((*LHSBits)[i].getValue(),
+                                   (*LHSBits)[i].getValueBitIndex(),
+                                   ValueBit::VariableKnownToBeZero)
+                        : ValueBit(ValueBit::ConstZero);
+      } else {
+        for (unsigned i = 0; i < NumValidBits; ++i)
+          Bits[i] = ValueBit(LHS, i);
+        for (unsigned i = NumValidBits; i < NumBits; ++i)
+          Bits[i] = ValueBit(LHS, i, ValueBit::VariableKnownToBeZero);
+      }
 
       return std::make_pair(Interesting, &Bits);
     }
diff --git a/llvm/test/CodeGen/PowerPC/int128_ldst.ll b/llvm/test/CodeGen/PowerPC/int128_ldst.ll
@@ -208,11 +208,10 @@ entry:
 define dso_local i128 @ld_or2___int128___int128(i64 %ptr, i8 zeroext %off) {
 ; CHECK-LABEL: ld_or2___int128___int128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    rldicr 5, 3, 0, 51
-; CHECK-NEXT:    rotldi 6, 3, 52
-; CHECK-NEXT:    ldx 3, 5, 4
-; CHECK-NEXT:    rldimi 4, 6, 12, 0
-; CHECK-NEXT:    ld 4, 8(4)
+; CHECK-NEXT:    rldicr 3, 3, 0, 51
+; CHECK-NEXT:    or 5, 3, 4
+; CHECK-NEXT:    ldx 3, 3, 4
+; CHECK-NEXT:    ld 4, 8(5)
 ; CHECK-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
@@ -740,11 +739,10 @@ entry:
 define dso_local void @st_or2__int128___int128(i64 %ptr, i8 zeroext %off, i128 %str) {
 ; CHECK-LABEL: st_or2__int128___int128:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    rldicr 7, 3, 0, 51
-; CHECK-NEXT:    rotldi 3, 3, 52
-; CHECK-NEXT:    stdx 5, 7, 4
-; CHECK-NEXT:    rldimi 4, 3, 12, 0
-; CHECK-NEXT:    std 6, 8(4)
+; CHECK-NEXT:    rldicr 3, 3, 0, 51
+; CHECK-NEXT:    or 7, 3, 4
+; CHECK-NEXT:    stdx 5, 3, 4
+; CHECK-NEXT:    std 6, 8(7)
 ; CHECK-NEXT:    blr
 entry:
   %and = and i64 %ptr, -4096
diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -639,9 +639,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbzu r0, 1(r5)
 ; CHECK-NEXT:    mulli r29, r0, 171
-; CHECK-NEXT:    rlwinm r28, r29, 24, 8, 30
-; CHECK-NEXT:    srwi r29, r29, 9
-; CHECK-NEXT:    add r29, r29, r28
+; CHECK-NEXT:    srwi r28, r29, 9
+; CHECK-NEXT:    rlwinm r29, r29, 24, 8, 30
+; CHECK-NEXT:    add r29, r28, r29
 ; CHECK-NEXT:    sub r0, r0, r29
 ; CHECK-NEXT:    clrlwi r0, r0, 24
 ; CHECK-NEXT:    cmplwi r0, 1
diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll
@@ -35,7 +35,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
 ; CHECK-P9-NEXT:    addi r8, r5, -8
 ; CHECK-P9-NEXT:    lwz r5, 0(r7)
 ; CHECK-P9-NEXT:    extsw r7, r4
-; CHECK-P9-NEXT:    rldic r4, r3, 3, 29
+; CHECK-P9-NEXT:    sldi r4, r3, 3
 ; CHECK-P9-NEXT:    sub r3, r7, r3
 ; CHECK-P9-NEXT:    addi r10, r4, 8
 ; CHECK-P9-NEXT:    lxvdsx vs0, 0, r8
@@ -87,7 +87,7 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %
 ; CHECK-P10-NEXT:    addi r8, r5, -8
 ; CHECK-P10-NEXT:    lwz r5, 0(r7)
 ; CHECK-P10-NEXT:    extsw r7, r4
-; CHECK-P10-NEXT:    rldic r4, r3, 3, 29
+; CHECK-P10-NEXT:    sldi r4, r3, 3
 ; CHECK-P10-NEXT:    addi r10, r4, 8
 ; CHECK-P10-NEXT:    sub r3, r7, r3
 ; CHECK-P10-NEXT:    lxvdsx vs0, 0, r8
diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix -mcpu=pwr8 | FileCheck %s
+; (%a << 8) | (%a & 255), where %a is an operand of both the shl and the and; a single rldimi is expected.
+define i64 @rldimi1(i64 %a) {
+; CHECK-LABEL: rldimi1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rldimi 3, 3, 8, 0
+; CHECK-NEXT:    blr
+entry:
+  %x0 = shl i64 %a, 8
+  %x1 = and i64 %a, 255
+  %x2 = or i64 %x0, %x1
+  ret i64 %x2
+}
+; Applies the shl/and/or pattern twice (shift 8 with mask 255, then shift 16 with mask 65535 on the first result); two rldimi are expected.
+define i64 @rldimi2(i64 %a) {
+; CHECK-LABEL: rldimi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rldimi 3, 3, 8, 0
+; CHECK-NEXT:    rldimi 3, 3, 16, 0
+; CHECK-NEXT:    blr
+entry:
+  %x0 = shl i64 %a, 8
+  %x1 = and i64 %a, 255
+  %x2 = or i64 %x0, %x1
+  %x3 = shl i64 %x2, 16
+  %x4 = and i64 %x2, 65535
+  %x5 = or i64 %x3, %x4
+  ret i64 %x5
+}
+; Applies the shl/and/or pattern three times (shifts 8, 16, 32 with masks 255, 65535, 4294967295) on unnamed values; two rldimi then an rlwinm are expected.
+define i64 @rldimi3(i64 %a) {
+; CHECK-LABEL: rldimi3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rldimi 3, 3, 8, 0
+; CHECK-NEXT:    rldimi 3, 3, 16, 0
+; CHECK-NEXT:    rlwinm 3, 3, 0, 1, 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = shl i64 %a, 8
+  %1 = and i64 %a, 255
+  %2 = or i64 %0, %1
+  %3 = shl i64 %2, 16
+  %4 = and i64 %2, 65535
+  %5 = or i64 %3, %4
+  %6 = shl i64 %5, 32
+  %7 = and i64 %5, 4294967295
+  %8 = or i64 %6, %7
+  ret i64 %8
+}