---

swift-ci · swift-ci · commit 674d2563bf25 · 2019-05-21T16:29:16.000-07:00
yaml --- r: 347895 b: refs/heads/master c: ffa9166 h: refs/heads/master i: 347893: 51f02b5 347891: bf7f285 347887: 0ce3be2
diff --git a/[refs] b/[refs]
@@ -1,5 +1,5 @@
 ---
-refs/heads/master: 598bff9b8306ad74fb4492f9169e217cf78238c3
+refs/heads/master: ffa9166efde979e50181aa9eac14a50cbf3ecaff
 refs/heads/master-next: 203b3026584ecad859eb328b2e12490099409cd5
 refs/tags/osx-passed: b6b74147ef8a386f532cf9357a1bde006e552c54
 refs/tags/swift-2.2-SNAPSHOT-2015-12-01-a: 6bb18e013c2284f2b45f5f84f2df2887dc0f7dea
diff --git a/trunk/include/swift/Basic/ClusteredBitVector.h b/trunk/include/swift/Basic/ClusteredBitVector.h
@@ -539,48 +539,6 @@ class ClusteredBitVector {
     return !any();
   }
 
-  /// A class for scanning for set bits, from low indices to high ones.
-  class SetBitEnumerator {
-    ChunkType CurChunk;
-    const ChunkType *Chunks;
-    unsigned CurChunkIndex;
-    unsigned NumChunks;
-  public:
-    explicit SetBitEnumerator(const ClusteredBitVector &vector) {
-      if (vector.isInlineAndAllClear()) {
-        CurChunkIndex = 0;
-        NumChunks = 0;
-      } else {
-        Chunks = vector.getChunksPtr();
-        CurChunk = Chunks[0];
-        CurChunkIndex = 0;
-        NumChunks = vector.getLengthInChunks();
-      }
-    }
-
-    /// Search for another bit.  Returns false if it can't find one.
-    Optional<size_t> findNext() {
-      if (CurChunkIndex == NumChunks) return None;
-      auto cur = CurChunk;
-      while (!cur) {
-        if (++CurChunkIndex == NumChunks) return None;
-        cur = Chunks[CurChunkIndex];
-      }
-
-      // Find the index of the lowest set bit.
-      size_t bitIndex = llvm::countTrailingZeros(cur, llvm::ZB_Undefined);
-
-      // Clear that bit in the current chunk.
-      CurChunk = cur ^ (ChunkType(1) << bitIndex);
-      assert(!(CurChunk & (ChunkType(1) << bitIndex)));
-
-      return (CurChunkIndex * ChunkSizeInBits + bitIndex);
-    }
-  };
-  SetBitEnumerator enumerateSetBits() const {
-    return SetBitEnumerator(*this);
-  }
-
   friend bool operator==(const ClusteredBitVector &lhs,
                          const ClusteredBitVector &rhs) {
     if (lhs.size() != rhs.size())
diff --git a/trunk/lib/IRGen/EnumPayload.cpp b/trunk/lib/IRGen/EnumPayload.cpp
@@ -696,70 +696,6 @@ EnumPayload::emitApplyOrMask(IRGenFunction &IGF,
   }
 }
 
-/// Gather spare bits into the low bits of a smaller integer value.
-llvm::Value *irgen::emitGatherSpareBits(IRGenFunction &IGF,
-                                        const SpareBitVector &spareBitMask,
-                                        llvm::Value *spareBits,
-                                        unsigned resultLowBit,
-                                        unsigned resultBitWidth) {
-  auto destTy
-    = llvm::IntegerType::get(IGF.IGM.getLLVMContext(), resultBitWidth);
-  unsigned usedBits = resultLowBit;
-  llvm::Value *result = nullptr;
-
-  auto spareBitEnumeration = spareBitMask.enumerateSetBits();
-  for (auto optSpareBit = spareBitEnumeration.findNext();
-       optSpareBit.hasValue() && usedBits < resultBitWidth;
-       optSpareBit = spareBitEnumeration.findNext()) {
-    unsigned u = optSpareBit.getValue();
-    assert(u >= (usedBits - resultLowBit) &&
-           "used more bits than we've processed?!");
-
-    // Shift the bits into place.
-    llvm::Value *newBits;
-    if (u > usedBits)
-      newBits = IGF.Builder.CreateLShr(spareBits, u - usedBits);
-    else if (u < usedBits) {
-      newBits = IGF.Builder.CreateZExtOrTrunc(spareBits, destTy);
-      newBits = IGF.Builder.CreateShl(newBits, usedBits - u);
-    } else
-      newBits = spareBits;
-    newBits = IGF.Builder.CreateZExtOrTrunc(newBits, destTy);
-
-    // See how many consecutive bits we have.
-    unsigned numBits = 1;
-    ++u;
-    // We don't need more bits than the size of the result.
-    unsigned maxBits = resultBitWidth - usedBits;
-    for (unsigned e = spareBitMask.size();
-         u < e && numBits < maxBits && spareBitMask[u];
-         ++u) {
-      ++numBits;
-      (void) spareBitEnumeration.findNext();
-    }
-
-    // Mask out the selected bits.
-    auto val = APInt::getAllOnesValue(numBits);
-    if (numBits < resultBitWidth)
-      val = val.zext(resultBitWidth);
-    val = val.shl(usedBits);
-    auto *mask = llvm::ConstantInt::get(IGF.IGM.getLLVMContext(), val);
-    newBits = IGF.Builder.CreateAnd(newBits, mask);
-
-    // Accumulate the result.
-    if (result)
-      result = IGF.Builder.CreateOr(result, newBits);
-    else
-      result = newBits;
-
-    usedBits += numBits;
-  }
-
-  return result;
-}
-
-
-
 llvm::Value *
 EnumPayload::emitGatherSpareBits(IRGenFunction &IGF,
                                  const SpareBitVector &spareBits,
@@ -798,8 +734,8 @@ EnumPayload::emitGatherSpareBits(IRGenFunction &IGF,
       break;
 
     // Get the spare bits from this part.
-    auto bits = irgen::emitGatherSpareBits(IGF, spareBitsPart,
-                                           v, firstBitOffset, bitWidth);
+    auto bits = irgen::emitGatherBits(IGF, spareBitsPart.asAPInt(),
+                                      v, firstBitOffset, bitWidth);
     firstBitOffset += numBitsInPart;
     
     // Accumulate it into the full set.
diff --git a/trunk/lib/IRGen/ExtraInhabitants.cpp b/trunk/lib/IRGen/ExtraInhabitants.cpp
@@ -29,9 +29,8 @@ static unsigned getNumLowObjCReservedBits(const IRGenModule &IGM) {
     return 0;
 
   // Get the index of the first non-reserved bit.
-  SpareBitVector ObjCMask = IGM.TargetInfo.ObjCPointerReservedBits;
-  ObjCMask.flipAll();
-  return ObjCMask.enumerateSetBits().findNext().getValue();
+  auto &mask = IGM.TargetInfo.ObjCPointerReservedBits;
+  return mask.asAPInt().countTrailingOnes();
 }
 
 /*****************************************************************************/
diff --git a/trunk/lib/IRGen/GenEnum.cpp b/trunk/lib/IRGen/GenEnum.cpp
@@ -6778,62 +6778,141 @@ void irgen::emitStoreEnumTagToAddress(IRGenFunction &IGF,
     .storeTag(IGF, enumTy, enumAddr, theCase);
 }
 
-/// Scatter spare bits from the low bits of an integer value.
-llvm::Value *irgen::emitScatterSpareBits(IRGenFunction &IGF,
-                                         const SpareBitVector &spareBitMask,
-                                         llvm::Value *packedBits,
-                                         unsigned packedLowBit) {
-  auto destTy
-    = llvm::IntegerType::get(IGF.IGM.getLLVMContext(), spareBitMask.size());
+/// Return the rightmost (lowest-order) run of contiguous set bits
+/// of the mask, left at its original bit position, or zero if the
+/// mask has no set bits at all. For example:
+///
+///   rightmostMask(0x0f0f_0f0f) = 0x0000_000f
+///   rightmostMask(0xf0f0_f0f0) = 0x0000_00f0
+///   rightmostMask(0xffff_ff10) = 0x0000_0010
+///   rightmostMask(0xffff_ff80) = 0xffff_ff80
+///   rightmostMask(0x0000_0000) = 0x0000_0000
+///
+static inline llvm::APInt rightmostMask(const llvm::APInt& mask) {
+  if (mask.isShiftedMask()) { // a single run already is the answer
+    return mask;
+  }
+  // This formula is derived from the formula to "turn off the
+  // rightmost contiguous string of 1's" in Chapter 2-1 of
+  // Hacker's Delight (Second Edition) by Henry S. Warren and
+  // attributed to Luther Woodrum.
+  llvm::APInt result = -mask; // arithmetic negation of the mask
+  result &= mask; // isolate rightmost set bit
+  result += mask; // carry clears the rightmost run, sets the bit above it
+  result &= mask; // drop that carry bit: the mask minus its rightmost run
+  result ^= mask; // bits in mask but not in result: the rightmost run
+  return result;
+}
+
+/// Gather the bits of source selected by mask into adjacent bits of a
+/// resultBitWidth-bit value, lowest first, starting at bit resultLowBit.
+/// Like PEXT, but emitted as shifts and masks; null if the mask is zero.
+llvm::Value *irgen::emitGatherBits(IRGenFunction &IGF,
+                                   llvm::APInt mask,
+                                   llvm::Value *source,
+                                   unsigned resultLowBit,
+                                   unsigned resultBitWidth) {
+  auto &builder = IGF.Builder;
+  auto &context = IGF.IGM.getLLVMContext();
+  assert(mask.getBitWidth() == source->getType()->getIntegerBitWidth()
+    && "source and mask must have same width");
+
+  // The source and mask need to be at least as wide as the result so
+  // that bits can be shifted into the correct position.
+  auto destTy = llvm::IntegerType::get(context, resultBitWidth);
+  if (mask.getBitWidth() < resultBitWidth) {
+    source = builder.CreateZExt(source, destTy);
+    mask = mask.zext(resultBitWidth);
+  }
+
+  // Move each run of contiguous mask bits to the next free position
+  // in the result and OR the pieces together.
+  int64_t usedBits = resultLowBit; // next free bit of the result
   llvm::Value *result = nullptr;
-  unsigned usedBits = packedLowBit;
+  while (mask != 0) {
+    // Isolate the rightmost run of contiguous set bits.
+    // Example: 0b0110_1101_1100 -> 0b0000_0001_1100
+    llvm::APInt partMask = rightmostMask(mask);
 
-  // Expand the packed bits to the destination type.
-  packedBits = IGF.Builder.CreateZExtOrTrunc(packedBits, destTy);
-
-  auto spareBitEnumeration = spareBitMask.enumerateSetBits();
-  for (auto nextSpareBit = spareBitEnumeration.findNext();
-       nextSpareBit.hasValue();
-       nextSpareBit = spareBitEnumeration.findNext()) {
-    unsigned u = nextSpareBit.getValue(), startBit = u;
-    assert(u >= usedBits - packedLowBit
-           && "used more bits than we've processed?!");
-
-    // Shift the selected bits into place.
-    llvm::Value *newBits;
-    if (u > usedBits)
-      newBits = IGF.Builder.CreateShl(packedBits, u - usedBits);
-    else if (u < usedBits)
-      newBits = IGF.Builder.CreateLShr(packedBits, usedBits - u);
-    else
-      newBits = packedBits;
-
-    // See how many consecutive bits we have.
-    unsigned numBits = 1;
-    ++u;
-    for (unsigned e = spareBitMask.size(); u < e && spareBitMask[u]; ++u) {
-      ++numBits;
-      auto nextBit = spareBitEnumeration.findNext(); (void) nextBit;
-      assert(nextBit.hasValue());
-    }
-
-    // Mask out the selected bits.
-    auto val = APInt::getAllOnesValue(numBits);
-    if (numBits < spareBitMask.size())
-      val = val.zext(spareBitMask.size());
-    val = val.shl(startBit);
-    auto mask = llvm::ConstantInt::get(IGF.IGM.getLLVMContext(), val);
-    newBits = IGF.Builder.CreateAnd(newBits, mask);
+    // Remove this run from the bits that still have to be processed.
+    mask ^= partMask;
+
+    // Shift the run so that its lowest bit lands on bit usedBits.
+    llvm::Value *part = source;
+    int64_t offset = int64_t(partMask.countTrailingZeros()) - usedBits;
+    if (offset > 0) { // run is above its slot: move it down
+      uint64_t shift = uint64_t(offset);
+      part = builder.CreateLShr(part, shift);
+      partMask.lshrInPlace(shift); // keep the mask aligned with part
+    } else if (offset < 0) { // run is below its slot: move it up
+      uint64_t shift = uint64_t(-offset);
+      part = builder.CreateShl(part, shift);
+      partMask <<= shift; // keep the mask aligned with part
+    }
+
+    // Narrow wider parts to the result type now the run is in position.
+    if (partMask.getBitWidth() > resultBitWidth) {
+      partMask = partMask.trunc(resultBitWidth);
+      part = builder.CreateTrunc(part, destTy);
+    }
+
+    // Keep only the bits of this run and clear everything else.
+    part = builder.CreateAnd(part, partMask);
 
     // Accumulate the result.
-    if (result)
-      result = IGF.Builder.CreateOr(result, newBits);
-    else
-      result = newBits;
+    result = result ? builder.CreateOr(result, part) : part;
 
-    usedBits += numBits;
+    // Advance the output position by the bits kept in partMask.
+    usedBits += partMask.countPopulation();
   }
+  return result; // still null if the mask had no set bits
+}
+
+/// Scatter bits of source, read upward from bit packedLowBit, onto the
+/// set bits of mask, lowest first. The result is as wide as the mask,
+/// or null if the mask is zero. Like a parallel bit deposit (PDEP),
+/// but emitted as shifts and masks rather than a PDEP instruction.
+llvm::Value *irgen::emitScatterBits(IRGenFunction &IGF,
+                                    llvm::APInt mask,
+                                    llvm::Value *source,
+                                    unsigned packedLowBit) {
+  auto &builder = IGF.Builder;
+  auto &context = IGF.IGM.getLLVMContext();
+
+  // Zero-extend or truncate the packed bits to the width of the mask.
+  auto destTy = llvm::IntegerType::get(context, mask.getBitWidth());
+  source = builder.CreateZExtOrTrunc(source, destTy);
+
+  // Move the next unread packed bits onto each run of contiguous
+  // mask bits in turn and OR the pieces together.
+  int64_t usedBits = packedLowBit; // next unread bit of source
+  llvm::Value *result = nullptr; // stays null if the mask is zero
+  while (mask != 0) {
+    // Isolate the rightmost run of contiguous set bits.
+    // Example: 0b0110_1101_1100 -> 0b0000_0001_1100
+    llvm::APInt partMask = rightmostMask(mask);
+
+    // Remove this run from the bits that still have to be processed.
+    mask ^= partMask;
 
+    // Shift source so that bit usedBits lands on the lowest bit of the run.
+    llvm::Value *part = source;
+    int64_t offset = int64_t(partMask.countTrailingZeros()) - usedBits;
+    if (offset > 0) { // run is above the packed bits: move them up
+      part = builder.CreateShl(part, uint64_t(offset));
+    } else if (offset < 0) { // run is below the packed bits: move them down
+      part = builder.CreateLShr(part, uint64_t(-offset));
+    }
+
+    // Keep only the bits that land inside this run.
+    part = builder.CreateAnd(part, partMask);
+
+    // Accumulate the result.
+    result = result ? builder.CreateOr(result, part) : part;
+
+    // Advance the input position by the number of bits in this run.
+    usedBits += partMask.countPopulation();
+  }
   return result;
 }
 
@@ -6900,8 +6979,8 @@ EnumPayload irgen::interleaveSpareBits(IRGenFunction &IGF,
         payloadValue = IGF.Builder.CreateLShr(payloadValue,
                        llvm::ConstantInt::get(IGF.IGM.Int32Ty, usedBits));
       }
-      payloadValue = emitScatterSpareBits(IGF, spareBitsChunk,
-                                          payloadValue, 0);
+      payloadValue = emitScatterBits(IGF, spareBitsChunk.asAPInt(),
+                                     payloadValue, 0);
       if (payloadValue->getType() != type) {
         if (type->isPointerTy())
           payloadValue = IGF.Builder.CreateIntToPtr(payloadValue, type);
diff --git a/trunk/lib/IRGen/GenEnum.h b/trunk/lib/IRGen/GenEnum.h
@@ -103,17 +103,22 @@ EnumPayload interleaveSpareBits(IRGenFunction &IGF,
                                 const SpareBitVector &spareBitVector,
                                 llvm::Value *value);
 
-/// Gather spare bits into the low bits of a smaller integer value.
-llvm::Value *emitGatherSpareBits(IRGenFunction &IGF,
-                                 const SpareBitVector &spareBitMask,
-                                 llvm::Value *spareBits,
-                                 unsigned resultLowBit,
-                                 unsigned resultBitWidth);
-/// Scatter spare bits from the low bits of a smaller integer value.
-llvm::Value *emitScatterSpareBits(IRGenFunction &IGF,
-                                  const SpareBitVector &spareBitMask,
-                                  llvm::Value *packedBits,
-                                  unsigned packedLowBit);
+/// Gather the bits of source selected by mask into adjacent bits of a
+/// resultBitWidth-bit value, lowest first, starting at bit resultLowBit.
+/// Like PEXT, but emitted as shifts and masks; null if the mask is zero.
+llvm::Value *emitGatherBits(IRGenFunction &IGF,
+                            llvm::APInt mask,
+                            llvm::Value *source,
+                            unsigned resultLowBit,
+                            unsigned resultBitWidth);
+/// Scatter bits of packedBits, read upward from bit packedLowBit, onto
+/// the set bits of mask, lowest first. The result is as wide as the mask,
+/// or null if the mask is zero. Like a parallel bit deposit (PDEP),
+/// but emitted as shifts and masks rather than a PDEP instruction.
+llvm::Value *emitScatterBits(IRGenFunction &IGF,
+                             llvm::APInt mask,
+                             llvm::Value *packedBits,
+                             unsigned packedLowBit);
   
 /// An implementation strategy for an enum, which handles how the enum is
 /// laid out and how to perform TypeInfo operations on values of the enum.
diff --git a/trunk/lib/IRGen/GenType.cpp b/trunk/lib/IRGen/GenType.cpp
diff --git a/trunk/unittests/Basic/ClusteredBitVectorTest.cpp b/trunk/unittests/Basic/ClusteredBitVectorTest.cpp
diff --git a/trunk/utils/test-clustered-bit-vector/generator.cpp b/trunk/utils/test-clustered-bit-vector/generator.cpp