@@ -6778,62 +6778,141 @@ void irgen::emitStoreEnumTagToAddress(IRGenFunction &IGF,
6778
6778
.storeTag (IGF, enumTy, enumAddr, theCase);
6779
6779
}
6780
6780
6781
- // / Scatter spare bits from the low bits of an integer value.
6782
- llvm::Value *irgen::emitScatterSpareBits (IRGenFunction &IGF,
6783
- const SpareBitVector &spareBitMask,
6784
- llvm::Value *packedBits,
6785
- unsigned packedLowBit) {
6786
- auto destTy
6787
- = llvm::IntegerType::get (IGF.IGM .getLLVMContext (), spareBitMask.size ());
6781
+ // / Extract the rightmost run of contiguous set bits from the
6782
+ // / provided integer or zero if there are no set bits in the
6783
+ // / provided integer. For example:
6784
+ // /
6785
+ // / rightmostMask(0x0f0f_0f0f) = 0x0000_000f
6786
+ // / rightmostMask(0xf0f0_f0f0) = 0x0000_00f0
6787
+ // / rightmostMask(0xffff_ff10) = 0x0000_0010
6788
+ // / rightmostMask(0xffff_ff80) = 0xffff_ff80
6789
+ // / rightmostMask(0x0000_0000) = 0x0000_0000
6790
+ // /
6791
+ static inline llvm::APInt rightmostMask (const llvm::APInt& mask) {
6792
+ if (mask.isShiftedMask ()) {
6793
+ return mask;
6794
+ }
6795
+ // This formula is derived from the formula to "turn off the
6796
+ // rightmost contiguous string of 1's" in Chapter 2-1 of
6797
+ // Hacker's Delight (Second Edition) by Henry S. Warren and
6798
+ // attributed to Luther Woodrum.
6799
+ llvm::APInt result = -mask;
6800
+ result &= mask; // isolate rightmost set bit
6801
+ result += mask; // clear rightmost contiguous set bits
6802
+ result &= mask; // mask out carry bit leftover from add
6803
+ result ^= mask; // extract desired bits
6804
+ return result;
6805
+ }
6806
+
6807
+ // / Pack masked bits into the low bits of an integer value.
6808
+ // / Equivalent to a parallel bit extract instruction (PEXT),
6809
+ // / although we don't currently emit PEXT directly.
6810
+ llvm::Value *irgen::emitGatherBits (IRGenFunction &IGF,
6811
+ llvm::APInt mask,
6812
+ llvm::Value *source,
6813
+ unsigned resultLowBit,
6814
+ unsigned resultBitWidth) {
6815
+ auto &builder = IGF.Builder ;
6816
+ auto &context = IGF.IGM .getLLVMContext ();
6817
+ assert (mask.getBitWidth () == source->getType ()->getIntegerBitWidth ()
6818
+ && " source and mask must have same width" );
6819
+
6820
+ // The source and mask need to be at least as wide as the result so
6821
+ // that bits can be shifted into the correct position.
6822
+ auto destTy = llvm::IntegerType::get (context, resultBitWidth);
6823
+ if (mask.getBitWidth () < resultBitWidth) {
6824
+ source = builder.CreateZExt (source, destTy);
6825
+ mask = mask.zext (resultBitWidth);
6826
+ }
6827
+
6828
+ // Shift each set of contiguous set bits into position and
6829
+ // accumulate them into the result.
6830
+ int64_t usedBits = resultLowBit;
6788
6831
llvm::Value *result = nullptr ;
6789
- unsigned usedBits = packedLowBit;
6832
+ while (mask != 0 ) {
6833
+ // Isolate the rightmost run of contiguous set bits.
6834
+ // Example: 0b0011_01101_1100 -> 0b0000_0001_1100
6835
+ llvm::APInt partMask = rightmostMask (mask);
6790
6836
6791
- // Expand the packed bits to the destination type.
6792
- packedBits = IGF.Builder .CreateZExtOrTrunc (packedBits, destTy);
6793
-
6794
- auto spareBitEnumeration = spareBitMask.enumerateSetBits ();
6795
- for (auto nextSpareBit = spareBitEnumeration.findNext ();
6796
- nextSpareBit.hasValue ();
6797
- nextSpareBit = spareBitEnumeration.findNext ()) {
6798
- unsigned u = nextSpareBit.getValue (), startBit = u;
6799
- assert (u >= usedBits - packedLowBit
6800
- && " used more bits than we've processed?!" );
6801
-
6802
- // Shift the selected bits into place.
6803
- llvm::Value *newBits;
6804
- if (u > usedBits)
6805
- newBits = IGF.Builder .CreateShl (packedBits, u - usedBits);
6806
- else if (u < usedBits)
6807
- newBits = IGF.Builder .CreateLShr (packedBits, usedBits - u);
6808
- else
6809
- newBits = packedBits;
6810
-
6811
- // See how many consecutive bits we have.
6812
- unsigned numBits = 1 ;
6813
- ++u;
6814
- for (unsigned e = spareBitMask.size (); u < e && spareBitMask[u]; ++u) {
6815
- ++numBits;
6816
- auto nextBit = spareBitEnumeration.findNext (); (void ) nextBit;
6817
- assert (nextBit.hasValue ());
6818
- }
6819
-
6820
- // Mask out the selected bits.
6821
- auto val = APInt::getAllOnesValue (numBits);
6822
- if (numBits < spareBitMask.size ())
6823
- val = val.zext (spareBitMask.size ());
6824
- val = val.shl (startBit);
6825
- auto mask = llvm::ConstantInt::get (IGF.IGM .getLLVMContext (), val);
6826
- newBits = IGF.Builder .CreateAnd (newBits, mask);
6837
+ // Update the bits we need to mask next.
6838
+ mask ^= partMask;
6839
+
6840
+ // Shift the selected bits into position.
6841
+ llvm::Value *part = source;
6842
+ int64_t offset = int64_t (partMask.countTrailingZeros ()) - usedBits;
6843
+ if (offset > 0 ) {
6844
+ uint64_t shift = uint64_t (offset);
6845
+ part = builder.CreateLShr (part, shift);
6846
+ partMask.lshrInPlace (shift);
6847
+ } else if (offset < 0 ) {
6848
+ uint64_t shift = uint64_t (-offset);
6849
+ part = builder.CreateShl (part, shift);
6850
+ partMask <<= shift;
6851
+ }
6852
+
6853
+ // Truncate the output to the result size.
6854
+ if (partMask.getBitWidth () > resultBitWidth) {
6855
+ partMask = partMask.trunc (resultBitWidth);
6856
+ part = builder.CreateTrunc (part, destTy);
6857
+ }
6858
+
6859
+ // Mask out selected bits.
6860
+ part = builder.CreateAnd (part, partMask);
6827
6861
6828
6862
// Accumulate the result.
6829
- if (result)
6830
- result = IGF.Builder .CreateOr (result, newBits);
6831
- else
6832
- result = newBits;
6863
+ result = result ? builder.CreateOr (result, part) : part;
6833
6864
6834
- usedBits += numBits;
6865
+ // Update the offset and remaining mask.
6866
+ usedBits += partMask.countPopulation ();
6835
6867
}
6868
+ return result;
6869
+ }
6870
+
6871
+ // / Unpack bits from the low bits of an integer value and
6872
+ // / move them to the bit positions indicated by the mask.
6873
+ // / Equivalent to a parallel bit deposit instruction (PDEP),
6874
+ // / although we don't currently emit PDEP directly.
6875
+ llvm::Value *irgen::emitScatterBits (IRGenFunction &IGF,
6876
+ llvm::APInt mask,
6877
+ llvm::Value *source,
6878
+ unsigned packedLowBit) {
6879
+ auto &builder = IGF.Builder ;
6880
+ auto &context = IGF.IGM .getLLVMContext ();
6881
+
6882
+ // Expand the packed bits to the destination type.
6883
+ auto destTy = llvm::IntegerType::get (context, mask.getBitWidth ());
6884
+ source = builder.CreateZExtOrTrunc (source, destTy);
6885
+
6886
+ // Shift each set of contiguous set bits into position and
6887
+ // accumulate them into the result.
6888
+ int64_t usedBits = packedLowBit;
6889
+ llvm::Value *result = nullptr ;
6890
+ while (mask != 0 ) {
6891
+ // Isolate the rightmost run of contiguous set bits.
6892
+ // Example: 0b0011_01101_1100 -> 0b0000_0001_1100
6893
+ llvm::APInt partMask = rightmostMask (mask);
6894
+
6895
+ // Update the bits we need to mask next.
6896
+ mask ^= partMask;
6836
6897
6898
+ // Shift the selected bits into position.
6899
+ llvm::Value *part = source;
6900
+ int64_t offset = int64_t (partMask.countTrailingZeros ()) - usedBits;
6901
+ if (offset > 0 ) {
6902
+ part = builder.CreateShl (part, uint64_t (offset));
6903
+ } else if (offset < 0 ) {
6904
+ part = builder.CreateLShr (part, uint64_t (-offset));
6905
+ }
6906
+
6907
+ // Mask out selected bits.
6908
+ part = builder.CreateAnd (part, partMask);
6909
+
6910
+ // Accumulate the result.
6911
+ result = result ? builder.CreateOr (result, part) : part;
6912
+
6913
+ // Update the offset and remaining mask.
6914
+ usedBits += partMask.countPopulation ();
6915
+ }
6837
6916
return result;
6838
6917
}
6839
6918
@@ -6900,8 +6979,8 @@ EnumPayload irgen::interleaveSpareBits(IRGenFunction &IGF,
6900
6979
payloadValue = IGF.Builder .CreateLShr (payloadValue,
6901
6980
llvm::ConstantInt::get (IGF.IGM .Int32Ty , usedBits));
6902
6981
}
6903
- payloadValue = emitScatterSpareBits (IGF, spareBitsChunk,
6904
- payloadValue, 0 );
6982
+ payloadValue = emitScatterBits (IGF, spareBitsChunk. asAPInt () ,
6983
+ payloadValue, 0 );
6905
6984
if (payloadValue->getType () != type) {
6906
6985
if (type->isPointerTy ())
6907
6986
payloadValue = IGF.Builder .CreateIntToPtr (payloadValue, type);
0 commit comments