Skip to content

Commit 79ed012

Browse files
committed
wip
Finish SetBuilderImpl; fix HashSetBuilder#isAliased and add property tests; do not create a temporary empty root node in copyElems() of HashMapBuilder/HashSetBuilder; revert HashCollisionSetNode to using Vectors instead of Arrays; fix HashSet#subsetOf; make Set4#buildTo private[immutable].
1 parent c870162 commit 79ed012

File tree

4 files changed

+306
-33
lines changed

4 files changed

+306
-33
lines changed

library/src/scala/collection/immutable/ChampHashMap.scala

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -585,19 +585,15 @@ private final class BitmapIndexedMapNode[K, +V](
585585

586586
override def copy(): BitmapIndexedMapNode[K, V] = {
587587
val contentClone = new Array[Any](content.length)
588-
var i = 0
589-
val dataIndices = bitCount(dataMap) * 2
590-
while (i < dataIndices) {
591-
contentClone(i) = content(i)
592-
i += 1
593-
}
588+
val dataIndices = bitCount(dataMap) * TupleLength
589+
Array.copy(content, 0, contentClone, 0, dataIndices)
590+
var i = dataIndices
594591
while (i < content.length) {
595592
contentClone(i) = content(i).asInstanceOf[MapNode[K, V]].copy()
596593
i += 1
597594
}
598595
new BitmapIndexedMapNode[K, V](dataMap, nodeMap, contentClone, originalHashes.clone(), size)
599596
}
600-
601597
}
602598

603599
private final class HashCollisionMapNode[K, +V ](
@@ -987,7 +983,6 @@ private[immutable] final class HashMapBuilder[K, V] extends Builder[(K, V), Hash
987983

988984
/** Copy elements to new mutable structure */
989985
private def copyElems(): Unit = {
990-
aliased = null
991986
rootNode = rootNode.copy()
992987
}
993988

library/src/scala/collection/immutable/ChampHashSet.scala

Lines changed: 249 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import Hashing.improve
99
import java.lang.Integer.{bitCount, numberOfTrailingZeros}
1010
import java.lang.System.arraycopy
1111

12+
import scala.collection.immutable.Set.Set4
1213
import scala.util.hashing.MurmurHash3
14+
import scala.runtime.Statics.releaseFence
1315

1416
/** This class implements immutable sets using a Compressed Hash-Array Mapped Prefix-tree.
1517
* See paper https://michael.steindorfer.name/publications/oopsla15.pdf for more details.
@@ -26,6 +28,8 @@ final class HashSet[A] private[immutable] (val rootNode: SetNode[A], val cachedJ
2628
with SetOps[A, HashSet, HashSet[A]]
2729
with StrictOptimizedIterableOps[A, HashSet, HashSet[A]] {
2830

31+
releaseFence()
32+
2933
override def iterableFactory: IterableFactory[HashSet] = HashSet
3034

3135
override def knownSize: Int = rootNode.size
@@ -68,6 +72,13 @@ final class HashSet[A] private[immutable] (val rootNode: SetNode[A], val cachedJ
6872
else this
6973
}
7074

75+
/** Returns a set holding every element of this set followed by every element of `that`.
  *
  * Both sources are fed through a single builder obtained from `iterableFactory`,
  * first this set and then `that`, and the builder's result is returned.
  */
override def concat(that: IterableOnce[A]): HashSet[A] =
  iterableFactory
    .newBuilder[A]
    .addAll(this)
    .addAll(that)
    .result()
81+
7182
override def tail: HashSet[A] = this - head
7283

7384
override def init: HashSet[A] = this - last
@@ -142,9 +153,15 @@ private[immutable] sealed abstract class SetNode[A] extends Node[SetNode[A]] {
142153

143154
def subsetOf(that: SetNode[A], shift: Int): Boolean
144155

156+
def copy(): SetNode[A]
145157
}
146158

147-
private final class BitmapIndexedSetNode[A](val dataMap: Int, val nodeMap: Int, val content: Array[Any], val originalHashes: Array[Int], val size: Int) extends SetNode[A] {
159+
private final class BitmapIndexedSetNode[A](
160+
var dataMap: Int,
161+
var nodeMap: Int,
162+
var content: Array[Any],
163+
var originalHashes: Array[Int],
164+
var size: Int) extends SetNode[A] {
148165

149166
import Node._
150167
import SetNode._
@@ -492,8 +509,8 @@ private final class BitmapIndexedSetNode[A](val dataMap: Int, val nodeMap: Int,
492509
(this eq node) ||
493510
(this.nodeMap == node.nodeMap) &&
494511
(this.dataMap == node.dataMap) &&
495-
java.util.Arrays.equals(this.originalHashes, node.originalHashes) &&
496-
deepContentEquality(this.content, node.content, content.length)
512+
java.util.Arrays.equals(this.originalHashes, node.originalHashes) &&
513+
deepContentEquality(this.content, node.content, content.length)
497514
case _ => false
498515
}
499516

@@ -516,13 +533,24 @@ private final class BitmapIndexedSetNode[A](val dataMap: Int, val nodeMap: Int,
516533
override def hashCode(): Int =
517534
throw new UnsupportedOperationException("Trie nodes do not support hashing.")
518535

536+
/** Creates a structurally independent copy of this node.
  *
  * The leading `bitCount(dataMap)` slots of `content` are copied as-is; every
  * remaining slot is treated as a child `SetNode` and copied recursively. The
  * `originalHashes` array is cloned so the copy does not share it with this node.
  */
override def copy(): BitmapIndexedSetNode[A] = {
  val total = content.length
  val payloadCount = bitCount(dataMap)
  val duplicate = new Array[Any](total)

  // Payload slots: plain element references, copied directly.
  Array.copy(content, 0, duplicate, 0, payloadCount)

  // Remaining slots: sub-nodes, each copied recursively.
  var slot = payloadCount
  while (slot < total) {
    val child = content(slot).asInstanceOf[SetNode[A]]
    duplicate(slot) = child.copy()
    slot += 1
  }

  new BitmapIndexedSetNode[A](dataMap, nodeMap, duplicate, originalHashes.clone(), size)
}
519547
}
520548

521-
private final class HashCollisionSetNode[A](val originalHash: Int, val hash: Int, val content: Vector[A]) extends SetNode[A] {
549+
private final class HashCollisionSetNode[A](val originalHash: Int, val hash: Int, var content: Vector[Any]) extends SetNode[A] {
522550

523551
import Node._
524552

525-
require(content.size >= 2)
553+
require(content.length >= 2)
526554

527555
def contains(element: A, originalHash: Int, hash: Int, shift: Int): Boolean =
528556
this.hash == hash && content.contains(element)
@@ -549,7 +577,7 @@ private final class HashCollisionSetNode[A](val originalHash: Int, val hash: Int
549577
// assert(updatedContent.size == content.size - 1)
550578

551579
updatedContent.size match {
552-
case 1 => new BitmapIndexedSetNode[A](bitposFrom(maskFrom(hash, 0)), 0, updatedContent.toArray, Array(originalHash), 1)
580+
case 1 => new BitmapIndexedSetNode[A](bitposFrom(maskFrom(hash, 0)), 0, Array(updatedContent(0)), Array(originalHash), 1)
553581
case _ => new HashCollisionSetNode[A](originalHash, hash, updatedContent)
554582
}
555583
}
@@ -563,23 +591,28 @@ private final class HashCollisionSetNode[A](val originalHash: Int, val hash: Int
563591

564592
def hasPayload: Boolean = true
565593

566-
def payloadArity: Int = content.size
594+
def payloadArity: Int = content.length
567595

568-
def getPayload(index: Int): A = content(index)
596+
def getPayload(index: Int): A = content(index).asInstanceOf[A]
569597

570598
override def getHash(index: Int): Int = originalHash
571599

572600
def sizePredicate: Int = SizeMoreThanOne
573601

574-
def size: Int = content.size
602+
def size: Int = content.length
575603

576-
def foreach[U](f: A => U): Unit = content.foreach(f)
604+
/** Applies `f` to every element stored in this collision node, in `content` order. */
def foreach[U](f: A => U): Unit =
  content.foreach(stored => f(stored.asInstanceOf[A]))
577611

578612
def subsetOf(that: SetNode[A], shift: Int): Boolean = if (this eq that) true else that match {
579613
case node: BitmapIndexedSetNode[A] => false
580-
case node: HashCollisionSetNode[A] => {
614+
case node: HashCollisionSetNode[A] =>
581615
this.payloadArity <= node.payloadArity && this.content.forall(node.content.contains)
582-
}
583616
}
584617

585618
override def equals(that: Any): Boolean =
@@ -595,6 +628,8 @@ private final class HashCollisionSetNode[A](val originalHash: Int, val hash: Int
595628
override def hashCode(): Int =
596629
throw new UnsupportedOperationException("Trie nodes do not support hashing.")
597630

631+
/** Creates a new collision node with the same hashes that references the same `content` vector.
  *
  * Sharing `content` is safe because it is an immutable `Vector`: code that changes this
  * node's elements reassigns the `content` field rather than mutating the vector in place.
  */
override def copy(): HashCollisionSetNode[A] = new HashCollisionSetNode[A](originalHash, hash, content)
632+
598633
}
599634

600635
private final class SetIterator[A](rootNode: SetNode[A])
@@ -668,16 +703,211 @@ object HashSet extends IterableFactory[HashSet] {
668703
case _ => (newBuilder[A] ++= source).result()
669704
}
670705

671-
def newBuilder[A]: Builder[A, HashSet[A]] =
672-
new ImmutableBuilder[A, HashSet[A]](empty) {
673-
def addOne(element: A): this.type = {
674-
elems = elems + element
675-
this
676-
}
677-
}
706+
def newBuilder[A]: Builder[A, HashSet[A]] = new HashSetBuilder
678707

679708
// scalac generates a `readResolve` method to discard the deserialized state (see https://github.com/scala/bug/issues/10412).
680709
// This prevents it from serializing it in the first place:
681710
private[this] def writeObject(out: ObjectOutputStream): Unit = ()
682711
private[this] def readObject(in: ObjectInputStream): Unit = ()
683712
}
713+
714+
/** A [[Builder]] for [[HashSet]] that grows a trie by mutating its nodes in place.
  *
  * `result()` hands out a `HashSet` that shares the builder's root node. The builder remembers
  * that set in `aliased`; the next mutation first replaces `rootNode` with `rootNode.copy()`
  * so the set that was handed out is never modified.
  */
private[collection] final class HashSetBuilder[A] extends Builder[A, HashSet[A]] {
  import Node._
  import SetNode._

  /** Creates a fresh, empty bitmap-indexed root node. */
  private def newEmptyRootNode: BitmapIndexedSetNode[A] =
    new BitmapIndexedSetNode[A](0, 0, Array(), Array(), 0)

  /** The last HashSet given out as a return value of `result()`, if any, otherwise null.
    * Indicates that on the next add, the elements should be copied to an identical structure
    * before continuing mutations. */
  private var aliased: HashSet[A] = _

  private def isAliased: Boolean = aliased != null

  /** The root node of the partially built hash set */
  private var rootNode: SetNode[A] = newEmptyRootNode

  /** Running sum of the improved hashes of the elements inserted so far; passed to the
    * `HashSet` constructor by `result()`. */
  private var hash: Int = 0

  /** Returns a copy of `as` with `elem` inserted at index `ix`; trailing elements shift right.
    *
    * @throws ArrayIndexOutOfBoundsException if `ix` is negative or greater than `as.length`
    */
  private def insertElement(as: Array[Int], ix: Int, elem: Int): Array[Int] = {
    if (ix < 0 || ix > as.length) throw new ArrayIndexOutOfBoundsException(ix)
    val result = new Array[Int](as.length + 1)
    arraycopy(as, 0, result, 0, ix)
    result(ix) = elem
    arraycopy(as, ix, result, ix + 1, as.length - ix)
    result
  }

  /** Mutates `bm` to store `key` inline at bit position `bitpos`.
    * Requires that `key` is not yet present in `bm`.
    */
  private def insertValue(bm: BitmapIndexedSetNode[A], bitpos: Int, key: A, originalHash: Int): Unit = {
    val dataIx = bm.dataIndex(bitpos)
    val idx = TupleLength * dataIx

    val src = bm.content
    val dst = new Array[Any](src.length + TupleLength)

    // copy 'src' and open up TupleLength slot(s) at position 'idx' for the new element
    arraycopy(src, 0, dst, 0, idx)
    dst(idx) = key
    arraycopy(src, idx, dst, idx + TupleLength, src.length - idx)

    val dstHashes = insertElement(bm.originalHashes, dataIx, originalHash)

    bm.dataMap = bm.dataMap | bitpos
    bm.content = dst
    bm.originalHashes = dstHashes
    bm.size += 1
  }

  /** Returns a copy of `as` without the element at index `ix`; trailing elements shift left.
    *
    * @throws ArrayIndexOutOfBoundsException if `ix` is negative or not less than `as.length`
    */
  private def removeElement(as: Array[Int], ix: Int): Array[Int] = {
    if (ix < 0 || ix > as.length - 1) throw new ArrayIndexOutOfBoundsException(ix)
    val result = new Array[Int](as.length - 1)
    arraycopy(as, 0, result, 0, ix)
    arraycopy(as, ix + 1, result, ix, as.length - ix - 1)
    result
  }

  /** Mutates `bm` to replace inline data at bit position `bitpos` with node `node` */
  private def migrateFromInlineToNode(bm: BitmapIndexedSetNode[A], bitpos: Int, node: SetNode[A]): Unit = {
    val dataIx = bm.dataIndex(bitpos)
    val idxOld = TupleLength * dataIx
    val idxNew = bm.content.length - TupleLength - bm.nodeIndex(bitpos)

    val src = bm.content
    val dst = new Array[Any](src.length - TupleLength + 1)

    // copy 'src', remove TupleLength slot(s) at position 'idxOld' and
    // insert 1 slot at position 'idxNew' for the sub-node
    // assert(idxOld <= idxNew)
    arraycopy(src, 0, dst, 0, idxOld)
    arraycopy(src, idxOld + TupleLength, dst, idxOld, idxNew - idxOld)
    dst(idxNew) = node
    arraycopy(src, idxNew + TupleLength, dst, idxNew + 1, src.length - idxNew - TupleLength)

    val dstHashes = removeElement(bm.originalHashes, dataIx)

    bm.dataMap ^= bitpos
    bm.nodeMap |= bitpos
    bm.content = dst
    bm.originalHashes = dstHashes
    // one inline element leaves this node; everything held by `node` joins it
    bm.size = bm.size - 1 + node.size
  }

  /** Mutates `bm` to overwrite the inline element at bit position `bitpos` with `elem` */
  private def setValue(bm: BitmapIndexedSetNode[A], bitpos: Int, elem: A): Unit = {
    val dataIx = bm.dataIndex(bitpos)
    val idx = TupleLength * dataIx
    bm.content(idx) = elem
  }

  /** Adds `element` to the sub-trie rooted at `setNode`, mutating nodes in place.
    *
    * @param setNode      node to mutate
    * @param element      element to add
    * @param originalHash hash of `element` before `improve` is applied
    * @param elementHash  improved hash of `element`
    * @param shift        bit offset into `elementHash` used to address `setNode`'s slots
    */
  def update(setNode: SetNode[A], element: A, originalHash: Int, elementHash: Int, shift: Int): Unit =
    setNode match {
      case bm: BitmapIndexedSetNode[A] =>
        val mask = maskFrom(elementHash, shift)
        val bitpos = bitposFrom(mask)

        if ((bm.dataMap & bitpos) != 0) {
          val index = indexFrom(bm.dataMap, mask, bitpos)
          val element0 = bm.getPayload(index)
          val element0UnimprovedHash = bm.getHash(index)

          if (element0UnimprovedHash == originalHash && element0 == element) {
            // equal element already stored inline: the existing instance is written back
            setValue(bm, bitpos, element0)
          } else {
            // slot is taken by a different element: push both down into a new sub-node
            val element0Hash = improve(element0UnimprovedHash)
            val subNodeNew = bm.mergeTwoKeyValPairs(element0, element0UnimprovedHash, element0Hash, element, originalHash, elementHash, shift + BitPartitionSize)
            hash += elementHash
            migrateFromInlineToNode(bm, bitpos, subNodeNew)
          }
        } else if ((bm.nodeMap & bitpos) != 0) {
          val index = indexFrom(bm.nodeMap, mask, bitpos)
          val subNode = bm.getNode(index)
          val beforeSize = subNode.size
          update(subNode, element, originalHash, elementHash, shift + BitPartitionSize)
          // propagate whatever growth happened below into this node's size
          bm.size += subNode.size - beforeSize
        } else {
          insertValue(bm, bitpos, element, originalHash)
          hash += elementHash
        }
      case hc: HashCollisionSetNode[A] =>
        val index = hc.content.indexOf(element)
        if (index < 0) {
          hash += elementHash
          hc.content = hc.content.appended(element)
        } else {
          // equal element already present: the stored instance is replaced by `element`
          hc.content = hc.content.updated(index, element)
        }
    }

  /** If currently referencing aliased structure, copy elements to new mutable structure */
  private def ensureUnaliased(): Unit = {
    if (isAliased) copyElems()
    aliased = null
  }

  /** Copy elements to new mutable structure */
  private def copyElems(): Unit = {
    rootNode = rootNode.copy()
  }

  /** Returns the set built so far.
    *
    * An empty builder yields `HashSet.empty`. Otherwise the returned set wraps the builder's
    * own root node and is remembered in `aliased`; repeated calls without an intervening
    * mutation return that same instance.
    */
  override def result(): HashSet[A] =
    if (rootNode.size == 0) {
      HashSet.empty
    } else if (aliased != null) {
      aliased
    } else {
      aliased = new HashSet(rootNode, hash)
      releaseFence()
      aliased
    }

  /** Adds `elem` to the set under construction and returns this builder. */
  override def addOne(elem: A): this.type = {
    ensureUnaliased()
    val h = elem.##
    val im = improve(h)
    update(rootNode, elem, h, im, 0)
    this
  }

  /** Adds every element of `xs` and returns this builder.
    *
    * When `xs` is itself a `HashSet`, its trie is walked directly and the hashes stored in
    * its nodes are reused instead of calling `##` on each element again.
    */
  override def addAll(xs: IterableOnce[A]): this.type = {
    ensureUnaliased()
    xs match {
      case hm: HashSet[A] =>
        new ChampBaseIterator(hm.rootNode) {
          while (hasNext) {
            val originalHash = currentValueNode.getHash(currentValueCursor)
            update(
              setNode = rootNode,
              element = currentValueNode.getPayload(currentValueCursor),
              originalHash = originalHash,
              elementHash = improve(originalHash),
              shift = 0
            )
            currentValueCursor += 1
          }
        }
      case other =>
        val it = other.iterator
        while (it.hasNext) addOne(it.next())
    }

    this
  }

  /** Resets the builder to the empty state. */
  override def clear(): Unit = {
    aliased = null
    if (rootNode.size > 0) {
      // if rootNode is empty, we will not have given it away anyways, we instead give out the reused Set.empty
      rootNode = newEmptyRootNode
    }
    hash = 0
  }

  /** Number of elements currently held by the builder. */
  private[collection] def size: Int = rootNode.size
}
913+

0 commit comments

Comments
 (0)