Skip to content

[WIP][stdlib] Dictionary: Add unsafe bulk-loading initializer #21208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions stdlib/public/core/DictionaryBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,134 @@ struct _DictionaryBuilder<Key: Hashable, Value> {
return Dictionary(_native: _target)
}
}

extension Dictionary {
  /// Creates a new dictionary with the specified capacity, then calls the given
  /// closure to initialize its contents.
  ///
  /// Foundation uses this initializer to bridge the contents of an NSDictionary
  /// instance without allocating a pair of intermediary buffers. Pass the
  /// required capacity and a closure that can initialize the dictionary's
  /// elements. The closure must set `initializedCount` to `c`, the number of
  /// initialized elements in both buffers, such that the elements in the range
  /// `0..<c` are initialized and the elements in the range `c..<capacity` are
  /// uninitialized.
  ///
  /// The resulting dictionary has a `count` less than or equal to `c`. The
  /// actual count is less iff some of the initialized keys were duplicates.
  /// (This cannot happen if `allowingDuplicates` is false.)
  ///
  /// The buffers passed to the closure are only valid for the duration of the
  /// call. After the closure returns, this initializer moves all initialized
  /// elements into their correct buckets.
  ///
  /// - Parameters:
  ///   - capacity: The capacity of the new dictionary.
  ///   - allowingDuplicates: If false, then the caller guarantees that all keys
  ///     are unique. This promise isn't verified -- if it turns out to be
  ///     false, then the resulting dictionary won't be valid.
  ///   - initializer: A closure that can initialize the dictionary's elements.
  ///     This closure must set its `inout` count parameter to the number of
  ///     initialized elements, starting at the beginning of the buffer.
  @inlinable
  public // SPI(Foundation)
  init(
    _unsafeUninitializedCapacity capacity: Int,
    allowingDuplicates: Bool,
    initializingWith initializer: (
      _ keys: UnsafeMutableBufferPointer<Key>,
      _ values: UnsafeMutableBufferPointer<Value>,
      _ initializedCount: inout Int
    ) -> Void
  ) {
    // Thin forwarding wrapper: the in-place hashing of the bulk-loaded
    // elements happens in the _NativeDictionary initializer below.
    self.init(_native: _NativeDictionary(
      _unsafeUninitializedCapacity: capacity,
      allowingDuplicates: allowingDuplicates,
      initializingWith: initializer))
  }
}

extension _NativeDictionary {
  /// Allocates native dictionary storage of the given capacity, lets `initializer`
  /// bulk-load keys and values into the flat element buffers, then hashes each
  /// loaded element into its correct bucket in place.
  ///
  /// - Parameters:
  ///   - capacity: The capacity of the new storage.
  ///   - allowingDuplicates: If false, the caller promises all loaded keys are
  ///     unique; duplicates then trap only in debug configurations.
  ///   - initializer: Closure that initializes a prefix of the key/value
  ///     buffers and reports the initialized count via its `inout` parameter.
  @inlinable
  internal init(
    _unsafeUninitializedCapacity capacity: Int,
    allowingDuplicates: Bool,
    initializingWith initializer: (
      _ keys: UnsafeMutableBufferPointer<Key>,
      _ values: UnsafeMutableBufferPointer<Value>,
      _ initializedCount: inout Int
    ) -> Void
  ) {
    self.init(capacity: capacity)
    var initializedCount = 0
    initializer(
      UnsafeMutableBufferPointer(start: _keys, count: capacity),
      UnsafeMutableBufferPointer(start: _values, count: capacity),
      &initializedCount)
    _precondition(initializedCount >= 0 && initializedCount <= capacity)
    _storage._count = initializedCount

    // Hash initialized elements and move each of them into their correct
    // buckets.
    //
    // - We have some number of unprocessed elements at the start of the
    //   key/value buffers -- buckets up to and including `bucket`. Everything
    //   in this region is either unprocessed or in use. There are no
    //   uninitialized entries in it.
    //
    // - Everything after `bucket` is either uninitialized or in use. This
    //   region works exactly like regular dictionary storage.
    //
    // - "in use" is tracked by the bitmap in `hashTable`, the same way it would
    //   be for a working Dictionary.
    //
    // Each iteration of the loop below processes an unprocessed element, and/or
    // reduces the size of the unprocessed region, while ensuring the above
    // invariants.
    //
    // Note: `bucket` starts at -1 when the closure initialized nothing; the
    // Bucket type permits that sentinel and the loop body never runs.
    var bucket = _HashTable.Bucket(offset: initializedCount - 1)
    while bucket.offset >= 0 {
      if hashTable._isOccupied(bucket) {
        // We've moved an element here in a previous iteration.
        bucket.offset -= 1
        continue
      }
      // Find the target bucket for this entry and mark it as in use.
      let target: Bucket
      if _isDebugAssertConfiguration() || allowingDuplicates {
        let (b, found) = find(_keys[bucket.offset])
        if found {
          _internalInvariant(b != bucket)
          _precondition(allowingDuplicates, "Duplicate keys found")
          // Discard duplicate entry.
          uncheckedDestroy(at: bucket)
          _storage._count -= 1
          bucket.offset -= 1
          continue
        }
        hashTable.insert(b)
        target = b
      } else {
        let hashValue = self.hashValue(for: _keys[bucket.offset])
        target = hashTable.insertNew(hashValue: hashValue)
      }

      if target > bucket {
        // The target is outside the unprocessed region. We can simply move the
        // entry, leaving behind an uninitialized bucket.
        moveEntry(from: bucket, to: target)
        // Restore invariants by lowering the region boundary.
        bucket.offset -= 1
      } else if target == bucket {
        // Already in place.
        bucket.offset -= 1
      } else {
        // The target bucket is also in the unprocessed region. Swap the current
        // item into place, then try again with the swapped-in value, so that we
        // don't lose it.
        swapEntry(target, with: bucket)
      }
    }
    // When there are no more unprocessed entries, we're left with a valid
    // Dictionary.
  }
}
1 change: 0 additions & 1 deletion stdlib/public/core/HashTable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ extension _HashTable {
@inlinable
@inline(__always)
internal init(offset: Int) {
  // NOTE(review): per this PR's diff header ("0 additions & 1 deletion"), the
  // `offset >= 0` assertion on the next line is the DELETED side of the diff;
  // it appears here only because this is a scraped diff view. The removal lets
  // a Bucket temporarily hold -1, which the bulk-loading loop produces when
  // the initializer closure initialized zero elements. TODO confirm against
  // the merged HashTable.swift.
  _internalInvariant(offset >= 0)
  self.offset = offset
}

Expand Down
13 changes: 12 additions & 1 deletion stdlib/public/core/NativeDictionary.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ extension _NativeDictionary { // Low-level unchecked operations
@inline(__always)
internal func uncheckedDestroy(at bucket: Bucket) {
  // Deinitializes the key and value stored at `bucket` without touching the
  // hash table's occupancy bitmap; the caller is responsible for bookkeeping.
  defer { _fixLifetime(self) }
  // NOTE(review): the two assertions below are the old (`isOccupied`) and new
  // (`isValid`) sides of this scraped diff -- only the `isValid` form exists
  // after the change. The relaxation allows destroying a loaded-but-not-yet-
  // inserted entry during bulk loading. TODO confirm against the merged
  // NativeDictionary.swift.
  _internalInvariant(hashTable.isOccupied(bucket))
  _internalInvariant(hashTable.isValid(bucket))
  (_keys + bucket.offset).deinitialize(count: 1)
  (_values + bucket.offset).deinitialize(count: 1)
}
Expand Down Expand Up @@ -619,11 +619,22 @@ extension _NativeDictionary: _HashTableDelegate {
@inlinable
@inline(__always)
internal func moveEntry(from source: Bucket, to target: Bucket) {
  // Relocate one key/value pair between buckets, leaving `source`
  // deinitialized. Both buckets must be in-bounds for this storage.
  _internalInvariant(hashTable.isValid(source))
  _internalInvariant(hashTable.isValid(target))
  let src = source.offset
  let dst = target.offset
  (_keys + dst).moveInitialize(from: _keys + src, count: 1)
  (_values + dst).moveInitialize(from: _values + src, count: 1)
}

@inlinable
@inline(__always)
internal func swapEntry(_ left: Bucket, with right: Bucket) {
  // Exchange the key/value pairs stored in two (valid, initialized) buckets.
  _internalInvariant(hashTable.isValid(left))
  _internalInvariant(hashTable.isValid(right))
  let i = left.offset
  let j = right.offset
  swap(&_keys[i], &_keys[j])
  swap(&_values[i], &_values[j])
}
}

extension _NativeDictionary { // Deletion
Expand Down
56 changes: 56 additions & 0 deletions validation-test/stdlib/Dictionary.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5547,6 +5547,62 @@ DictionaryTestSuite.test("IndexValidation.RemoveAt.AfterGrow") {
d.remove(at: i)
}

DictionaryTestSuite.test("BulkLoadingInitializer.Unique") {
  // Bulk-load dictionaries of several sizes with all-unique keys, then verify
  // the result matches a dictionary built through the regular initializer.
  for capacity in [0, 1, 2, 3, 5, 10, 25, 150] {
    let bulkLoaded = Dictionary<TestKeyTy, TestEquatableValueTy>(
      _unsafeUninitializedCapacity: capacity,
      allowingDuplicates: false
    ) { keys, values, count in
      for offset in 0 ..< capacity {
        keys.baseAddress!.advanced(by: offset)
          .initialize(to: TestKeyTy(offset))
        values.baseAddress!.advanced(by: offset)
          .initialize(to: TestEquatableValueTy(offset))
        count += 1
      }
    }

    let expected = Dictionary(
      uniqueKeysWithValues: (0 ..< capacity).map {
        (TestKeyTy($0), TestEquatableValueTy($0))
      })

    for offset in 0 ..< capacity {
      expectEqual(
        TestEquatableValueTy(offset),
        bulkLoaded[TestKeyTy(offset)])
    }
    expectEqual(expected, bulkLoaded)
  }
}

DictionaryTestSuite.test("BulkLoadingInitializer.Nonunique") {
  // Bulk-load dictionaries where every key appears twice (i / 2 collapses
  // consecutive pairs), then check the duplicates were discarded correctly.
  for capacity in [0, 1, 2, 3, 5, 10, 25, 150] {
    let bulkLoaded = Dictionary<TestKeyTy, TestEquatableValueTy>(
      _unsafeUninitializedCapacity: capacity,
      allowingDuplicates: true
    ) { keys, values, count in
      for offset in 0 ..< capacity {
        keys.baseAddress!.advanced(by: offset)
          .initialize(to: TestKeyTy(offset / 2))
        values.baseAddress!.advanced(by: offset)
          .initialize(to: TestEquatableValueTy(offset / 2))
        count += 1
      }
    }

    let expected = Dictionary(
      (0 ..< capacity).map {
        (TestKeyTy($0 / 2), TestEquatableValueTy($0 / 2))
      },
      uniquingKeysWith: { first, _ in first })

    expectEqual(bulkLoaded.count, expected.count)
    for key in 0 ..< capacity / 2 {
      expectEqual(
        TestEquatableValueTy(key),
        bulkLoaded[TestKeyTy(key)])
    }
    expectEqual(expected, bulkLoaded)
  }
}

DictionaryTestSuite.setUp {
#if _runtime(_ObjC)
// Exercise ARC's autoreleased return value optimization in Foundation.
Expand Down