Skip to content

Some performance improvements in the stdlib, mainly for small string literals #16890

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 2 additions & 18 deletions lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -686,20 +686,6 @@ SILInstruction *SILCombiner::visitCondFailInst(CondFailInst *CFI) {
return nullptr;
}

static bool isValueToBridgeObjectremovable(ValueToBridgeObjectInst *VTBOI) {
SILValue operand = VTBOI->getOperand();
// If operand is a struct $UInt (% : $Builtin.), fetch the real source
if (auto *SI = dyn_cast<StructInst>(operand)) {
assert(SI->SILInstruction::getAllOperands().size() == 1 &&
"Expected a single operand");
operand = SI->getOperand(0);
}
if (auto *BI = dyn_cast<BuiltinInst>(operand)) {
return (BI->getBuiltinInfo().ID == BuiltinValueKind::StringObjectOr);
}
return false;
}

SILInstruction *SILCombiner::visitStrongRetainInst(StrongRetainInst *SRI) {
// Retain of ThinToThickFunction is a no-op.
SILValue funcOper = SRI->getOperand();
Expand All @@ -718,8 +704,7 @@ SILInstruction *SILCombiner::visitStrongRetainInst(StrongRetainInst *SRI) {
// builtin "stringObjectOr_Int64" (or to tag the string)
// value_to_bridge_object (cast the UInt to bridge object)
if (auto *VTBOI = dyn_cast<ValueToBridgeObjectInst>(SRI->getOperand())) {
if (isValueToBridgeObjectremovable(VTBOI))
return eraseInstFromFunction(*SRI);
return eraseInstFromFunction(*SRI);
}

// Sometimes in the stdlib due to hand offs, we will see code like:
Expand Down Expand Up @@ -1169,8 +1154,7 @@ SILInstruction *SILCombiner::visitStrongReleaseInst(StrongReleaseInst *SRI) {
// builtin "stringObjectOr_Int64" (or to tag the string)
// value_to_bridge_object (cast the UInt to bridge object)
if (auto *VTBOI = dyn_cast<ValueToBridgeObjectInst>(SRI->getOperand())) {
if (isValueToBridgeObjectremovable(VTBOI))
return eraseInstFromFunction(*SRI);
return eraseInstFromFunction(*SRI);
}

// Release of a classbound existential converted from a class is just a
Expand Down
1 change: 0 additions & 1 deletion stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ set(SWIFTLIB_ESSENTIAL
Runtime.swift.gyb
RuntimeFunctionCounters.swift
SipHash.swift
SentinelCollection.swift
Sequence.swift
SequenceAlgorithms.swift
SequenceWrapper.swift
Expand Down
2 changes: 2 additions & 0 deletions stdlib/public/core/ExistentialCollection.swift.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ internal final class _${Kind}Box<S : ${Kind}> : _Any${Kind}Box<S.Iterator.Elemen
{
internal typealias Element = S.Element

@inline(__always)
@inlinable
internal override func _makeIterator() -> AnyIterator<Element> {
return AnyIterator(_base.makeIterator())
Expand Down Expand Up @@ -710,6 +711,7 @@ extension Any${Kind} {
% else:
/// Returns an iterator over the elements of this collection.
% end
@inline(__always)
@inlinable
public func makeIterator() -> Iterator {
return _box._makeIterator()
Expand Down
1 change: 0 additions & 1 deletion stdlib/public/core/GroupInfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
"Repeat.swift",
"Sort.swift",
"Range.swift",
"SentinelCollection.swift",
"ClosedRange.swift",
"CollectionOfOne.swift",
"HeapBuffer.swift",
Expand Down
143 changes: 0 additions & 143 deletions stdlib/public/core/SentinelCollection.swift

This file was deleted.

37 changes: 27 additions & 10 deletions stdlib/public/core/SmallString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -127,25 +127,30 @@ extension _SmallUTF8String {
#endif
}

@inline(__always)
@inlinable
@effects(readonly)
public // @testable
init?(_ codeUnits: UnsafeBufferPointer<UInt8>) {
#if arch(i386) || arch(arm)
return nil // Never form small strings on 32-bit
#else
let count = codeUnits.count
guard count <= _SmallUTF8String.capacity else { return nil }
self.init()
self._withAllUnsafeMutableBytes { rawBufPtr in
let rawDst = rawBufPtr.baseAddress._unsafelyUnwrappedUnchecked
memcpy_(
dst: rawDst.assumingMemoryBound(to: UInt8.self),
src: codeUnits.baseAddress._unsafelyUnwrappedUnchecked,
count: count
)

let addr = codeUnits.baseAddress._unsafelyUnwrappedUnchecked
var high: UInt
let lowCount: Int
if count > 8 {
lowCount = 8
high = _bytesToUInt(addr + 8, count &- 8)
} else {
lowCount = count
high = 0
}
_sanityCheck(self.count == 0, "overwrote count early?")
self.count = count
high |= (UInt(count) &<< (8*15))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you instead put this in the setter for count, so that all other code paths can benefit?

Also, rather than have a naked 15, you can use capacity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, the count setter would have to mask out the old value, which is not needed here.
Also, I followed the count getter+setter pattern which also hard-code the '15'.

let low = _bytesToUInt(addr, lowCount)
_storage = (low, high)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the code would be more clear if this was in the if-else blocks. We'd also not need to introduce so many in-scope variables.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to reduce the number of calls to _bytesToUInt. Although llvm can fold away the whole loop, this is not yet done on SIL level, so duplicating _bytes_ToUInt in the then- and else-branch generates an additional loop in SIL.


// FIXME: support transcoding
if !self.isASCII { return nil }
Expand All @@ -169,6 +174,18 @@ extension _SmallUTF8String {
}
}

@inline(__always)
@inlinable
func _bytesToUInt(_ input: UnsafePointer<UInt8>, _ c: Int) -> UInt {
var r: UInt = 0
var shift: Int = 0
for idx in 0..<c {
r = r | (UInt(input[idx]) &<< shift)
shift = shift &+ 8
}
return r
}
Copy link
Member

@milseman milseman May 29, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this whole function just a (potentially-unaligned) load and bitfield-insert mask? Can you verify what code is generated here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The whole code is folded away (that's the point of the change). I added a test for that: SILOptimizer/string_literals.swift

Copy link
Member

@milseman milseman May 29, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(following up), this code is also called for many other dynamic String initializations, so we'd want efficient code even when it can't be constant folded.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Erik and I discussed changing the UBP representation. I couldn't think of any good reason to keep the (start, end] representation for the buffer, as long as the iterator keeps its representation.


//
// Small string read interface
//
Expand Down
8 changes: 3 additions & 5 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,8 @@ extension String {
decodingCString nullTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
as sourceEncoding: Encoding.Type) {

let codeUnits = _SentinelCollection(
UnsafeBufferPointer(_unboundedStartingAt: nullTerminatedCodeUnits),
until: _IsZero()
)
self.init(decoding: codeUnits, as: sourceEncoding)
self = String.decodeCString(
nullTerminatedCodeUnits, as: sourceEncoding)!.result
}

/// Calls the given closure with a pointer to the contents of the string,
Expand Down Expand Up @@ -737,6 +734,7 @@ extension String : _ExpressibleByBuiltinUTF16StringLiteral {
}

extension String : _ExpressibleByBuiltinStringLiteral {
@inline(__always)
@inlinable
@effects(readonly)
@_semantics("string.makeUTF8")
Expand Down
Loading