Skip to content

Speedup indexof #201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/api/kotlinx-io-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public final class kotlinx/io/BuffersJvmKt {
}

public final class kotlinx/io/BuffersKt {
public static final fun indexOf (Lkotlinx/io/Buffer;BJJ)J
public static synthetic fun indexOf$default (Lkotlinx/io/Buffer;BJJILjava/lang/Object;)J
public static final fun snapshot (Lkotlinx/io/Buffer;)Lkotlinx/io/bytestring/ByteString;
}

Expand Down
2 changes: 1 addition & 1 deletion core/common/src/Buffer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ public class Buffer : Source, Sink {
* Invoke `lambda` with the segment and offset at `fromIndex`. Searches from the front or the back
* depending on what's closer to `fromIndex`.
*/
private inline fun <T> Buffer.seek(
internal inline fun <T> Buffer.seek(
fromIndex: Long,
lambda: (Segment?, Long) -> T
): T {
Expand Down
51 changes: 50 additions & 1 deletion core/common/src/Buffers.kt
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,53 @@ public fun Buffer.snapshot(): ByteString {
curr = curr.next
} while (curr !== head)
}
}
}

/**
 * Returns the index of the first occurrence of [byte] in the range from [startIndex] to [endIndex],
 * or `-1` when the range doesn't contain [byte].
 *
 * The scan terminates at either [endIndex] or the buffer's exhaustion, whichever comes first.
 *
 * @param byte the value to find.
 * @param startIndex the start of the range (inclusive) to find [byte], `0` by default.
 * @param endIndex the end of the range (exclusive) to find [byte], [Buffer.size] by default.
 * Values greater than [Buffer.size] are truncated to [Buffer.size].
 *
 * @throws IllegalStateException when the source is closed.
 * @throws IllegalArgumentException when `startIndex > endIndex` or either of indices is negative.
 *
 * @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.indexOfByteSample
 */
public fun Buffer.indexOf(byte: Byte, startIndex: Long = 0, endIndex: Long = size): Long {
    // For compatibility with Source.indexOf accept endIndices greater than size and truncate them.
    val endOffset = minOf(endIndex, size)
    checkBounds(size, startIndex, endOffset)
    if (startIndex == endOffset) return -1L

    seek(startIndex) { seg, o ->
        // seek reports -1 as the offset when there is no segment to start from.
        if (o == -1L) {
            return -1L
        }
        var segment = seg!!
        // Absolute index (within the buffer) of the first byte of `segment`.
        var offset = o
        do {
            check(endOffset > offset)
            val idx = segment.indexOf(
                byte,
                // If start index is within this segment, the diff will be positive and
                // we'll scan the segment starting from the corresponding offset.
                // Otherwise, the diff will be negative and we'll scan the segment from the beginning.
                // Clamp while still a Long: narrowing a large negative diff to Int first could
                // wrap around to a positive value for buffers larger than 2 GiB.
                maxOf(startIndex - offset, 0L).toInt(),
                // If endOffset is within this segment - scan until it, otherwise - scan whole segment.
                // Clamp while still a Long: the remaining distance may exceed Int.MAX_VALUE and
                // would otherwise wrap to a negative value when narrowed.
                minOf(segment.size.toLong(), endOffset - offset).toInt()
            )
            if (idx != -1) {
                // offset corresponds to the segment's start, idx - to offset within the segment.
                return offset + idx.toLong()
            }
            offset += segment.size
            segment = segment.next!!
        } while (segment !== head && offset < endOffset)
        return -1L
    }
}
17 changes: 17 additions & 0 deletions core/common/src/Segment.kt
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ internal class Segment {
pos += byteCount
}

/** The difference between [limit] and [pos], recomputed on every access. */
val size: Int
    get() {
        return limit - pos
    }
companion object {
/** The size of all segments in bytes. */
const val SIZE = 8192
Expand All @@ -193,3 +196,17 @@ internal class Segment {
const val SHARE_MINIMUM = 1024
}
}

/**
 * Looks for [byte] in this segment's data, scanning the segment-relative range
 * from [startOffset] (inclusive) to [endOffset] (exclusive).
 *
 * Offsets are relative to [pos]: offset `0` refers to `data[pos]`.
 *
 * @return the segment-relative offset of the first match, or `-1` if [byte] is not in the range.
 * @throws IllegalArgumentException when [startOffset] is not in `0 until size`
 * or [endOffset] is not in `startOffset..size`.
 */
internal fun Segment.indexOf(byte: Byte, startOffset: Int, endOffset: Int): Int {
    require(startOffset in 0 until size) {
        "$startOffset"
    }
    require(endOffset in startOffset..size) { "$endOffset" }
    // Capture the position once; the scan addresses data relative to it.
    val base = pos
    var cursor = startOffset
    while (cursor < endOffset) {
        if (data[base + cursor] == byte) return cursor
        cursor++
    }
    return -1
}
16 changes: 7 additions & 9 deletions core/common/src/Sources.kt
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ public fun Source.readHexadecimalUnsignedLong(): Long {
*
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.indexOfByteSample
*/
@OptIn(InternalIoApi::class)
public fun Source.indexOf(byte: Byte, startIndex: Long = 0L, endIndex: Long = Long.MAX_VALUE): Long {
require(startIndex in 0..endIndex) {
if (endIndex < 0) {
Expand All @@ -192,15 +193,12 @@ public fun Source.indexOf(byte: Byte, startIndex: Long = 0L, endIndex: Long = Lo
if (startIndex == endIndex) return -1L

var offset = startIndex
val peekSource = peek()

if (!peekSource.request(offset)) {
return -1L
}
peekSource.skip(offset)
while (offset < endIndex && peekSource.request(1)) {
if (peekSource.readByte() == byte) return offset
offset++
while (offset < endIndex && request(offset + 1)) {
val idx = buffer.indexOf(byte, offset, minOf(endIndex, buffer.size))
if (idx != -1L) {
return idx
}
offset = buffer.size
}
return -1L
}
Expand Down