Skip to content

Commit f338e2d

Browse files
authored
Speedup indexof (#201)
Improved Source.indexOf performance.
1 parent b4920ed commit f338e2d

File tree

5 files changed

+77
-11
lines changed

5 files changed

+77
-11
lines changed

core/api/kotlinx-io-core.api

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ public final class kotlinx/io/BuffersJvmKt {
4848
}
4949

5050
public final class kotlinx/io/BuffersKt {
51+
public static final fun indexOf (Lkotlinx/io/Buffer;BJJ)J
52+
public static synthetic fun indexOf$default (Lkotlinx/io/Buffer;BJJILjava/lang/Object;)J
5153
public static final fun snapshot (Lkotlinx/io/Buffer;)Lkotlinx/io/bytestring/ByteString;
5254
}
5355

core/common/src/Buffer.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ public class Buffer : Source, Sink {
648648
* Invoke `lambda` with the segment and offset at `fromIndex`. Searches from the front or the back
649649
* depending on what's closer to `fromIndex`.
650650
*/
651-
private inline fun <T> Buffer.seek(
651+
internal inline fun <T> Buffer.seek(
652652
fromIndex: Long,
653653
lambda: (Segment?, Long) -> T
654654
): T {

core/common/src/Buffers.kt

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,53 @@ public fun Buffer.snapshot(): ByteString {
2626
curr = curr.next
2727
} while (curr !== head)
2828
}
29-
}
29+
}
30+
31+
/**
 * Returns the index of the first occurrence of [byte] in the range from [startIndex] to [endIndex],
 * or `-1` when the range doesn't contain [byte].
 *
 * The scan terminates at either [endIndex] or the buffer's exhaustion, whichever comes first.
 *
 * @param byte the value to find.
 * @param startIndex the start of the range (inclusive) to find [byte], `0` by default.
 * @param endIndex the end of the range (exclusive) to find [byte], [Buffer.size] by default.
 *
 * @throws IllegalStateException when the source is closed.
 * @throws IllegalArgumentException when `startIndex > endIndex` or either of indices is negative.
 *
 * @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.indexOfByteSample
 */
public fun Buffer.indexOf(byte: Byte, startIndex: Long = 0, endIndex: Long = size): Long {
    // For compatibility with Source.indexOf accept endIndices greater than size and truncate them.
    val endOffset = minOf(endIndex, size)
    checkBounds(size, startIndex, endOffset)
    if (startIndex == endOffset) return -1L

    seek(startIndex) { seg, o ->
        // NOTE(review): an offset of -1 presumably means seek found no segment for startIndex - confirm.
        if (o == -1L) {
            return -1L
        }
        var segment = seg!!
        // Absolute buffer position of the first byte of the current segment.
        var offset = o
        do {
            check(endOffset > offset)
            val idx = segment.indexOf(
                byte,
                // If start index is within this segment, the diff will be positive and
                // we'll scan the segment starting from the corresponding offset.
                // Otherwise, the diff will be negative and we'll scan the segment from the beginning.
                // The clamp happens on Long values: narrowing the raw difference first could wrap
                // around for buffers holding more than Int.MAX_VALUE bytes.
                maxOf(startIndex - offset, 0L).toInt(),
                // If endOffset is within this segment - scan until it, otherwise - scan whole segment.
                // Clamped in Long for the same reason; the result never exceeds segment.size.
                minOf(segment.size.toLong(), endOffset - offset).toInt()
            )
            if (idx != -1) {
                // offset corresponds to the segment's start, idx - to offset within the segment.
                return offset + idx.toLong()
            }
            offset += segment.size
            segment = segment.next!!
        } while (segment !== head && offset < endOffset)
        return -1L
    }
}

core/common/src/Segment.kt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ internal class Segment {
185185
pos += byteCount
186186
}
187187

188+
/** The number of bytes between [pos] and [limit], computed as `limit - pos` on every access. */
val size: Int
189+
get() = limit - pos
190+
188191
companion object {
189192
/** The size of all segments in bytes. */
190193
const val SIZE = 8192
@@ -193,3 +196,17 @@ internal class Segment {
193196
const val SHARE_MINIMUM = 1024
194197
}
195198
}
199+
200+
/**
 * Looks for the first occurrence of [byte] in this segment's `data`, scanning positions
 * `pos + startOffset` (inclusive) up to `pos + endOffset` (exclusive).
 *
 * @param byte the value to look for.
 * @param startOffset offset relative to `pos` where the scan starts; must lie in `0 until size`.
 * @param endOffset offset relative to `pos` where the scan stops; must lie in `startOffset..size`.
 * @return the offset of the match relative to `pos`, or `-1` if no byte in the range matches.
 *
 * @throws IllegalArgumentException when either offset violates the bounds described above.
 */
internal fun Segment.indexOf(byte: Byte, startOffset: Int, endOffset: Int): Int {
    require(startOffset >= 0 && startOffset < size) { "$startOffset" }
    require(endOffset >= startOffset && endOffset <= size) { "$endOffset" }

    // Read the base position once; the loop works with offsets relative to it.
    val base = pos
    var cursor = startOffset
    while (cursor < endOffset) {
        if (data[base + cursor] == byte) {
            return cursor
        }
        cursor++
    }
    return -1
}

core/common/src/Sources.kt

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ public fun Source.readHexadecimalUnsignedLong(): Long {
181181
*
182182
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.indexOfByteSample
183183
*/
184+
@OptIn(InternalIoApi::class)
184185
public fun Source.indexOf(byte: Byte, startIndex: Long = 0L, endIndex: Long = Long.MAX_VALUE): Long {
185186
require(startIndex in 0..endIndex) {
186187
if (endIndex < 0) {
@@ -192,15 +193,12 @@ public fun Source.indexOf(byte: Byte, startIndex: Long = 0L, endIndex: Long = Lo
192193
if (startIndex == endIndex) return -1L
193194

194195
var offset = startIndex
195-
val peekSource = peek()
196-
197-
if (!peekSource.request(offset)) {
198-
return -1L
199-
}
200-
peekSource.skip(offset)
201-
while (offset < endIndex && peekSource.request(1)) {
202-
if (peekSource.readByte() == byte) return offset
203-
offset++
196+
while (offset < endIndex && request(offset + 1)) {
197+
val idx = buffer.indexOf(byte, offset, minOf(endIndex, buffer.size))
198+
if (idx != -1L) {
199+
return idx
200+
}
201+
offset = buffer.size
204202
}
205203
return -1L
206204
}

0 commit comments

Comments
 (0)