70
70
package kotlinx.io
71
71
72
72
import kotlinx.io.internal.*
73
+ import kotlinx.io.unsafe.UnsafeBufferOperations
74
+ import kotlinx.io.unsafe.withData
75
+ import kotlin.math.min
73
76
74
77
/**
75
78
* Returns the number of bytes used to encode the slice of `string` as UTF-8 when using [Sink.writeString].
@@ -454,6 +457,7 @@ private fun Buffer.commonReadUtf8CodePoint(): Int {
454
457
}
455
458
}
456
459
460
+ @OptIn(UnsafeIoApi ::class )
457
461
private inline fun Buffer.commonWriteUtf8 (beginIndex : Int , endIndex : Int , charAt : (Int ) -> Char ) {
458
462
// Transcode a UTF-16 chars to UTF-8 bytes.
459
463
var i = beginIndex
@@ -462,45 +466,49 @@ private inline fun Buffer.commonWriteUtf8(beginIndex: Int, endIndex: Int, charAt
462
466
463
467
when {
464
468
c < 0x80 -> {
465
- val tail = writableSegment(1 )
466
- val data = tail.data
467
- val segmentOffset = tail.limit - i
468
- val runLimit = minOf(endIndex, Segment .SIZE - segmentOffset)
469
-
470
- // Emit a 7-bit character with 1 byte.
471
- data[segmentOffset + i++ ] = c.toByte() // 0xxxxxxx
472
-
473
- // Fast-path contiguous runs of ASCII characters. This is ugly, but yields a ~4x performance
474
- // improvement over independent calls to writeByte().
475
- while (i < runLimit) {
476
- c = charAt(i).code
477
- if (c >= 0x80 ) break
478
- data[segmentOffset + i++ ] = c.toByte() // 0xxxxxxx
469
+ UnsafeBufferOperations .writeToTail(this , 1 ) { ctx, segment ->
470
+ val segmentOffset = - i
471
+ val runLimit = minOf(endIndex, i + segment.remainingCapacity)
472
+
473
+ // Emit a 7-bit character with 1 byte.
474
+ ctx.setUnchecked(segment, segmentOffset + i++ , c.toByte()) // 0xxxxxxx
475
+
476
+ // Fast-path contiguous runs of ASCII characters. This is ugly, but yields a ~4x performance
477
+ // improvement over independent calls to writeByte().
478
+ while (i < runLimit) {
479
+ c = charAt(i).code
480
+ if (c >= 0x80 ) break
481
+ ctx.setUnchecked(segment, segmentOffset + i++ , c.toByte()) // 0xxxxxxx
482
+ }
483
+
484
+ i + segmentOffset // Equivalent to i - (previous i).
479
485
}
480
-
481
- val runSize = i + segmentOffset - tail.limit // Equivalent to i - (previous i).
482
- tail.limit + = runSize
483
- sizeMut + = runSize.toLong()
484
486
}
485
487
486
488
c < 0x800 -> {
487
489
// Emit a 11-bit character with 2 bytes.
488
- val tail = writableSegment(2 )
489
- tail.data[tail.limit] = (c shr 6 or 0xc0 ).toByte() // 110xxxxx
490
- tail.data[tail.limit + 1 ] = (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
491
- tail.limit + = 2
492
- sizeMut + = 2L
490
+ UnsafeBufferOperations .writeToTail(this , 2 ) { ctx, segment ->
491
+ ctx.setUnchecked(
492
+ segment, 0 ,
493
+ (c shr 6 or 0xc0 ).toByte(), // 110xxxxx
494
+ (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
495
+ )
496
+ 2
497
+ }
493
498
i++
494
499
}
495
500
496
501
c < 0xd800 || c > 0xdfff -> {
497
502
// Emit a 16-bit character with 3 bytes.
498
- val tail = writableSegment(3 )
499
- tail.data[tail.limit] = (c shr 12 or 0xe0 ).toByte() // 1110xxxx
500
- tail.data[tail.limit + 1 ] = (c shr 6 and 0x3f or 0x80 ).toByte() // 10xxxxxx
501
- tail.data[tail.limit + 2 ] = (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
502
- tail.limit + = 3
503
- sizeMut + = 3L
503
+ UnsafeBufferOperations .writeToTail(this , 3 ) { ctx, segment ->
504
+ ctx.setUnchecked(
505
+ segment, 0 ,
506
+ (c shr 12 or 0xe0 ).toByte(), // 1110xxxx
507
+ (c shr 6 and 0x3f or 0x80 ).toByte(), // 10xxxxxx
508
+ (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
509
+ )
510
+ 3
511
+ }
504
512
i++
505
513
}
506
514
@@ -519,20 +527,23 @@ private inline fun Buffer.commonWriteUtf8(beginIndex: Int, endIndex: Int, charAt
519
527
val codePoint = 0x010000 + (c and 0x03ff shl 10 or (low and 0x03ff ))
520
528
521
529
// Emit a 21-bit character with 4 bytes.
522
- val tail = writableSegment(4 )
523
- tail.data[tail.limit] = (codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
524
- tail.data[tail.limit + 1 ] = (codePoint shr 12 and 0x3f or 0x80 ).toByte() // 10xxxxxx
525
- tail.data[tail.limit + 2 ] = (codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxyyyy
526
- tail.data[tail.limit + 3 ] = (codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
527
- tail.limit + = 4
528
- sizeMut + = 4L
530
+ UnsafeBufferOperations .writeToTail(this , 4 ) { ctx, segment ->
531
+ ctx.setUnchecked(segment, 0 ,
532
+ (codePoint shr 18 or 0xf0 ).toByte(), // 11110xxx
533
+ (codePoint shr 12 and 0x3f or 0x80 ).toByte(), // 10xxxxxx
534
+ (codePoint shr 6 and 0x3f or 0x80 ).toByte(), // 10xxyyyy
535
+ (codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
536
+ )
537
+ 4
538
+ }
529
539
i + = 2
530
540
}
531
541
}
532
542
}
533
543
}
534
544
}
535
545
546
+ @OptIn(UnsafeIoApi ::class )
536
547
private fun Buffer.commonWriteUtf8CodePoint (codePoint : Int ) {
537
548
when {
538
549
codePoint < 0 || codePoint > 0x10ffff -> {
@@ -548,11 +559,11 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
548
559
549
560
codePoint < 0x800 -> {
550
561
// Emit a 11-bit code point with 2 bytes.
551
- val tail = writableSegment( 2 )
552
- tail.data[tail.limit] = (codePoint shr 6 or 0xc0 ).toByte() // 110xxxxx
553
- tail.data[tail.limit + 1 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10xxxxxx
554
- tail.limit + = 2
555
- sizeMut + = 2L
562
+ UnsafeBufferOperations .writeToTail( this , 2 ) { ctx, segment ->
563
+ ctx.setUnchecked(segment, 0 , (codePoint shr 6 or 0xc0 ).toByte() ) // 110xxxxx
564
+ ctx.setUnchecked(segment, 1 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
565
+ 2
566
+ }
556
567
}
557
568
558
569
codePoint in 0xd800 .. 0xdfff -> {
@@ -562,48 +573,47 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
562
573
563
574
codePoint < 0x10000 -> {
564
575
// Emit a 16-bit code point with 3 bytes.
565
- val tail = writableSegment( 3 )
566
- tail.data[tail.limit] = (codePoint shr 12 or 0xe0 ).toByte() // 1110xxxx
567
- tail.data[tail.limit + 1 ] = ( codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxxxxx
568
- tail.data[tail.limit + 2 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10xxxxxx
569
- tail.limit + = 3
570
- sizeMut + = 3L
576
+ UnsafeBufferOperations .writeToTail( this , 3 ) { ctx, segment ->
577
+ ctx.setUnchecked(segment, 0 , (codePoint shr 12 or 0xe0 ).toByte() ) // 1110xxxx
578
+ ctx.setUnchecked(segment, 1 , ( codePoint shr 6 and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
579
+ ctx.setUnchecked(segment, 2 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
580
+ 3
581
+ }
571
582
}
572
583
573
584
else -> { // [0x10000, 0x10ffff]
574
585
// Emit a 21-bit code point with 4 bytes.
575
- val tail = writableSegment( 4 )
576
- tail.data[tail.limit] = ( codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
577
- tail.data[tail.limit + 1 ] = ( codePoint shr 12 and 0x3f or 0x80 ).toByte() // 10xxxxxx
578
- tail.data[tail.limit + 2 ] = ( codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxyyyy
579
- tail.data[tail.limit + 3 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
580
- tail.limit + = 4
581
- sizeMut + = 4L
586
+ UnsafeBufferOperations .writeToTail( this , 4 ) { ctx, segment ->
587
+ ctx.setUnchecked(segment, 0 , ( codePoint shr 18 or 0xf0 ).toByte() ) // 11110xxx
588
+ ctx.setUnchecked(segment, 1 , ( codePoint shr 12 and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
589
+ ctx.setUnchecked(segment, 2 , ( codePoint shr 6 and 0x3f or 0x80 ).toByte() ) // 10xxyyyy
590
+ ctx.setUnchecked(segment, 3 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10yyyyyy
591
+ 4
592
+ }
582
593
}
583
594
}
584
595
}
585
596
597
/**
 * Consumes [byteCount] bytes from the front of this buffer and returns them decoded with
 * `commonToUtf8String`.
 *
 * When the head segment alone holds at least [byteCount] bytes, the string is decoded directly from
 * that segment's backing array and the bytes are then skipped. Otherwise the bytes are first copied
 * out with [readByteArray] and decoded from the copy.
 *
 * @param byteCount number of bytes to consume; must not be negative or greater than [Int.MAX_VALUE].
 * @return the decoded string, or an empty string when [byteCount] is zero.
 * @throws IllegalArgumentException if [byteCount] is negative or greater than [Int.MAX_VALUE].
 */
@OptIn(UnsafeIoApi::class)
private fun Buffer.commonReadUtf8(byteCount: Long): String {
    require(byteCount >= 0 && byteCount <= Int.MAX_VALUE) {
        "byteCount ($byteCount) is not within the range [0..${Int.MAX_VALUE})"
    }
    // NOTE(review): presumably fails when fewer than byteCount bytes are buffered — confirm
    // against the declaration of Buffer.require.
    require(byteCount)
    // Reading zero bytes yields an empty string, not a placeholder character.
    if (byteCount == 0L) return ""

    UnsafeBufferOperations.iterate(this) { ctx, head ->
        // byteCount is positive here, so a head segment is expected to exist.
        val segment = checkNotNull(head) { "Buffer has no head segment to read from" }
        if (segment.size >= byteCount) {
            ctx.withData(segment) { data, pos, limit ->
                // Fast path: the whole string lives in the head segment; decode it in place.
                val decoded = data.commonToUtf8String(pos, min(limit, pos + byteCount.toInt()))
                // Consume the decoded bytes only after decoding succeeded.
                skip(byteCount)
                return decoded
            }
        }
    }

    // The string spans multiple segments: copy the bytes out, then decode the copy.
    return readByteArray(byteCount.toInt()).commonToUtf8String()
}
0 commit comments