@@ -122,6 +122,9 @@ internal fun String.utf8Size(startIndex: Int = 0, endIndex: Int = length): Long
122
122
/**
123
123
* Encodes [codePoint] in UTF-8 and writes it to this sink.
124
124
*
125
+ * [codePoint] should represent a valid Unicode code point, meaning that its value should be within the Unicode codespace
126
+ * (`U+000000` .. `U+10ffff`), otherwise [IllegalArgumentException] will be thrown.
127
+ *
125
128
* Note that in general, a value retrieved from [Char.code] cannot be written directly
126
129
* as it may be a part of a [surrogate pair](https://www.unicode.org/faq/utf_bom.html#utf16-2) (that could be
127
130
* detected using [Char.isSurrogate], or [Char.isHighSurrogate] and [Char.isLowSurrogate]).
@@ -136,6 +139,7 @@ internal fun String.utf8Size(startIndex: Int = 0, endIndex: Int = length): Long
136
139
* @param codePoint the codePoint to be written.
137
140
*
138
141
* @throws IllegalStateException when the sink is closed.
142
+ * @throws IllegalArgumentException when [codePoint] value is negative, or greater than `U+10ffff`.
139
143
*
140
144
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.writeUtf8CodePointSample
141
145
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.writeSurrogatePair
@@ -510,6 +514,12 @@ private fun Buffer.commonWriteUtf8(string: String, beginIndex: Int, endIndex: In
510
514
511
515
private fun Buffer.commonWriteUtf8CodePoint (codePoint : Int ) {
512
516
when {
517
+ codePoint < 0 || codePoint > 0x10ffff -> {
518
+ throw IllegalArgumentException (
519
+ " Code point value is out of Unicode codespace 0..0x10ffff: 0x${codePoint.toHexString()} ($codePoint )"
520
+ )
521
+ }
522
+
513
523
codePoint < 0x80 -> {
514
524
// Emit a 7-bit code point with 1 byte.
515
525
writeByte(codePoint.toByte())
@@ -539,7 +549,7 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
539
549
size + = 3L
540
550
}
541
551
542
- codePoint <= 0x10ffff -> {
552
+ else -> { // [0x10000, 0x10ffff]
543
553
// Emit a 21-bit code point with 4 bytes.
544
554
val tail = writableSegment(4 )
545
555
tail.data[tail.limit] = (codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
@@ -549,10 +559,6 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
549
559
tail.limit + = 4
550
560
size + = 4L
551
561
}
552
-
553
- else -> {
554
- throw IllegalArgumentException (" Unexpected code point: 0x${codePoint.toHexString()} " )
555
- }
556
562
}
557
563
}
558
564
0 commit comments