@@ -127,8 +127,8 @@ internal fun String.utf8Size(startIndex: Int = 0, endIndex: Int = length): Long
127
127
* detected using [Char.isSurrogate], or [Char.isHighSurrogate] and [Char.isLowSurrogate]).
128
128
* Such a pair of characters needs to be manually converted back to a single code point
129
129
* which then could be written to a [Sink].
130
- * Without such a conversion, data written to a [Sink] will no
131
- * longer be converted back to a string from which a surrogate pair was retrieved.
130
+ * Without such a conversion, data written to a [Sink] cannot be converted back
131
+ * to a string from which a surrogate pair was retrieved.
132
132
*
133
133
* @param codePoint the codePoint to be written.
134
134
*
@@ -210,11 +210,13 @@ public fun Source.readString(byteCount: Long): String {
210
210
* If this source is exhausted before a complete code point can be read, this throws an
211
211
* [EOFException] and consumes no input.
212
212
*
213
- * If this source doesn't start with a properly encoded UTF-8 code point, this method will remove
214
- * 1 or more non-UTF-8 bytes and return the replacement character (`U+fffd`). This covers encoding
215
- * problems (the input is not properly encoded UTF-8), characters out of range (beyond the
216
- * `0x10ffff` limit of Unicode), code points for UTF-16 surrogates (`U+d800`..`U+dfff`) and overlong
217
- * encodings (such as `0xc080` for the NUL character in modified UTF-8).
213
+ * If this source starts with an ill-formed UTF-8 code unit sequence, this method will remove
214
+ * 1 or more non-UTF-8 bytes and return the replacement character (`U+fffd`).
215
+ *
216
+ * The replacement character (`U+fffd`) will also be returned if the source starts with a well-formed
217
+ * code unit sequence, but a decoded value does not pass further validation, such as
218
+ * the value is out of range (beyond the `0x10ffff` limit of Unicode), maps to UTF-16 surrogates (`U+d800`..`U+dfff`),
219
+ * or an overlong encoding is detected (such as `0xc080` for the NUL character in modified UTF-8).
218
220
*
219
221
* Note that in general, the returned value may not be directly converted to [Char] as it may be out
220
222
* of [Char]'s values range and should be manually converted to a
0 commit comments