Skip to content

Commit 6e6632e

Browse files
committed
SI-9038 fix scaladoc syntax highlighting to leave unicode alone
Syntax highlighting in code blocks used to operate on the raw bytes of a String, converting each byte to a char when needed, which corrupts multi-byte Unicode characters (including surrogate pairs). Using a char array instead of a byte array leaves them alone.
1 parent 2dc40cc commit 6e6632e

File tree

3 files changed

+40
-17
lines changed

3 files changed

+40
-17
lines changed

build.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,7 @@ TODO:
978978
<pathelement location="${test.junit.classes}"/>
979979
<path refid="quick.compiler.build.path"/>
980980
<path refid="quick.repl.build.path"/>
981+
<path refid="quick.scaladoc.build.path"/>
981982
<path refid="quick.partest-extras.build.path"/>
982983
<path refid="junit.classpath"/>
983984
</path>

src/scaladoc/scala/tools/nsc/doc/html/SyntaxHigh.scala

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ private[html] object SyntaxHigh {
5252
"Triple", "TypeTag", "Unit")
5353

5454
def apply(data: String): NodeSeq = {
55-
val buf = data.getBytes
55+
val buf = data.toCharArray
5656
val out = new StringBuilder
5757

5858
def compare(offset: Int, key: String): Int = {
5959
var i = offset
6060
var j = 0
6161
val l = key.length
6262
while (i < buf.length && j < l) {
63-
val bch = buf(i).toChar
63+
val bch = buf(i)
6464
val kch = key charAt j
6565
if (bch < kch) return -1
6666
else if (bch > kch) return 1
@@ -94,13 +94,13 @@ private[html] object SyntaxHigh {
9494
def line(i: Int): Int =
9595
if (i == buf.length || buf(i) == '\n') i
9696
else {
97-
out append buf(i).toChar
97+
out append buf(i)
9898
line(i+1)
9999
}
100100
var level = 0
101101
def multiline(i: Int, star: Boolean): Int = {
102102
if (i == buf.length) return i
103-
val ch = buf(i).toChar
103+
val ch = buf(i)
104104
out append ch
105105
ch match {
106106
case '*' =>
@@ -127,7 +127,7 @@ private[html] object SyntaxHigh {
127127
if (i == buf.length) i
128128
else if (i > j+6) { out setLength 0; j }
129129
else {
130-
val ch = buf(i).toChar
130+
val ch = buf(i)
131131
out append ch
132132
ch match {
133133
case '\\' =>
@@ -148,7 +148,7 @@ private[html] object SyntaxHigh {
148148
val out = new StringBuilder("\"")
149149
def strlit0(i: Int, bslash: Boolean): Int = {
150150
if (i == buf.length) return i
151-
val ch = buf(i).toChar
151+
val ch = buf(i)
152152
out append ch
153153
ch match {
154154
case '\\' =>
@@ -167,7 +167,7 @@ private[html] object SyntaxHigh {
167167
val out = new StringBuilder
168168
def intg(i: Int): Int = {
169169
if (i == buf.length) return i
170-
val ch = buf(i).toChar
170+
val ch = buf(i)
171171
ch match {
172172
case '.' =>
173173
out append ch
@@ -181,7 +181,7 @@ private[html] object SyntaxHigh {
181181
}
182182
def frac(i: Int): Int = {
183183
if (i == buf.length) return i
184-
val ch = buf(i).toChar
184+
val ch = buf(i)
185185
ch match {
186186
case 'e' | 'E' =>
187187
out append ch
@@ -195,7 +195,7 @@ private[html] object SyntaxHigh {
195195
}
196196
def expo(i: Int, signed: Boolean): Int = {
197197
if (i == buf.length) return i
198-
val ch = buf(i).toChar
198+
val ch = buf(i)
199199
ch match {
200200
case '+' | '-' if !signed =>
201201
out append ch
@@ -222,7 +222,7 @@ private[html] object SyntaxHigh {
222222
case '&' =>
223223
parse("&amp;", i+1)
224224
case '<' if i+1 < buf.length =>
225-
val ch = buf(i+1).toChar
225+
val ch = buf(i+1)
226226
if (ch == '-' || ch == ':' || ch == '%')
227227
parse("<span class=\"kw\">&lt;"+ch+"</span>", i+2)
228228
else
@@ -236,19 +236,19 @@ private[html] object SyntaxHigh {
236236
if (i+1 < buf.length && buf(i+1) == '>')
237237
parse("<span class=\"kw\">=&gt;</span>", i+2)
238238
else
239-
parse(buf(i).toChar.toString, i+1)
239+
parse(buf(i).toString, i+1)
240240
case '/' =>
241241
if (i+1 < buf.length && (buf(i+1) == '/' || buf(i+1) == '*')) {
242242
val c = comment(i+1)
243243
parse("<span class=\"cmt\">"+c+"</span>", i+c.length)
244244
} else
245-
parse(buf(i).toChar.toString, i+1)
245+
parse(buf(i).toString, i+1)
246246
case '\'' =>
247247
val s = charlit(i+1)
248248
if (s.length > 0)
249249
parse("<span class=\"lit\">"+s+"</span>", i+s.length)
250250
else
251-
parse(buf(i).toChar.toString, i+1)
251+
parse(buf(i).toString, i+1)
252252
case '"' =>
253253
val s = strlit(i+1)
254254
parse("<span class=\"lit\">"+s+"</span>", i+s.length)
@@ -257,9 +257,9 @@ private[html] object SyntaxHigh {
257257
if (k >= 0)
258258
parse("<span class=\"ano\">@"+annotations(k)+"</span>", i+annotations(k).length+1)
259259
else
260-
parse(buf(i).toChar.toString, i+1)
260+
parse(buf(i).toString, i+1)
261261
case _ =>
262-
if (i == 0 || (i >= 1 && !Character.isJavaIdentifierPart(buf(i-1).toChar))) {
262+
if (i == 0 || (i >= 1 && !Character.isJavaIdentifierPart(buf(i-1)))) {
263263
if (Character.isDigit(buf(i).toInt) ||
264264
(buf(i) == '.' && i + 1 < buf.length && Character.isDigit(buf(i+1).toInt))) {
265265
val s = numlit(i)
@@ -273,11 +273,11 @@ private[html] object SyntaxHigh {
273273
if (k >= 0)
274274
parse("<span class=\"std\">"+standards(k)+"</span>", i+standards(k).length)
275275
else
276-
parse(buf(i).toChar.toString, i+1)
276+
parse(buf(i).toString, i+1)
277277
}
278278
}
279279
} else
280-
parse(buf(i).toChar.toString, i+1)
280+
parse(buf(i).toString, i+1)
281281
}
282282
}
283283

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package scala.tools.nsc.doc.html
2+
3+
import org.junit.Test
4+
import org.junit.Assert._
5+
import org.junit.runner.RunWith
6+
import org.junit.runners.JUnit4
7+
8+
import scala.tools.testing.AssertUtil._
9+
10+
/** Regression tests for the scaladoc HTML syntax highlighter (`SyntaxHigh`). */
@RunWith(classOf[JUnit4])
class HtmlDocletTest {
  /**
   * SI-9038: highlighting a code block must leave non-ASCII characters untouched.
   *
   * The test expects the rendered output to be identical to the input string.
   */
  @Test
  def testSyntaxHighlightningUnicode(): Unit = {
    val in = "unicode: …"

    val out = SyntaxHigh(in).toString

    // SI-9038, this failed with
    // "unicode: …" != "unicode: ¬タᆭ"
    assertEquals(in, out)
  }
}

0 commit comments

Comments
 (0)