Skip to content

Commit 89c3540

Browse files
committed
Fix columnation and forward port improvements
Avoid boxed ints when creating line index table. Reject more bad inputs, but keep the convention that the current line at EOF is the last line (whether or not the last line is empty).
1 parent 794e7c9 commit 89c3540

File tree

3 files changed

+140
-19
lines changed

3 files changed

+140
-19
lines changed

compiler/src/dotty/tools/dotc/util/SourceFile.scala

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import core.Contexts._
1111
import scala.io.Codec
1212
import Chars._
1313
import scala.annotation.internal.sharable
14-
import scala.collection.mutable
15-
import scala.collection.mutable.ArrayBuffer
14+
import scala.collection.mutable.ArrayBuilder
1615
import scala.util.chaining.given
1716

1817
import java.io.File.separator
@@ -91,9 +90,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
9190

9291
def apply(idx: Int): Char = content().apply(idx)
9392

94-
/** length of the original source file
95-
* Note that when the source is from Tasty, content() could be empty even though length > 0.
96-
* Use content().length to determine the length of content(). */
93+
/** length of the original source file.
94+
* Note that when the source is from Tasty, content() could be empty even though length > 0.
95+
* Use content().length to determine the length of content(). */
9796
def length: Int =
9897
if lineIndicesCache ne null then lineIndicesCache.last
9998
else content().length
@@ -119,23 +118,25 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
119118
def positionInUltimateSource(position: SourcePosition): SourcePosition =
120119
SourcePosition(underlying, position.span shift start)
121120

122-
private def calculateLineIndicesFromContents() = {
121+
private def calculateLineIndicesFromContents(): Array[Int] =
123122
val cs = content()
124-
val buf = new ArrayBuffer[Int]
125-
buf += 0
123+
val buf = new ArrayBuilder.ofInt
124+
buf.sizeHint(cs.length / 30) // guesstimate line count
125+
buf.addOne(0)
126126
var i = 0
127127
while i < cs.length do
128128
val isLineBreak =
129129
val ch = cs(i)
130130
// don't identify the CR in CR LF as a line break, since LF will do.
131131
if ch == CR then i + 1 == cs.length || cs(i + 1) != LF
132132
else isLineBreakChar(ch)
133-
if isLineBreak then buf += i + 1
133+
if isLineBreak then buf.addOne(i + 1)
134134
i += 1
135-
buf += cs.length // sentinel, so that findLine below works smoother
136-
buf.toArray
137-
}
135+
buf.addOne(cs.length) // sentinel, so that findLine below works smoother
136+
buf.result()
138137

138+
// offsets of lines, plus a sentinel which is the content length.
139+
// if the last line is empty (end of content is NL) then the penultimate index == sentinel.
139140
private var lineIndicesCache: Array[Int] = _
140141
private def lineIndices: Array[Int] =
141142
if lineIndicesCache eq null then
@@ -156,7 +157,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
156157
lineIndicesCache = indices
157158

158159
/** Map line to offset of first character in line */
159-
def lineToOffset(index: Int): Int = lineIndices(index)
160+
def lineToOffset(index: Int): Int =
161+
if index < 0 || index >= lineIndices.length - 1 then throw new IndexOutOfBoundsException(index.toString)
162+
lineIndices(index)
160163

161164
/** Like `lineToOffset`, but doesn't crash if the index is out of bounds. */
162165
def lineToOffsetOpt(index: Int): Option[Int] =
@@ -168,24 +171,28 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
168171
/** A cache to speed up offsetToLine searches to similar lines */
169172
private var lastLine = 0
170173

171-
/** Convert offset to line in this source file
172-
* Lines are numbered from 0
174+
/** Convert offset to line in this source file.
175+
* Lines are numbered from 0. Conventionally, a final empty line begins at EOF.
176+
* For simplicity, the line at EOF is the last line (possibly non-empty).
173177
*/
174-
def offsetToLine(offset: Int): Int = {
178+
def offsetToLine(offset: Int): Int =
179+
//if lineIndices.isEmpty || offset < lineIndices.head || offset > lineIndices.last then
180+
// throw new IndexOutOfBoundsException(offset.toString)
175181
lastLine = Util.bestFit(lineIndices, lineIndices.length, offset, lastLine)
176182
if (offset >= length) lastLine -= 1 // compensate for the sentinel
177183
lastLine
178-
}
179184

180185
/** The index of the first character of the line containing position `offset` */
181186
def startOfLine(offset: Int): Int = {
182187
require(offset >= 0)
183188
lineToOffset(offsetToLine(offset))
184189
}
185190

186-
/** The start index of the line following the one containing position `offset` */
191+
/** The start index of the line following the one containing position `offset` or `length` at last line */
187192
def nextLine(offset: Int): Int =
188-
lineToOffset(offsetToLine(offset) + 1 min lineIndices.length - 1)
193+
val next = offsetToLine(offset) + 1
194+
if next >= lineIndices.length - 1 then lineIndices.last
195+
else lineToOffset(next)
189196

190197
/** The content of the line containing position `offset` */
191198
def lineContent(offset: Int): String =
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
2+
package dotty.tools
3+
package dotc.util
4+
5+
import Spans.*
6+
7+
import org.junit.Assert.{assertThrows as _, *}
8+
import org.junit.Test
9+
10+
class SourceFileTests:
11+
@Test def `i15209 source column handles tabs as line index`: Unit =
12+
val text = "\ta\n \tb\n \t c\n"
13+
val f = SourceFile.virtual("batch", text)
14+
assertEquals(1, f.column(text.indexOf('a')))
15+
assertEquals(2, f.column(text.indexOf('b')))
16+
assertEquals(3, f.column(text.indexOf('c')))
17+
18+
def lineContentOf(code: String, offset: Int) = SourceFile.virtual("batch", code).lineContent(offset)
19+
20+
@Test def si8205_lineToString: Unit =
21+
assertEquals("", lineContentOf("", 0))
22+
assertEquals("abc", lineContentOf("abc", 0))
23+
assertEquals("abc", lineContentOf("abc", 3))
24+
assertEquals("code no newline", lineContentOf("code no newline", 1))
25+
assertEquals("\n", lineContentOf("\n", 0))
26+
assertEquals("abc\n", lineContentOf("abc\ndef", 0))
27+
assertEquals("abc\n", lineContentOf("abc\ndef", 3))
28+
assertEquals("def", lineContentOf("abc\ndef", 4))
29+
assertEquals("def", lineContentOf("abc\ndef", 6))
30+
assertEquals("def\n", lineContentOf("abc\ndef\n", 7))
31+
32+
@Test def CRisEOL: Unit =
33+
assertEquals("\r", lineContentOf("\r", 0))
34+
assertEquals("abc\r", lineContentOf("abc\rdef", 0))
35+
assertEquals("abc\r", lineContentOf("abc\rdef", 3))
36+
assertEquals("def", lineContentOf("abc\rdef", 4))
37+
assertEquals("def", lineContentOf("abc\rdef", 6))
38+
assertEquals("def\r", lineContentOf("abc\rdef\r", 7))
39+
40+
@Test def CRNLisEOL(): Unit =
41+
assertEquals("\r\n", lineContentOf("\r\n", 0))
42+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 0))
43+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 3))
44+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 4))
45+
assertEquals("def", lineContentOf("abc\r\ndef", 5))
46+
assertEquals("def", lineContentOf("abc\r\ndef", 7))
47+
assertEquals("def", lineContentOf("abc\r\ndef", 8))
48+
assertEquals("def\r\n", lineContentOf("abc\r\ndef\r\n", 9))
49+
50+
@Test def `t9885 lineToOffset throws on bad line`: Unit =
51+
val text = "a\nb\nc\n"
52+
val f = SourceFile.virtual("batch", text)
53+
// was: EOL is line terminator, not line separator, so there is not an empty 4th line
54+
val splitsville = text.split("\n")
55+
assertEquals(List(0, 2, 4, 6), (0 to splitsville.nn.length).toList.map(f.lineToOffset))
56+
assertThrows[IndexOutOfBoundsException] {
57+
f.lineToOffset(4) // was: 3 in Scala 2 (no empty line), 5 in Scala 3 (sentinel induces off-by-one)
58+
}
59+
60+
// Position and SourceFile used to count differently
61+
val p = SourcePosition(f, Span(text.length - 1))
62+
val q = SourcePosition(f, Span(f.lineToOffset(p.line - 1)))
63+
assertEquals(2, p.line)
64+
assertEquals(p.line - 1, q.line)
65+
assertEquals(p.column, q.column + 1)
66+
assertEquals(f.startOfLine(p.span.start), SourcePosition(f, Span(f.lineToOffset(p.line))).span.start)
67+
68+
@Test def `t9885 lineToOffset ignores lack of final EOL`: Unit =
69+
val text = "a\nb\nc"
70+
val f = SourceFile.virtual("batch", text)
71+
assertThrows[IndexOutOfBoundsException] {
72+
f.lineToOffset(3)
73+
}
74+
assertEquals(4, f.lineToOffset(2))
75+
assertEquals(2, f.offsetToLine(text.length))
76+
77+
@Test def `t11572 offsetToLine throws on bad offset`: Unit =
78+
val text = "a\nb\nc\n"
79+
val f = SourceFile.virtual("batch", text)
80+
/* current code requires offsets untethered from source
81+
assertThrows[IndexOutOfBoundsException] {
82+
f.offsetToLine(-1) // was: -1
83+
}
84+
assertThrows[IndexOutOfBoundsException] {
85+
f.offsetToLine(7) // was: 3
86+
}
87+
*/
88+
assertEquals(0, f.offsetToLine(0))
89+
assertEquals(0, f.offsetToLine(1))
90+
assertEquals(1, f.offsetToLine(2))
91+
assertEquals(2, f.offsetToLine(4))
92+
assertEquals(2, f.offsetToLine(5))
93+
assertEquals(3, f.offsetToLine(6))
94+
95+
@Test def `t11572b offsetToLine throws on bad offset`: Unit =
96+
val text = "a\nb\nc\nd"
97+
val f = SourceFile.virtual("batch", text)
98+
/*
99+
assertThrows[IndexOutOfBoundsException] {
100+
f.offsetToLine(-1)
101+
}
102+
assertThrows[IndexOutOfBoundsException] {
103+
f.offsetToLine(8)
104+
}
105+
*/
106+
assertEquals(0, f.offsetToLine(0))
107+
assertEquals(0, f.offsetToLine(1))
108+
assertEquals(1, f.offsetToLine(2))
109+
assertEquals(2, f.offsetToLine(4))
110+
assertEquals(2, f.offsetToLine(5))
111+
assertEquals(3, f.offsetToLine(6))
112+
assertEquals(3, f.offsetToLine(7))

compiler/test/dotty/tools/utils.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def readFile(f: File): String = withFile(f)(_.mkString)
3939

4040
private object Unthrown extends ControlThrowable
4141

42+
def assertThrows[T <: Throwable: ClassTag](body: => Any): Unit = assertThrows[T](_ => true)(body)
43+
4244
def assertThrows[T <: Throwable: ClassTag](p: T => Boolean)(body: => Any): Unit =
4345
try
4446
body

0 commit comments

Comments
 (0)