Skip to content

Commit e8365de

Browse files
committed
Merge pull request scala#4209 from kanielc/SI-8988
SI-8988 Escaping character in StringLike.split(c) is slow
2 parents 9d09247 + d1d3225 commit e8365de

File tree

2 files changed

+65
-3
lines changed

2 files changed

+65
-3
lines changed

src/library/scala/collection/immutable/StringLike.scala

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ package scala
1010
package collection
1111
package immutable
1212

13-
import mutable.Builder
13+
import mutable.{ ArrayBuilder, Builder }
1414
import scala.util.matching.Regex
1515
import scala.math.ScalaNumber
1616
import scala.reflect.ClassTag
@@ -203,8 +203,33 @@ self =>
203203

204204
/** Wraps `ch` in the regex quoting markers `\Q` and `\E`. */
private def escape(ch: Char): String = s"\\Q$ch\\E"
205205

206-
@throws(classOf[java.util.regex.PatternSyntaxException])
207-
def split(separator: Char): Array[String] = toString.split(escape(separator))
206+
/** Splits this string around every occurrence of `separator`.
 *
 *  The separator is located with `String.indexOf`, so it is always matched
 *  literally and no regular expression is built. Empty strings produced by a
 *  leading separator or by adjacent separators are kept; trailing empty
 *  strings are removed. A string that does not contain `separator` yields a
 *  one-element array holding the whole string.
 *
 *  @param separator the character at which to split
 *  @return          the substrings delimited by `separator`, without trailing
 *                   empty strings
 */
def split(separator: Char): Array[String] = {
  val thisString = toString
  var pos = thisString.indexOf(separator)

  if (pos != -1) {
    val res = new ArrayBuilder.ofRef[String]

    // Collect every piece that is terminated by a separator.
    var prev = 0
    do {
      res += thisString.substring(prev, pos)
      prev = pos + 1
      pos = thisString.indexOf(separator, prev)
    } while (pos != -1)

    // `length` is String's own method; `size` is not a member of
    // java.lang.String and would resolve through an implicit wrapper.
    val len = thisString.length
    if (prev != len)
      res += thisString.substring(prev, len)

    // Drop trailing empty strings, copying only when there is something to drop.
    val initialResult = res.result()
    pos = initialResult.length
    while (pos > 0 && initialResult(pos - 1).isEmpty) pos = pos - 1
    if (pos != initialResult.length) {
      val trimmed = new Array[String](pos)
      Array.copy(initialResult, 0, trimmed, 0, pos)
      trimmed
    } else initialResult
  } else Array[String](thisString)
}
208233

209234
@throws(classOf[java.util.regex.PatternSyntaxException])
210235
def split(separators: Array[Char]): Array[String] = {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package scala.collection.immutable
2+
3+
import org.junit.Test
4+
import org.junit.runner.RunWith
5+
import org.junit.runners.JUnit4
6+
7+
import scala.tools.testing.AssertUtil
8+
import scala.util.Random
9+
10+
/* Test for SI-8988 */
11+
@RunWith(classOf[JUnit4])
class StringLikeTest {
  /** Compares `split(Char)` with Java's regex split on the quoted character,
   *  for every character in 0 to 255.
   */
  @Test
  def testStringSplitWithChar: Unit = {
    val chars = (0 to 255).map(_.toChar)
    // Join random chunks (some possibly empty) with the separator so that the
    // input really contains it, including leading, adjacent and trailing
    // occurrences. A purely random string would almost never contain `c`.
    def randString(c: Char) =
      Seq.fill(4)(Random.nextString(Random.nextInt(8))).mkString(c.toString)

    for (c <- chars) {
      val s = randString(c)
      val jString = new java.lang.String(s)

      // Java's split on the quoted character is the reference result.
      val jSplit = jString.split("\\Q" + c.toString + "\\E")
      val sSplit = s.split(c)
      AssertUtil.assertSameElements(jSplit, sSplit, s"Not same result as Java split for char $c in string $s")
    }
  }

  /** Pins the handling of leading, adjacent and trailing separators. */
  @Test
  def testSplitEdgeCases: Unit = {
    AssertUtil.assertSameElements("abcd".split('d'), Array("abc")) // not Array("abc", "")
    AssertUtil.assertSameElements("abccc".split('c'), Array("ab")) // not Array("ab", "", "", "")
    AssertUtil.assertSameElements("xxx".split('x'), Array[String]()) // not Array("", "", "", "")
    AssertUtil.assertSameElements("".split('x'), Array("")) // not Array()
    // Char separator: a String argument would select Java's regex split
    // and leave the Char overload untested.
    AssertUtil.assertSameElements("--ch--omp--".split('-'), Array("", "", "ch", "", "omp")) // All the cases!
  }
}

0 commit comments

Comments
 (0)