Skip to content

Commit ce1bbfe

Browse files
committed
Regex.unapplySeq should not take Any (Fixes SI-6406)
This deprecates unapplySeq(Any) and adds overloaded unapplySeq(CharSequence) and unapplySeq(Match), with the putative advantage that you can't try to extract the unextractable. Regex is massaged so that the underlying Pattern is primary, rather than the String-valued expression. Regex and its unanchored companion (I almost wrote unmoored) share a Pattern object, so that unapplySeq(Match) can easily test whether the Match was generated by this Regex; in that case, the match result is used immediately, instead of reapplying the regex to the matched string. The documentation is massaged to reflect unanchored and also to align with the underlying terminology, e.g., "subgroup" really just means "group."
1 parent 4f026f0 commit ce1bbfe

File tree

6 files changed

+110
-34
lines changed

6 files changed

+110
-34
lines changed

src/library/scala/util/matching/Regex.scala

Lines changed: 64 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ import java.util.regex.{ Pattern, Matcher }
131131
* @author Martin Odersky
132132
* @version 1.1, 29/01/2008
133133
*
134-
* @param regex A string representing a regular expression
134+
* @param pattern The compiled pattern
135135
* @param groupNames A mapping from names to indices in capture groups
136136
*
137137
* @define replacementString
@@ -144,41 +144,67 @@ import java.util.regex.{ Pattern, Matcher }
144144
* to automatically escape these characters.
145145
*/
146146
@SerialVersionUID(-2094783597747625537L)
147-
class Regex(regex: String, groupNames: String*) extends Serializable {
147+
class Regex private[matching](val pattern: Pattern, groupNames: String*) extends Serializable {
148148
outer =>
149149

150150
import Regex._
151151

152-
/** The compiled pattern */
153-
val pattern = Pattern.compile(regex)
152+
/**
153+
* @param regex A string representing a regular expression
154+
* @param groupNames A mapping from names to indices in capture groups
155+
*/
156+
def this(regex: String, groupNames: String*) = this(Pattern.compile(regex), groupNames: _*)
154157

155-
/** Tries to match target (whole match) and returns the matching subgroups.
156-
* if the pattern has no subgroups, then it returns an empty list on a
157-
* successful match.
158-
*
159-
* Note, however, that if some subgroup has not been matched, a `null` will
160-
* be returned for that subgroup.
158+
/** Tries to match a [[java.lang.CharSequence]].
159+
* If the match succeeds, the result is a list of the matching
160+
* groups (or a `null` element if a group did not match any input).
161+
* If the pattern specifies no groups, then the result will be an empty list
162+
* on a successful match.
161163
*
164+
* This method attempts to match the entire input by default; to find the next
165+
* matching subsequence, use an unanchored Regex.
166+
162167
* For example:
163168
*
164169
* {{{
165170
* val p1 = "ab*c".r
166-
* val p2 = "a(b*)c".r
167-
*
168171
* val p1Matches = "abbbc" match {
169172
* case p1() => true
170173
* case _ => false
171174
* }
172-
*
175+
* val p2 = "a(b*)c".r
173176
* val numberOfB = "abbbc" match {
174177
* case p2(b) => Some(b.length)
175178
* case _ => None
176179
* }
180+
* val p3 = "b*".r.unanchored
181+
* val p3Matches = "abbbc" match {
182+
* case p3() => true
183+
* case _ => false
184+
* }
177185
* }}}
178186
*
179-
* @param target The string to match
187+
* @param s The character sequence to match
180188
* @return The matches
181189
*/
190+
def unapplySeq(s: CharSequence): Option[Seq[String]] = {
191+
val m = pattern matcher s
192+
if (runMatcher(m)) Some(1 to m.groupCount map m.group)
193+
else None
194+
}
195+
196+
/** Tries to match on a [[scala.util.matching.Regex.Match]].
197+
* A previously failed match results in None.
198+
* If a successful match was made against the current pattern, then that result is used.
199+
* Otherwise, this Regex is applied to the previously matched input,
200+
* and the result of that match is used.
201+
*/
202+
def unapplySeq(m: Match): Option[Seq[String]] =
203+
if (m.matched == null) None
204+
else if (m.matcher.pattern == this.pattern) Some(1 to m.groupCount map m.group)
205+
else unapplySeq(m.matched)
206+
207+
@deprecated("Extracting a match result from anything but a CharSequence or Match is deprecated", "2.10.0")
182208
def unapplySeq(target: Any): Option[List[String]] = target match {
183209
case s: CharSequence =>
184210
val m = pattern matcher s
@@ -187,6 +213,8 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
187213
case m: Match => unapplySeq(m.matched)
188214
case _ => None
189215
}
216+
217+
// @see UnanchoredRegex
190218
protected def runMatcher(m: Matcher) = m.matches()
191219

192220
/** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
@@ -200,7 +228,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
200228
* @return A [[scala.util.matching.Regex.MatchIterator]] of all matches.
201229
* @example {{{for (words <- """\w+""".r findAllIn "A simple example.") yield words}}}
202230
*/
203-
def findAllIn(source: java.lang.CharSequence) = new Regex.MatchIterator(source, this, groupNames)
231+
def findAllIn(source: CharSequence) = new Regex.MatchIterator(source, this, groupNames)
204232

205233

206234
/** Return all matches of this regexp in given character sequence as a
@@ -210,7 +238,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
210238
* @return A [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]] for all matches.
211239
* @example {{{for (words <- """\w+""".r findAllMatchIn "A simple example.") yield words.start}}}
212240
*/
213-
def findAllMatchIn(source: java.lang.CharSequence): Iterator[Match] = {
241+
def findAllMatchIn(source: CharSequence): Iterator[Match] = {
214242
val matchIterator = findAllIn(source)
215243
new Iterator[Match] {
216244
def hasNext = matchIterator.hasNext
@@ -228,7 +256,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
228256
* @return An [[scala.Option]] of the first matching string in the text.
229257
* @example {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}}
230258
*/
231-
def findFirstIn(source: java.lang.CharSequence): Option[String] = {
259+
def findFirstIn(source: CharSequence): Option[String] = {
232260
val m = pattern.matcher(source)
233261
if (m.find) Some(m.group) else None
234262
}
@@ -245,7 +273,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
245273
* @return A [[scala.Option]] of [[scala.util.matching.Regex.Match]] of the first matching string in the text.
246274
* @example {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}}
247275
*/
248-
def findFirstMatchIn(source: java.lang.CharSequence): Option[Match] = {
276+
def findFirstMatchIn(source: CharSequence): Option[Match] = {
249277
val m = pattern.matcher(source)
250278
if (m.find) Some(new Match(source, m, groupNames)) else None
251279
}
@@ -262,7 +290,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
262290
* @return A [[scala.Option]] of the matched prefix.
263291
* @example {{{"""[a-z]""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}}
264292
*/
265-
def findPrefixOf(source: java.lang.CharSequence): Option[String] = {
293+
def findPrefixOf(source: CharSequence): Option[String] = {
266294
val m = pattern.matcher(source)
267295
if (m.lookingAt) Some(m.group) else None
268296
}
@@ -279,7 +307,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
279307
* @return A [[scala.Option]] of the [[scala.util.matching.Regex.Match]] of the matched string.
280308
* @example {{{"""\w+""".r findPrefixMatchOf "A simple example." map (_.after) // returns Some(" simple example.")}}}
281309
*/
282-
def findPrefixMatchOf(source: java.lang.CharSequence): Option[Match] = {
310+
def findPrefixMatchOf(source: CharSequence): Option[Match] = {
283311
val m = pattern.matcher(source)
284312
if (m.lookingAt) Some(new Match(source, m, groupNames)) else None
285313
}
@@ -293,7 +321,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
293321
* @return The resulting string
294322
* @example {{{"""\d+""".r replaceAllIn ("July 15", "<NUMBER>") // returns "July <NUMBER>"}}}
295323
*/
296-
def replaceAllIn(target: java.lang.CharSequence, replacement: String): String = {
324+
def replaceAllIn(target: CharSequence, replacement: String): String = {
297325
val m = pattern.matcher(target)
298326
m.replaceAll(replacement)
299327
}
@@ -316,7 +344,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
316344
* @param replacer The function which maps a match to another string.
317345
* @return The target string after replacements.
318346
*/
319-
def replaceAllIn(target: java.lang.CharSequence, replacer: Match => String): String = {
347+
def replaceAllIn(target: CharSequence, replacer: Match => String): String = {
320348
val it = new Regex.MatchIterator(target, this, groupNames).replacementData
321349
it foreach (md => it replace replacer(md))
322350
it.replaced
@@ -343,7 +371,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
343371
* @param replacer The function which optionally maps a match to another string.
344372
* @return The target string after replacements.
345373
*/
346-
def replaceSomeIn(target: java.lang.CharSequence, replacer: Match => Option[String]): String = {
374+
def replaceSomeIn(target: CharSequence, replacer: Match => Option[String]): String = {
347375
val it = new Regex.MatchIterator(target, this, groupNames).replacementData
348376
for (matchdata <- it ; replacement <- replacer(matchdata))
349377
it replace replacement
@@ -359,7 +387,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
359387
* @param replacement The string that will replace the match
360388
* @return The resulting string
361389
*/
362-
def replaceFirstIn(target: java.lang.CharSequence, replacement: String): String = {
390+
def replaceFirstIn(target: CharSequence, replacement: String): String = {
363391
val m = pattern.matcher(target)
364392
m.replaceFirst(replacement)
365393
}
@@ -370,7 +398,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
370398
* @return The array of strings computed by splitting the
371399
* input around matches of this regexp
372400
*/
373-
def split(toSplit: java.lang.CharSequence): Array[String] =
401+
def split(toSplit: CharSequence): Array[String] =
374402
pattern.split(toSplit)
375403

376404
/** Create a new Regex with the same pattern, but no requirement that
@@ -390,9 +418,11 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
390418
*
391419
* @return The new unanchored regex
392420
*/
393-
def unanchored: UnanchoredRegex = new Regex(regex, groupNames: _*) with UnanchoredRegex { override def anchored = outer }
421+
def unanchored: UnanchoredRegex = new Regex(pattern, groupNames: _*) with UnanchoredRegex { override def anchored = outer }
394422
def anchored: Regex = this
395423

424+
def regex: String = pattern.pattern
425+
396426
/** The string defining the regular expression */
397427
override def toString = regex
398428
}
@@ -421,7 +451,7 @@ object Regex {
421451
trait MatchData {
422452

423453
/** The source from where the match originated */
424-
val source: java.lang.CharSequence
454+
val source: CharSequence
425455

426456
/** The names of the groups, or an empty sequence if none are defined */
427457
val groupNames: Seq[String]
@@ -459,25 +489,25 @@ object Regex {
459489

460490
/** The char sequence before first character of match,
461491
* or `null` if nothing was matched */
462-
def before: java.lang.CharSequence =
492+
def before: CharSequence =
463493
if (start >= 0) source.subSequence(0, start)
464494
else null
465495

466496
/** The char sequence before first character of match in group `i`,
467497
* or `null` if nothing was matched for that group */
468-
def before(i: Int): java.lang.CharSequence =
498+
def before(i: Int): CharSequence =
469499
if (start(i) >= 0) source.subSequence(0, start(i))
470500
else null
471501

472502
/** Returns char sequence after last character of match,
473503
* or `null` if nothing was matched */
474-
def after: java.lang.CharSequence =
504+
def after: CharSequence =
475505
if (end >= 0) source.subSequence(end, source.length)
476506
else null
477507

478508
/** The char sequence after last character of match in group `i`,
479509
* or `null` if nothing was matched for that group */
480-
def after(i: Int): java.lang.CharSequence =
510+
def after(i: Int): CharSequence =
481511
if (end(i) >= 0) source.subSequence(end(i), source.length)
482512
else null
483513

@@ -501,8 +531,8 @@ object Regex {
501531

502532
/** Provides information about a successful match.
503533
*/
504-
class Match(val source: java.lang.CharSequence,
505-
matcher: Matcher,
534+
class Match(val source: CharSequence,
535+
private[matching] val matcher: Matcher,
506536
val groupNames: Seq[String]) extends MatchData {
507537

508538
/** The index of the first matched character */
@@ -563,7 +593,7 @@ object Regex {
563593

564594
/** A class to step through a sequence of regex matches
565595
*/
566-
class MatchIterator(val source: java.lang.CharSequence, val regex: Regex, val groupNames: Seq[String])
596+
class MatchIterator(val source: CharSequence, val regex: Regex, val groupNames: Seq[String])
567597
extends AbstractIterator[String] with Iterator[String] with MatchData { self =>
568598

569599
protected[Regex] val matcher = regex.pattern.matcher(source)

test/files/neg/t6406-regextract.check

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
t6406-regextract.scala:4: warning: method unapplySeq in class Regex is deprecated: Extracting a match result from anything but a CharSequence or Match is deprecated
2+
List(1) collect { case r(i) => i }
3+
^
4+
error: No warnings can be incurred under -Xfatal-warnings.
5+
one warning found
6+
one error found

test/files/neg/t6406-regextract.flags

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-Xfatal-warnings

test/files/neg/t6406-regextract.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
object Test extends App {
3+
val r = "(\\d+)".r
4+
List(1) collect { case r(i) => i }
5+
}

test/files/run/t6406-regextract.check

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
List(1, 3)
2+
List(1, 3)
3+
List(1, 3)
4+
Some(2011) Some(2011)

test/files/run/t6406-regextract.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
object Test extends App {
3+
import util.matching._
4+
import Regex._
5+
6+
val r = "(\\d+)".r
7+
val q = """(\d)""".r
8+
val ns = List("1,2","x","3,4")
9+
val u = r.unanchored
10+
11+
val is = ns collect { case u(x) => x } map { case r(x) => x }
12+
println(is)
13+
// Match from same pattern
14+
val js = (ns map { u findFirstMatchIn _ }).flatten map { case r(x) => x }
15+
println(js)
16+
// Match not from same pattern
17+
val ks = (ns map { q findFirstMatchIn _ }).flatten map { case r(x) => x }
18+
println(ks)
19+
20+
val t = "Last modified 2011-07-15"
21+
val p1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
22+
val y1: Option[String] = for {
23+
p1(year, month, day) <- p1 findFirstIn t
24+
} yield year
25+
val y2: Option[String] = for {
26+
p1(year, month, day) <- p1 findFirstMatchIn t
27+
} yield year
28+
println(s"$y1 $y2")
29+
30+
}

0 commit comments

Comments
 (0)