Skip to content

Commit 4c34732

Browse files
committed
Merge pull request #955 from Manishearth/regexes
Regexes
2 parents b4d7880 + 51d166f commit 4c34732

File tree

4 files changed

+125
-44
lines changed

4 files changed

+125
-44
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# Change Log
22
All notable changes to this project will be documented in this file.
33

4+
## 0.0.70 — TBD
5+
* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new`,
6+
`RegexBuilder::new` and byte regexes
7+
48
## 0.0.69 — 2016-05-20
59
* Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
6-
* `used_underscore_binding` has been made `Allow` temporarily
10+
* [`used_underscore_binding`] has been made `Allow` temporarily
711

812
## 0.0.68 — 2016-05-17
913
* Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)*

src/regex.rs

Lines changed: 71 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::error::Error;
99
use syntax::ast::{LitKind, NodeId};
1010
use syntax::codemap::{Span, BytePos};
1111
use syntax::parse::token::InternedString;
12-
use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint};
12+
use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint};
1313

1414
/// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax.
1515
///
@@ -81,7 +81,7 @@ impl LateLintPass for RegexPass {
8181
span,
8282
"`regex!(_)` found. \
8383
Please use `Regex::new(_)`, which is faster for now.");
84-
self.spans.insert(span);
84+
self.spans.insert(span);
8585
}
8686
self.last = Some(block.id);
8787
}}
@@ -96,46 +96,22 @@ impl LateLintPass for RegexPass {
9696
fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
9797
if_let_chain!{[
9898
let ExprCall(ref fun, ref args) = expr.node,
99-
let ExprPath(_, ref path) = fun.node,
100-
match_path(path, &paths::REGEX_NEW) && args.len() == 1
99+
args.len() == 1,
100+
let Some(def) = cx.tcx.def_map.borrow().get(&fun.id),
101101
], {
102-
if let ExprLit(ref lit) = args[0].node {
103-
if let LitKind::Str(ref r, _) = lit.node {
104-
match regex_syntax::Expr::parse(r) {
105-
Ok(r) => {
106-
if let Some(repl) = is_trivial_regex(&r) {
107-
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
108-
"trivial regex",
109-
&format!("consider using {}", repl));
110-
}
111-
}
112-
Err(e) => {
113-
span_lint(cx,
114-
INVALID_REGEX,
115-
str_span(args[0].span, &r, e.position()),
116-
&format!("regex syntax error: {}",
117-
e.description()));
118-
}
119-
}
120-
}
121-
} else if let Some(r) = const_str(cx, &*args[0]) {
122-
match regex_syntax::Expr::parse(&r) {
123-
Ok(r) => {
124-
if let Some(repl) = is_trivial_regex(&r) {
125-
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
126-
"trivial regex",
127-
&format!("consider using {}", repl));
128-
}
129-
}
130-
Err(e) => {
131-
span_lint(cx,
132-
INVALID_REGEX,
133-
args[0].span,
134-
&format!("regex syntax error on position {}: {}",
135-
e.position(),
136-
e.description()));
137-
}
138-
}
102+
let def_id = def.def_id();
103+
if match_def_path(cx, def_id, &paths::REGEX_NEW) {
104+
check_regex(cx, &args[0], true);
105+
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
106+
check_regex(cx, &args[0], false);
107+
} else if match_def_path(cx, def_id, &paths::REGEX_BUILDER_NEW) {
108+
check_regex(cx, &args[0], true);
109+
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_BUILDER_NEW) {
110+
check_regex(cx, &args[0], false);
111+
} else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
112+
check_set(cx, &args[0], true);
113+
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
114+
check_set(cx, &args[0], false);
139115
}
140116
}}
141117
}
@@ -193,3 +169,57 @@ fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
193169
_ => None,
194170
}
195171
}
172+
173+
fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) {
174+
if_let_chain! {[
175+
let ExprAddrOf(_, ref expr) = expr.node,
176+
let ExprVec(ref exprs) = expr.node,
177+
], {
178+
for expr in exprs {
179+
check_regex(cx, expr, utf8);
180+
}
181+
}}
182+
}
183+
184+
fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) {
185+
let builder = regex_syntax::ExprBuilder::new().unicode(utf8);
186+
187+
if let ExprLit(ref lit) = expr.node {
188+
if let LitKind::Str(ref r, _) = lit.node {
189+
match builder.parse(r) {
190+
Ok(r) => {
191+
if let Some(repl) = is_trivial_regex(&r) {
192+
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
193+
"trivial regex",
194+
&format!("consider using {}", repl));
195+
}
196+
}
197+
Err(e) => {
198+
span_lint(cx,
199+
INVALID_REGEX,
200+
str_span(expr.span, r, e.position()),
201+
&format!("regex syntax error: {}",
202+
e.description()));
203+
}
204+
}
205+
}
206+
} else if let Some(r) = const_str(cx, expr) {
207+
match builder.parse(&r) {
208+
Ok(r) => {
209+
if let Some(repl) = is_trivial_regex(&r) {
210+
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
211+
"trivial regex",
212+
&format!("consider using {}", repl));
213+
}
214+
}
215+
Err(e) => {
216+
span_lint(cx,
217+
INVALID_REGEX,
218+
expr.span,
219+
&format!("regex syntax error on position {}: {}",
220+
e.position(),
221+
e.description()));
222+
}
223+
}
224+
}
225+
}

src/utils/paths.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
4646
pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
4747
pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
4848
pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
49-
pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"];
49+
pub const REGEX_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "unicode", "RegexBuilder", "new"];
50+
pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
51+
pub const REGEX_BYTES_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "bytes", "RegexBuilder", "new"];
52+
pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
53+
pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
54+
pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
55+
pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"];
5056
pub const RESULT: [&'static str; 3] = ["core", "result", "Result"];
5157
pub const STRING: [&'static str; 3] = ["collections", "string", "String"];
5258
pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"];

tests/compile-fail/regex.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66

77
extern crate regex;
88

9-
use regex::Regex;
9+
use regex::{Regex, RegexSet, RegexBuilder};
10+
use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet, RegexBuilder as BRegexBuilder};
1011

1112
const OPENING_PAREN : &'static str = "(";
1213
const NOT_A_REAL_REGEX : &'static str = "foobar";
1314

1415
fn syntax_error() {
1516
let pipe_in_wrong_position = Regex::new("|");
1617
//~^ERROR: regex syntax error: empty alternate
18+
let pipe_in_wrong_position_builder = RegexBuilder::new("|");
19+
//~^ERROR: regex syntax error: empty alternate
1720
let wrong_char_ranice = Regex::new("[z-a]");
1821
//~^ERROR: regex syntax error: invalid character class range
1922
let some_unicode = Regex::new("[é-è]");
@@ -22,15 +25,46 @@ fn syntax_error() {
2225
let some_regex = Regex::new(OPENING_PAREN);
2326
//~^ERROR: regex syntax error on position 0: unclosed
2427

28+
let binary_pipe_in_wrong_position = BRegex::new("|");
29+
//~^ERROR: regex syntax error: empty alternate
30+
let some_binary_regex = BRegex::new(OPENING_PAREN);
31+
//~^ERROR: regex syntax error on position 0: unclosed
32+
let some_binary_regex_builder = BRegexBuilder::new(OPENING_PAREN);
33+
//~^ERROR: regex syntax error on position 0: unclosed
34+
2535
let closing_paren = ")";
2636
let not_linted = Regex::new(closing_paren);
37+
38+
let set = RegexSet::new(&[
39+
r"[a-z]+@[a-z]+\.(com|org|net)",
40+
r"[a-z]+\.(com|org|net)",
41+
]);
42+
let bset = BRegexSet::new(&[
43+
r"[a-z]+@[a-z]+\.(com|org|net)",
44+
r"[a-z]+\.(com|org|net)",
45+
]);
46+
47+
let set_error = RegexSet::new(&[
48+
OPENING_PAREN,
49+
//~^ERROR: regex syntax error on position 0: unclosed
50+
r"[a-z]+\.(com|org|net)",
51+
]);
52+
let bset_error = BRegexSet::new(&[
53+
OPENING_PAREN,
54+
//~^ERROR: regex syntax error on position 0: unclosed
55+
r"[a-z]+\.(com|org|net)",
56+
]);
2757
}
2858

2959
fn trivial_regex() {
3060
let trivial_eq = Regex::new("^foobar$");
3161
//~^ERROR: trivial regex
3262
//~|HELP consider using `==` on `str`s
3363

64+
let trivial_eq_builder = RegexBuilder::new("^foobar$");
65+
//~^ERROR: trivial regex
66+
//~|HELP consider using `==` on `str`s
67+
3468
let trivial_starts_with = Regex::new("^foobar");
3569
//~^ERROR: trivial regex
3670
//~|HELP consider using `str::starts_with`
@@ -64,12 +98,19 @@ fn trivial_regex() {
6498
//~^ERROR: trivial regex
6599
//~|HELP consider using `str::is_empty`
66100

101+
let binary_trivial_empty = BRegex::new("^$");
102+
//~^ERROR: trivial regex
103+
//~|HELP consider using `str::is_empty`
104+
67105
// non-trivial regexes
68106
let non_trivial_dot = Regex::new("a.b");
107+
let non_trivial_dot_builder = RegexBuilder::new("a.b");
69108
let non_trivial_eq = Regex::new("^foo|bar$");
70109
let non_trivial_starts_with = Regex::new("^foo|bar");
71110
let non_trivial_ends_with = Regex::new("^foo|bar");
72111
let non_trivial_ends_with = Regex::new("foo|bar");
112+
let non_trivial_binary = BRegex::new("foo|bar");
113+
let non_trivial_binary_builder = BRegexBuilder::new("foo|bar");
73114
}
74115

75116
fn main() {

0 commit comments

Comments
 (0)