Skip to content

Commit 4f11f84

Browse files
committed
Lint binary regexes
1 parent 77ed899 commit 4f11f84

File tree

4 files changed

+109
-44
lines changed

4 files changed

+109
-44
lines changed

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# Change Log
22
All notable changes to this project will be documented in this file.
33

4+
## 0.0.70 — TBD
5+
* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new` and
6+
byte regexes
7+
48
## 0.0.69 — 2016-05-20
59
* Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
6-
* `used_underscore_binding` has been made `Allow` temporarily
10+
* [`used_underscore_binding`] has been made `Allow` temporarily
711

812
## 0.0.68 — 2016-05-17
913
* Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)*

src/regex.rs

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::error::Error;
99
use syntax::ast::{LitKind, NodeId};
1010
use syntax::codemap::{Span, BytePos};
1111
use syntax::parse::token::InternedString;
12-
use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint};
12+
use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint};
1313

1414
/// **What it does:** This lint checks `Regex::new(_)` invocations for correct regex syntax.
1515
///
@@ -81,7 +81,7 @@ impl LateLintPass for RegexPass {
8181
span,
8282
"`regex!(_)` found. \
8383
Please use `Regex::new(_)`, which is faster for now.");
84-
self.spans.insert(span);
84+
self.spans.insert(span);
8585
}
8686
self.last = Some(block.id);
8787
}}
@@ -96,46 +96,18 @@ impl LateLintPass for RegexPass {
9696
fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
9797
if_let_chain!{[
9898
let ExprCall(ref fun, ref args) = expr.node,
99-
let ExprPath(_, ref path) = fun.node,
100-
match_path(path, &paths::REGEX_NEW) && args.len() == 1
99+
args.len() == 1,
100+
let Some(def) = cx.tcx.def_map.borrow().get(&fun.id),
101101
], {
102-
if let ExprLit(ref lit) = args[0].node {
103-
if let LitKind::Str(ref r, _) = lit.node {
104-
match regex_syntax::Expr::parse(r) {
105-
Ok(r) => {
106-
if let Some(repl) = is_trivial_regex(&r) {
107-
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
108-
"trivial regex",
109-
&format!("consider using {}", repl));
110-
}
111-
}
112-
Err(e) => {
113-
span_lint(cx,
114-
INVALID_REGEX,
115-
str_span(args[0].span, &r, e.position()),
116-
&format!("regex syntax error: {}",
117-
e.description()));
118-
}
119-
}
120-
}
121-
} else if let Some(r) = const_str(cx, &*args[0]) {
122-
match regex_syntax::Expr::parse(&r) {
123-
Ok(r) => {
124-
if let Some(repl) = is_trivial_regex(&r) {
125-
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
126-
"trivial regex",
127-
&format!("consider using {}", repl));
128-
}
129-
}
130-
Err(e) => {
131-
span_lint(cx,
132-
INVALID_REGEX,
133-
args[0].span,
134-
&format!("regex syntax error on position {}: {}",
135-
e.position(),
136-
e.description()));
137-
}
138-
}
102+
let def_id = def.def_id();
103+
if match_def_path(cx, def_id, &paths::REGEX_NEW) {
104+
check_regex(cx, &args[0], true);
105+
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
106+
check_regex(cx, &args[0], false);
107+
} else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
108+
check_set(cx, &args[0], true);
109+
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
110+
check_set(cx, &args[0], false);
139111
}
140112
}}
141113
}
@@ -193,3 +165,57 @@ fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
193165
_ => None,
194166
}
195167
}
168+
169+
fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) {
170+
if_let_chain! {[
171+
let ExprAddrOf(_, ref expr) = expr.node,
172+
let ExprVec(ref exprs) = expr.node,
173+
], {
174+
for expr in exprs {
175+
check_regex(cx, expr, utf8);
176+
}
177+
}}
178+
}
179+
180+
fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) {
181+
let builder = regex_syntax::ExprBuilder::new().unicode(utf8);
182+
183+
if let ExprLit(ref lit) = expr.node {
184+
if let LitKind::Str(ref r, _) = lit.node {
185+
match builder.parse(r) {
186+
Ok(r) => {
187+
if let Some(repl) = is_trivial_regex(&r) {
188+
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
189+
"trivial regex",
190+
&format!("consider using {}", repl));
191+
}
192+
}
193+
Err(e) => {
194+
span_lint(cx,
195+
INVALID_REGEX,
196+
str_span(expr.span, r, e.position()),
197+
&format!("regex syntax error: {}",
198+
e.description()));
199+
}
200+
}
201+
}
202+
} else if let Some(r) = const_str(cx, expr) {
203+
match builder.parse(&r) {
204+
Ok(r) => {
205+
if let Some(repl) = is_trivial_regex(&r) {
206+
span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
207+
"trivial regex",
208+
&format!("consider using {}", repl));
209+
}
210+
}
211+
Err(e) => {
212+
span_lint(cx,
213+
INVALID_REGEX,
214+
expr.span,
215+
&format!("regex syntax error on position {}: {}",
216+
e.position(),
217+
e.description()));
218+
}
219+
}
220+
}
221+
}

src/utils/paths.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
4646
pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
4747
pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
4848
pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
49-
pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"];
49+
pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
50+
pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
51+
pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
52+
pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
53+
pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"];
5054
pub const RESULT: [&'static str; 3] = ["core", "result", "Result"];
5155
pub const STRING: [&'static str; 3] = ["collections", "string", "String"];
5256
pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"];

tests/compile-fail/regex.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
extern crate regex;
88

9-
use regex::Regex;
9+
use regex::{Regex, RegexSet};
10+
use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet};
1011

1112
const OPENING_PAREN : &'static str = "(";
1213
const NOT_A_REAL_REGEX : &'static str = "foobar";
@@ -22,8 +23,33 @@ fn syntax_error() {
2223
let some_regex = Regex::new(OPENING_PAREN);
2324
//~^ERROR: regex syntax error on position 0: unclosed
2425

26+
let binary_pipe_in_wrong_position = BRegex::new("|");
27+
//~^ERROR: regex syntax error: empty alternate
28+
let some_binary_regex = BRegex::new(OPENING_PAREN);
29+
//~^ERROR: regex syntax error on position 0: unclosed
30+
2531
let closing_paren = ")";
2632
let not_linted = Regex::new(closing_paren);
33+
34+
let set = RegexSet::new(&[
35+
r"[a-z]+@[a-z]+\.(com|org|net)",
36+
r"[a-z]+\.(com|org|net)",
37+
]);
38+
let bset = BRegexSet::new(&[
39+
r"[a-z]+@[a-z]+\.(com|org|net)",
40+
r"[a-z]+\.(com|org|net)",
41+
]);
42+
43+
let set_error = RegexSet::new(&[
44+
OPENING_PAREN,
45+
//~^ERROR: regex syntax error on position 0: unclosed
46+
r"[a-z]+\.(com|org|net)",
47+
]);
48+
let bset_error = BRegexSet::new(&[
49+
OPENING_PAREN,
50+
//~^ERROR: regex syntax error on position 0: unclosed
51+
r"[a-z]+\.(com|org|net)",
52+
]);
2753
}
2854

2955
fn trivial_regex() {
@@ -64,12 +90,17 @@ fn trivial_regex() {
6490
//~^ERROR: trivial regex
6591
//~|HELP consider using `str::is_empty`
6692

93+
let binary_trivial_empty = BRegex::new("^$");
94+
//~^ERROR: trivial regex
95+
//~|HELP consider using `str::is_empty`
96+
6797
// non-trivial regexes
6898
let non_trivial_dot = Regex::new("a.b");
6999
let non_trivial_eq = Regex::new("^foo|bar$");
70100
let non_trivial_starts_with = Regex::new("^foo|bar");
71101
let non_trivial_ends_with = Regex::new("^foo|bar");
72102
let non_trivial_ends_with = Regex::new("foo|bar");
103+
let non_trivial_binary = BRegex::new("foo|bar");
73104
}
74105

75106
fn main() {

0 commit comments

Comments
 (0)