Skip to content

Regexes #955

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 25, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# Change Log
All notable changes to this project will be documented in this file.

## 0.0.70 — TBD
* [`invalid_regex`] and [`trivial_regex`] can now warn on `RegexSet::new`,
`RegexBuilder::new` and byte regexes

## 0.0.69 — 2016-05-20
* Rustup to *rustc 1.10.0-nightly (476fe6eef 2016-05-21)*
* `used_underscore_binding` has been made `Allow` temporarily
* [`used_underscore_binding`] has been made `Allow` temporarily

## 0.0.68 — 2016-05-17
* Rustup to *rustc 1.10.0-nightly (cd6a40017 2016-05-16)*
Expand Down
112 changes: 71 additions & 41 deletions src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::error::Error;
use syntax::ast::{LitKind, NodeId};
use syntax::codemap::{Span, BytePos};
use syntax::parse::token::InternedString;
use utils::{is_expn_of, match_path, match_type, paths, span_lint, span_help_and_lint};
use utils::{is_expn_of, match_def_path, match_type, paths, span_lint, span_help_and_lint};

/// **What it does:** This lint checks `Regex::new(_)`, `RegexBuilder::new(_)` and `RegexSet::new(_)` invocations (including their `regex::bytes` counterparts) for correct regex syntax.
///
Expand Down Expand Up @@ -81,7 +81,7 @@ impl LateLintPass for RegexPass {
span,
"`regex!(_)` found. \
Please use `Regex::new(_)`, which is faster for now.");
self.spans.insert(span);
self.spans.insert(span);
}
self.last = Some(block.id);
}}
Expand All @@ -96,46 +96,22 @@ impl LateLintPass for RegexPass {
fn check_expr(&mut self, cx: &LateContext, expr: &Expr) {
if_let_chain!{[
let ExprCall(ref fun, ref args) = expr.node,
let ExprPath(_, ref path) = fun.node,
match_path(path, &paths::REGEX_NEW) && args.len() == 1
args.len() == 1,
let Some(def) = cx.tcx.def_map.borrow().get(&fun.id),
], {
if let ExprLit(ref lit) = args[0].node {
if let LitKind::Str(ref r, _) = lit.node {
match regex_syntax::Expr::parse(r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
str_span(args[0].span, &r, e.position()),
&format!("regex syntax error: {}",
e.description()));
}
}
}
} else if let Some(r) = const_str(cx, &*args[0]) {
match regex_syntax::Expr::parse(&r) {
Ok(r) => {
if let Some(repl) = is_trivial_regex(&r) {
span_help_and_lint(cx, TRIVIAL_REGEX, args[0].span,
"trivial regex",
&format!("consider using {}", repl));
}
}
Err(e) => {
span_lint(cx,
INVALID_REGEX,
args[0].span,
&format!("regex syntax error on position {}: {}",
e.position(),
e.description()));
}
}
let def_id = def.def_id();
if match_def_path(cx, def_id, &paths::REGEX_NEW) {
check_regex(cx, &args[0], true);
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_NEW) {
check_regex(cx, &args[0], false);
} else if match_def_path(cx, def_id, &paths::REGEX_BUILDER_NEW) {
check_regex(cx, &args[0], true);
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_BUILDER_NEW) {
check_regex(cx, &args[0], false);
} else if match_def_path(cx, def_id, &paths::REGEX_SET_NEW) {
check_set(cx, &args[0], true);
} else if match_def_path(cx, def_id, &paths::REGEX_BYTES_SET_NEW) {
check_set(cx, &args[0], false);
}
}}
}
Expand Down Expand Up @@ -193,3 +169,57 @@ fn is_trivial_regex(s: &regex_syntax::Expr) -> Option<&'static str> {
_ => None,
}
}

fn check_set(cx: &LateContext, expr: &Expr, utf8: bool) {
if_let_chain! {[
let ExprAddrOf(_, ref expr) = expr.node,
let ExprVec(ref exprs) = expr.node,
], {
for expr in exprs {
check_regex(cx, expr, utf8);
}
}}
}

/// Emits `TRIVIAL_REGEX` on `expr` when the successfully parsed regex has a
/// simpler replacement according to `is_trivial_regex`.
fn lint_trivial_regex(cx: &LateContext, expr: &Expr, parsed: &regex_syntax::Expr) {
    if let Some(repl) = is_trivial_regex(parsed) {
        span_help_and_lint(cx, TRIVIAL_REGEX, expr.span,
                           "trivial regex",
                           &format!("consider using {}", repl));
    }
}

/// Parses the regex given by `expr` and reports `INVALID_REGEX` or
/// `TRIVIAL_REGEX` as appropriate.
///
/// `utf8` is forwarded to `ExprBuilder::unicode`, so callers pass `true` for
/// the Unicode regex types and `false` for the `bytes` variants.
///
/// Two argument shapes are handled:
///
/// * a string literal: a syntax error is reported on the span computed by
///   `str_span` from the error position;
/// * an expression that `const_str` can resolve: a syntax error is reported on
///   the whole expression, with the position included in the message instead.
///
/// Non-string literals and expressions that `const_str` cannot resolve are
/// not linted.
fn check_regex(cx: &LateContext, expr: &Expr, utf8: bool) {
    let builder = regex_syntax::ExprBuilder::new().unicode(utf8);

    if let ExprLit(ref lit) = expr.node {
        if let LitKind::Str(ref r, _) = lit.node {
            match builder.parse(r) {
                Ok(parsed) => lint_trivial_regex(cx, expr, &parsed),
                Err(e) => {
                    // The literal's text is in the source, so point at the
                    // offending position inside it.
                    span_lint(cx,
                              INVALID_REGEX,
                              str_span(expr.span, r, e.position()),
                              &format!("regex syntax error: {}",
                                       e.description()));
                }
            }
        }
    } else if let Some(r) = const_str(cx, expr) {
        match builder.parse(&r) {
            Ok(parsed) => lint_trivial_regex(cx, expr, &parsed),
            Err(e) => {
                // The pattern text is not at the call site, so lint the whole
                // expression and put the position in the message.
                span_lint(cx,
                          INVALID_REGEX,
                          expr.span,
                          &format!("regex syntax error on position {}: {}",
                                   e.position(),
                                   e.description()));
            }
        }
    }
}
8 changes: 7 additions & 1 deletion src/utils/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ pub const RANGE_TO_INCLUSIVE: [&'static str; 3] = ["core", "ops", "RangeToInclus
pub const RANGE_TO_INCLUSIVE_STD: [&'static str; 3] = ["std", "ops", "RangeToInclusive"];
pub const RANGE_TO_STD: [&'static str; 3] = ["std", "ops", "RangeTo"];
pub const REGEX: [&'static str; 3] = ["regex", "re_unicode", "Regex"];
pub const REGEX_NEW: [&'static str; 3] = ["regex", "Regex", "new"];
pub const REGEX_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "unicode", "RegexBuilder", "new"];
pub const REGEX_BYTES: [&'static str; 3] = ["regex", "re_bytes", "Regex"];
pub const REGEX_BYTES_BUILDER_NEW: [&'static str; 5] = ["regex", "re_builder", "bytes", "RegexBuilder", "new"];
pub const REGEX_BYTES_NEW: [&'static str; 4] = ["regex", "re_bytes", "Regex", "new"];
pub const REGEX_BYTES_SET_NEW: [&'static str; 5] = ["regex", "re_set", "bytes", "RegexSet", "new"];
pub const REGEX_NEW: [&'static str; 4] = ["regex", "re_unicode", "Regex", "new"];
pub const REGEX_SET_NEW: [&'static str; 5] = ["regex", "re_set", "unicode", "RegexSet", "new"];
pub const RESULT: [&'static str; 3] = ["core", "result", "Result"];
pub const STRING: [&'static str; 3] = ["collections", "string", "String"];
pub const TRANSMUTE: [&'static str; 4] = ["core", "intrinsics", "", "transmute"];
Expand Down
43 changes: 42 additions & 1 deletion tests/compile-fail/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@

extern crate regex;

use regex::Regex;
use regex::{Regex, RegexSet, RegexBuilder};
use regex::bytes::{Regex as BRegex, RegexSet as BRegexSet, RegexBuilder as BRegexBuilder};

const OPENING_PAREN : &'static str = "(";
const NOT_A_REAL_REGEX : &'static str = "foobar";

fn syntax_error() {
let pipe_in_wrong_position = Regex::new("|");
//~^ERROR: regex syntax error: empty alternate
let pipe_in_wrong_position_builder = RegexBuilder::new("|");
//~^ERROR: regex syntax error: empty alternate
let wrong_char_ranice = Regex::new("[z-a]");
//~^ERROR: regex syntax error: invalid character class range
let some_unicode = Regex::new("[é-è]");
Expand All @@ -22,15 +25,46 @@ fn syntax_error() {
let some_regex = Regex::new(OPENING_PAREN);
//~^ERROR: regex syntax error on position 0: unclosed

let binary_pipe_in_wrong_position = BRegex::new("|");
//~^ERROR: regex syntax error: empty alternate
let some_binary_regex = BRegex::new(OPENING_PAREN);
//~^ERROR: regex syntax error on position 0: unclosed
let some_binary_regex_builder = BRegexBuilder::new(OPENING_PAREN);
//~^ERROR: regex syntax error on position 0: unclosed

let closing_paren = ")";
let not_linted = Regex::new(closing_paren);

let set = RegexSet::new(&[
r"[a-z]+@[a-z]+\.(com|org|net)",
r"[a-z]+\.(com|org|net)",
]);
let bset = BRegexSet::new(&[
r"[a-z]+@[a-z]+\.(com|org|net)",
r"[a-z]+\.(com|org|net)",
]);

let set_error = RegexSet::new(&[
OPENING_PAREN,
//~^ERROR: regex syntax error on position 0: unclosed
r"[a-z]+\.(com|org|net)",
]);
let bset_error = BRegexSet::new(&[
OPENING_PAREN,
//~^ERROR: regex syntax error on position 0: unclosed
r"[a-z]+\.(com|org|net)",
]);
}

fn trivial_regex() {
let trivial_eq = Regex::new("^foobar$");
//~^ERROR: trivial regex
//~|HELP consider using `==` on `str`s

let trivial_eq_builder = RegexBuilder::new("^foobar$");
//~^ERROR: trivial regex
//~|HELP consider using `==` on `str`s

let trivial_starts_with = Regex::new("^foobar");
//~^ERROR: trivial regex
//~|HELP consider using `str::starts_with`
Expand Down Expand Up @@ -64,12 +98,19 @@ fn trivial_regex() {
//~^ERROR: trivial regex
//~|HELP consider using `str::is_empty`

let binary_trivial_empty = BRegex::new("^$");
//~^ERROR: trivial regex
//~|HELP consider using `str::is_empty`

// non-trivial regexes
let non_trivial_dot = Regex::new("a.b");
let non_trivial_dot_builder = RegexBuilder::new("a.b");
let non_trivial_eq = Regex::new("^foo|bar$");
let non_trivial_starts_with = Regex::new("^foo|bar");
let non_trivial_ends_with = Regex::new("^foo|bar");
let non_trivial_ends_with = Regex::new("foo|bar");
let non_trivial_binary = BRegex::new("foo|bar");
let non_trivial_binary_builder = BRegexBuilder::new("foo|bar");
}

fn main() {
Expand Down