Skip to content

Commit e409e8d

Browse files
committed
feat: add Search::can_match_relative_path().
This way it's possible to match partial input against a pathspec to see if this root would have a chance to actually match.
1 parent d0c5a0e commit e409e8d

File tree

3 files changed

+233
-3
lines changed

3 files changed

+233
-3
lines changed

gix-pathspec/src/pattern.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ impl Pattern {
142142
self.signature.contains(MagicSignature::EXCLUDE)
143143
}
144144

145+
/// Returns `true` if this pattern is supposed to always match, as it's either empty or designated `nil`.
146+
/// Note that technically the pattern might still be excluded.
147+
pub fn always_matches(&self) -> bool {
148+
self.is_nil() || self.path.is_empty()
149+
}
150+
145151
/// Translate ourselves to a long display format, that when parsed back will yield the same pattern.
146152
///
147153
/// Note that the

gix-pathspec/src/search/matching.rs

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::{
88

99
impl Search {
1010
/// Return the first [`Match`] of `relative_path`, or `None`.
11-
/// `is_dir` is `true` if `relative_path` is a directory.
11+
/// `is_dir` is `true` if `relative_path` is a directory, or assumed `false` if `None`.
1212
/// `attributes` is called as `attributes(relative_path, case, is_dir, outcome) -> has_match` to obtain attributes for `relative_path`, if
1313
/// the underlying pathspec defined an attribute filter, to be stored in `outcome`, returning true if there was a match.
1414
/// All attributes of the pathspec have to be present in the defined value for the pathspec to match.
@@ -52,7 +52,7 @@ impl Search {
5252
}
5353

5454
let case = if ignore_case { Case::Fold } else { Case::Sensitive };
55-
let mut is_match = mapping.value.pattern.is_nil() || mapping.value.pattern.path.is_empty();
55+
let mut is_match = mapping.value.pattern.always_matches();
5656
if !is_match {
5757
is_match = if mapping.pattern.first_wildcard_pos.is_none() {
5858
match_verbatim(mapping, relative_path, is_dir, case)
@@ -117,6 +117,67 @@ impl Search {
117117
res
118118
}
119119
}
120+
121+
/// As opposed to [`Self::pattern_matching_relative_path()`], this method will return `true` for a possibly partial `relative_path`
122+
/// if this pathspec *could* match by looking at the shortest shared prefix only.
123+
///
124+
/// This is useful if `relative_path` is a directory leading up to the item that is going to be matched in full later.
125+
/// Note that it should not end with `/` to indicate it's a directory, rather, use `is_dir` to indicate this.
126+
/// `is_dir` is `Some(true)` if `relative_path` is a directory and `Some(false)` if it is not. If it is `None`, the must-be-directory check of a pattern is skipped.
127+
/// Returns `false` if this pathspec has no chance of ever matching `relative_path`.
128+
pub fn can_match_relative_path(&self, relative_path: &BStr, is_dir: Option<bool>) -> bool {
129+
if self.patterns.is_empty() {
130+
return true;
131+
}
132+
let common_prefix_len = self.common_prefix_len.min(relative_path.len());
133+
if relative_path.get(..common_prefix_len).map_or(true, |rela_path_prefix| {
134+
rela_path_prefix != self.common_prefix()[..common_prefix_len]
135+
}) {
136+
return false;
137+
}
138+
for mapping in &self.patterns {
139+
let pattern = &mapping.value.pattern;
140+
if mapping.pattern.first_wildcard_pos == Some(0) && !pattern.is_excluded() {
141+
return true;
142+
}
143+
let max_usable_pattern_len = mapping.pattern.first_wildcard_pos.unwrap_or_else(|| pattern.path.len());
144+
let common_len = max_usable_pattern_len.min(relative_path.len());
145+
146+
let pattern_path = pattern.path[..common_len].as_bstr();
147+
let longest_possible_relative_path = &relative_path[..common_len];
148+
let ignore_case = pattern.signature.contains(MagicSignature::ICASE);
149+
let mut is_match = pattern.always_matches();
150+
if !is_match && common_len != 0 {
151+
is_match = if ignore_case {
152+
pattern_path.eq_ignore_ascii_case(longest_possible_relative_path)
153+
} else {
154+
pattern_path == longest_possible_relative_path
155+
};
156+
157+
if is_match {
158+
is_match = if common_len < max_usable_pattern_len {
159+
pattern.path.get(common_len) == Some(&b'/')
160+
} else if relative_path.len() > max_usable_pattern_len {
161+
relative_path.get(common_len) == Some(&b'/')
162+
} else {
163+
is_match
164+
};
165+
if let Some(is_dir) = is_dir.filter(|_| pattern.signature.contains(MagicSignature::MUST_BE_DIR)) {
166+
is_match = if is_dir {
167+
matches!(pattern.path.get(common_len), None | Some(&b'/'))
168+
} else {
169+
relative_path.get(common_len) == Some(&b'/')
170+
}
171+
}
172+
}
173+
}
174+
if is_match {
175+
return !pattern.is_excluded();
176+
}
177+
}
178+
179+
self.all_patterns_are_excluded
180+
}
120181
}
121182

122183
fn match_verbatim(

gix-pathspec/tests/search/mod.rs

Lines changed: 164 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,139 @@ fn no_pathspecs_match_everything() -> crate::Result {
1515
})
1616
.expect("matches");
1717
assert_eq!(m.pattern.prefix_directory(), "", "there is no prefix as none was given");
18+
assert_eq!(
19+
m.sequence_number, 0,
20+
"this is actually a fake pattern, as we have to match even though there isn't anything"
21+
);
22+
23+
assert!(search.can_match_relative_path("anything".into(), None));
24+
25+
Ok(())
26+
}
27+
28+
#[test]
29+
fn simplified_search_respects_must_be_dir() -> crate::Result {
30+
let mut search = gix_pathspec::Search::from_specs(pathspecs(&["a/be/"]), None, Path::new(""))?;
31+
search
32+
.pattern_matching_relative_path("a/be/file".into(), Some(false), &mut |_, _, _, _| {
33+
unreachable!("must not be called")
34+
})
35+
.expect("matches as this is a prefix match");
36+
assert!(
37+
!search.can_match_relative_path("any".into(), Some(false)),
38+
"not our directory: a, and must be dir"
39+
);
40+
assert!(
41+
!search.can_match_relative_path("any".into(), Some(true)),
42+
"not our directory: a"
43+
);
44+
assert!(
45+
!search.can_match_relative_path("any".into(), None),
46+
"not our directory: a, and must be dir, still completely out of scope"
47+
);
48+
assert!(
49+
!search.can_match_relative_path("a/bei".into(), None),
50+
"not our directory: a/be"
51+
);
52+
assert!(!search.can_match_relative_path("a".into(), Some(false)), "must be dir");
53+
assert!(search.can_match_relative_path("a".into(), Some(true)));
54+
assert!(
55+
search.can_match_relative_path("a".into(), None),
56+
"now dir or not doesn't matter"
57+
);
58+
assert!(search.can_match_relative_path("a/be".into(), Some(true)));
59+
assert!(
60+
search.can_match_relative_path("a/be".into(), None),
61+
"dir doesn't matter anymore"
62+
);
63+
assert!(
64+
!search.can_match_relative_path("a/be".into(), Some(false)),
65+
"files can't match as prefix"
66+
);
67+
assert!(
68+
search.can_match_relative_path("a/be/file".into(), Some(false)),
69+
"files can match if they are part of the suffix"
70+
);
71+
72+
assert!(
73+
!search.can_match_relative_path("a/b".into(), Some(false)),
74+
"can't match a/be"
75+
);
76+
assert!(
77+
!search.can_match_relative_path("a/b".into(), None),
78+
"still can't match a/be"
79+
);
80+
assert!(
81+
search
82+
.pattern_matching_relative_path("a/b".into(), None, &mut |_, _, _, _| unreachable!("must not be called"))
83+
.is_none(),
84+
"no match if it's not the whole pattern that matches"
85+
);
86+
assert!(
87+
!search.can_match_relative_path("a/b".into(), Some(true)),
88+
"can't match a/be, which must be directory"
89+
);
90+
91+
Ok(())
92+
}
93+
94+
#[test]
95+
fn simplified_search_respects_ignore_case() -> crate::Result {
96+
let search = gix_pathspec::Search::from_specs(pathspecs(&[":(icase)foo/**/bar"]), None, Path::new(""))?;
97+
assert!(search.can_match_relative_path("Foo".into(), None));
98+
assert!(search.can_match_relative_path("foo".into(), Some(true)));
99+
assert!(search.can_match_relative_path("FOO/".into(), Some(true)));
100+
101+
Ok(())
102+
}
103+
104+
#[test]
105+
fn simplified_search_respects_all_excluded() -> crate::Result {
106+
let search = gix_pathspec::Search::from_specs(
107+
pathspecs(&[":(exclude)a/file", ":(exclude)b/file"]),
108+
None,
109+
Path::new(""),
110+
)?;
111+
assert!(!search.can_match_relative_path("b".into(), None));
112+
assert!(!search.can_match_relative_path("a".into(), None));
113+
assert!(search.can_match_relative_path("c".into(), None));
114+
assert!(search.can_match_relative_path("c/".into(), None));
115+
116+
Ok(())
117+
}
118+
119+
#[test]
120+
fn simplified_search_wildcards() -> crate::Result {
121+
let search = gix_pathspec::Search::from_specs(pathspecs(&["**/a*"]), None, Path::new(""))?;
122+
assert!(
123+
search.can_match_relative_path("a".into(), None),
124+
"it can't determine it, so assume match"
125+
);
126+
assert!(search.can_match_relative_path("a/a".into(), Some(false)));
127+
assert!(search.can_match_relative_path("a/a.o".into(), Some(false)));
128+
assert!(
129+
search.can_match_relative_path("b-unrelated".into(), None),
130+
"this is also assumed to be a match, prefer false-positives over false-negatives"
131+
);
132+
Ok(())
133+
}
134+
135+
#[test]
136+
fn simplified_search_handles_nil() -> crate::Result {
137+
let search = gix_pathspec::Search::from_specs(pathspecs(&[":"]), None, Path::new(""))?;
138+
assert!(search.can_match_relative_path("a".into(), None), "everything matches");
139+
assert!(search.can_match_relative_path("a".into(), Some(false)));
140+
assert!(search.can_match_relative_path("a".into(), Some(true)));
141+
assert!(search.can_match_relative_path("a/b".into(), Some(true)));
142+
143+
let search = gix_pathspec::Search::from_specs(pathspecs(&[":(exclude)"]), None, Path::new(""))?;
144+
assert!(
145+
!search.can_match_relative_path("a".into(), None),
146+
"everything does not match"
147+
);
148+
assert!(!search.can_match_relative_path("a".into(), Some(false)));
149+
assert!(!search.can_match_relative_path("a".into(), Some(true)));
150+
assert!(!search.can_match_relative_path("a/b".into(), Some(true)));
18151

19152
Ok(())
20153
}
@@ -28,6 +161,15 @@ fn init_with_exclude() -> crate::Result {
28161
"re-orded so that excluded are first"
29162
);
30163
assert_eq!(search.common_prefix(), "tests");
164+
assert!(
165+
search.can_match_relative_path("tests".into(), Some(true)),
166+
"prefix matches"
167+
);
168+
assert!(
169+
!search.can_match_relative_path("test".into(), Some(true)),
170+
"prefix can not be shorter"
171+
);
172+
assert!(!search.can_match_relative_path("outside-of-tests".into(), None));
31173
Ok(())
32174
}
33175

@@ -47,6 +189,7 @@ fn no_pathspecs_respect_prefix() -> crate::Result {
47189
.is_none(),
48190
"not the right prefix"
49191
);
192+
assert!(!search.can_match_relative_path("hello".into(), None));
50193
let m = search
51194
.pattern_matching_relative_path("a/b".into(), None, &mut |_, _, _, _| unreachable!("must not be called"))
52195
.expect("match");
@@ -55,12 +198,16 @@ fn no_pathspecs_respect_prefix() -> crate::Result {
55198
"a",
56199
"the prefix directory matched verbatim"
57200
);
201+
assert!(search.can_match_relative_path("a/".into(), Some(true)));
202+
assert!(search.can_match_relative_path("a".into(), Some(true)));
203+
assert!(!search.can_match_relative_path("a".into(), Some(false)));
204+
assert!(search.can_match_relative_path("a".into(), None), "simple prefix search");
58205

59206
Ok(())
60207
}
61208

62209
#[test]
63-
fn prefixes_are_always_case_insensitive() -> crate::Result {
210+
fn prefixes_are_always_case_sensitive() -> crate::Result {
64211
let path = gix_testtools::scripted_fixture_read_only("match_baseline_files.sh")?.join("paths");
65212
let items = baseline::parse_paths(path)?;
66213

@@ -108,6 +255,22 @@ fn prefixes_are_always_case_insensitive() -> crate::Result {
108255
.collect();
109256
assert_eq!(actual, expected, "{spec} {prefix}");
110257
}
258+
259+
let search = gix_pathspec::Search::from_specs(
260+
gix_pathspec::parse(":(icase)bar".as_bytes(), Default::default()),
261+
Some(Path::new("FOO")),
262+
Path::new(""),
263+
)?;
264+
assert!(
265+
!search.can_match_relative_path("foo".into(), Some(true)),
266+
"icase does not apply to the prefix"
267+
);
268+
assert!(search.can_match_relative_path("FOO".into(), Some(true)));
269+
assert!(
270+
!search.can_match_relative_path("FOO/ba".into(), Some(true)),
271+
"a full match is needed"
272+
);
273+
assert!(search.can_match_relative_path("FOO/bar".into(), Some(true)));
111274
Ok(())
112275
}
113276

0 commit comments

Comments
 (0)