Skip to content

Commit 49db3ac

Browse files
committed
feat: match pathspecs just like git does.
This is important for selecting files on disk
1 parent 72df372 commit 49db3ac

20 files changed

+1278
-29
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-pathspec/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ doctest = false
1313

1414
[dependencies]
1515
gix-glob = { version = "^0.10.2", path = "../gix-glob" }
16+
gix-path = { version = "^0.8.4", path = "../gix-path" }
1617
gix-attributes = { version = "^0.16.0", path = "../gix-attributes" }
1718

1819
bstr = { version = "1.3.0", default-features = false, features = ["std"]}

gix-pathspec/src/lib.rs

Lines changed: 56 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,61 @@
55

66
use bitflags::bitflags;
77
use bstr::BString;
8+
use std::path::PathBuf;
9+
10+
///
11+
pub mod normalize {
12+
use std::path::PathBuf;
13+
14+
/// The error returned by [Pattern::normalize()](super::Pattern::normalize()).
15+
#[derive(Debug, thiserror::Error)]
16+
#[allow(missing_docs)]
17+
pub enum Error {
18+
#[error("The path '{}' is not inside of the worktree '{}'", path.display(), worktree_path.display())]
19+
AbsolutePathOutsideOfWorktree { path: PathBuf, worktree_path: PathBuf },
20+
#[error("The path '{}' leaves the repository", path.display())]
21+
OutsideOfWorktree { path: PathBuf },
22+
}
23+
}
24+
25+
mod pattern;
26+
27+
///
28+
pub mod search;
829

930
///
1031
pub mod parse;
1132

33+
/// A list of pathspec patterns, possibly from a file.
34+
///
35+
/// Pathspecs are generally relative to the root of the repository.
36+
#[derive(Debug, Clone)]
37+
pub struct Search {
38+
/// Patterns and their associated data in the order they were loaded in or specified,
39+
/// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_).
40+
///
41+
/// During matching, this order is reversed.
42+
patterns: Vec<gix_glob::search::pattern::Mapping<search::Spec>>,
43+
44+
/// The path from which the patterns were read, or `None` if the patterns
45+
/// don't originate in a file on disk.
46+
pub source: Option<PathBuf>,
47+
48+
/// If `true`, this means all `patterns` are exclude patterns. This means that if there is no match
49+
/// (which would exclude an item), we would actually match it for lack of exclusion.
50+
all_patterns_are_excluded: bool,
51+
/// The number of bytes that are in common among all `patterns` and that aren't matched case-insensitively.
52+
common_prefix_len: usize,
53+
}
54+
1255
/// The output of a pathspec [parsing][parse()] operation. It can be used to match against one or more paths.
1356
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
1457
pub struct Pattern {
15-
/// The path part of a pathspec.
58+
/// The path part of a pathspec, which is typically a path possibly mixed with glob patterns.
59+
/// Note that it might be an empty string as well.
1660
///
17-
/// `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`.
18-
pub path: BString,
61+
/// For example, `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`.
62+
path: BString,
1963
/// All magic signatures that were included in the pathspec.
2064
pub signature: MagicSignature,
2165
/// The search mode of the pathspec.
@@ -24,17 +68,13 @@ pub struct Pattern {
2468
///
2569
/// `:(attr:a=one b=):path` would yield attribute `a` and `b`.
2670
pub attributes: Vec<gix_attributes::Assignment>,
27-
/// If `true`, this was the special `:` spec which acts like `null`
71+
/// If `true`, we are a special Nil pattern and always match.
2872
nil: bool,
29-
}
30-
31-
impl Pattern {
32-
/// Returns `true` if this seems to be a pathspec that indicates that 'there is no pathspec'.
33-
///
34-
/// Note that such a spec is `:`.
35-
pub fn is_null(&self) -> bool {
36-
self.nil
37-
}
73+
/// The length of bytes in `path` that belong to the prefix, which will always be matched case-sensitively.
74+
/// That way, even though pathspecs are applied from the top, we can emulate having changed directory into
75+
/// a specific sub-directory in a case-sensitive file-system.
76+
/// Is set by [Pattern::normalize()].
77+
prefix_len: usize,
3878
}
3979

4080
bitflags! {
@@ -47,6 +87,9 @@ bitflags! {
4787
const ICASE = 1 << 1;
4888
/// Excludes the matching patterns from the previous results
4989
const EXCLUDE = 1 << 2;
90+
/// The pattern must match a directory, and not a file.
91+
/// This is equivalent to how it's handled in `gix-glob`
92+
const MUST_BE_DIR = 1 << 3;
5093
}
5194
}
5295

gix-pathspec/src/parse.rs

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ pub struct Defaults {
1212
/// The default signature.
1313
pub signature: MagicSignature,
1414
/// The default search-mode.
15+
///
16+
/// Note that even if it's [`MatchMode::Literal`], the pathspecs will be parsed as usual, but matched verbatim afterwards.
1517
pub search_mode: MatchMode,
18+
/// If set, the pathspec will not be parsed but used verbatim. Implies [`MatchMode::Literal`] for `search_mode`.
19+
pub literal: bool,
1620
}
1721

1822
/// The error returned by [parse()][crate::parse()].
@@ -43,10 +47,20 @@ pub enum Error {
4347

4448
impl Pattern {
4549
/// Try to parse a path-spec pattern from the given `input` bytes.
46-
pub fn from_bytes(input: &[u8], Defaults { signature, search_mode }: Defaults) -> Result<Self, Error> {
50+
pub fn from_bytes(
51+
input: &[u8],
52+
Defaults {
53+
signature,
54+
search_mode,
55+
literal,
56+
}: Defaults,
57+
) -> Result<Self, Error> {
4758
if input.is_empty() {
4859
return Err(Error::EmptyString);
4960
}
61+
if literal {
62+
return Ok(Self::from_literal(input, signature));
63+
}
5064
if input.as_bstr() == ":" {
5165
return Ok(Pattern {
5266
nil: true,
@@ -55,11 +69,9 @@ impl Pattern {
5569
}
5670

5771
let mut p = Pattern {
58-
path: BString::default(),
5972
signature,
6073
search_mode,
61-
attributes: Vec::new(),
62-
nil: false,
74+
..Default::default()
6375
};
6476

6577
let mut cursor = 0;
@@ -72,9 +84,25 @@ impl Pattern {
7284
}
7385
}
7486

75-
p.path = BString::from(&input[cursor..]);
87+
let mut path = &input[cursor..];
88+
if path.last() == Some(&b'/') {
89+
p.signature |= MagicSignature::MUST_BE_DIR;
90+
path = &path[..path.len() - 1];
91+
}
92+
p.path = path.into();
7693
Ok(p)
7794
}
95+
96+
/// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and
97+
/// use `default_signature` as magic signature.
98+
pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
99+
Pattern {
100+
path: input.into(),
101+
signature: default_signature,
102+
search_mode: MatchMode::Literal,
103+
..Default::default()
104+
}
105+
}
78106
}
79107

80108
fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {

gix-pathspec/src/pattern.rs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
use crate::{normalize, MagicSignature, MatchMode, Pattern};
2+
use bstr::{BStr, BString, ByteSlice, ByteVec};
3+
use std::path::{Component, Path, PathBuf};
4+
5+
/// Access
6+
impl Pattern {
7+
/// Returns `true` if this seems to be a pathspec that indicates that 'there is no pathspec'.
8+
///
9+
/// Note that such a spec is `:`.
10+
pub fn is_nil(&self) -> bool {
11+
self.nil
12+
}
13+
14+
/// Return the prefix-portion of the `path` of this spec, which is a *directory*.
15+
/// It can be empty if there is no prefix.
16+
///
17+
/// A prefix is effectively the CWD seen as relative to the working tree, and it's assumed to
18+
/// match case-sensitively. This makes it useful for skipping over large portions of input by
19+
/// directly comparing them.
20+
pub fn prefix_directory(&self) -> &BStr {
21+
self.path[..self.prefix_len].as_bstr()
22+
}
23+
24+
/// Return the path of this spec, typically used for matching.
25+
pub fn path(&self) -> &BStr {
26+
self.path.as_ref()
27+
}
28+
}
29+
30+
/// Mutation
31+
impl Pattern {
32+
/// Normalize the pattern's path by assuring it's relative to the root of the working tree, and contains
33+
/// no relative path components. Further, it assures that `/` are used as path separator.
34+
///
35+
/// If `self.path` is a relative path, a non-empty `prefix` will be put in front of it unless `self.signature` contains `TOP`.
36+
/// If `self.path` is an absolute path, we will use `root` to make it worktree relative if possible.
37+
///
38+
/// `prefix` can be empty, we will still normalize this pathspec to resolve relative path components, and
39+
/// it is assumed not to contain any relative path components, e.g. '', 'a', 'a/b' are valid.
40+
/// `root` is the absolute path to the root of either the worktree or the repository's `git_dir`.
41+
pub fn normalize(&mut self, prefix: &Path, root: &Path) -> Result<&mut Self, normalize::Error> {
42+
fn prefix_components_to_subtract(path: &Path) -> usize {
43+
let parent_component_end_bound = path.components().enumerate().fold(None::<usize>, |acc, (idx, c)| {
44+
matches!(c, Component::ParentDir).then_some(idx + 1).or(acc)
45+
});
46+
let count = path
47+
.components()
48+
.take(parent_component_end_bound.unwrap_or(0))
49+
.map(|c| match c {
50+
Component::ParentDir => 1_isize,
51+
Component::Normal(_) => -1,
52+
_ => 0,
53+
})
54+
.sum::<isize>();
55+
(count > 0).then_some(count as usize).unwrap_or_default()
56+
}
57+
58+
let mut path = gix_path::from_bstr(self.path.as_ref());
59+
let mut num_prefix_components = 0;
60+
let mut was_absolute = false;
61+
if gix_path::is_absolute(path.as_ref()) {
62+
was_absolute = true;
63+
let rela_path = match path.strip_prefix(root) {
64+
Ok(path) => path,
65+
Err(_) => {
66+
return Err(normalize::Error::AbsolutePathOutsideOfWorktree {
67+
path: path.into_owned(),
68+
worktree_path: root.into(),
69+
})
70+
}
71+
};
72+
path = rela_path.to_owned().into();
73+
} else if !prefix.as_os_str().is_empty() && !self.signature.contains(MagicSignature::TOP) {
74+
debug_assert_eq!(
75+
prefix
76+
.components()
77+
.filter(|c| matches!(c, Component::Normal(_)))
78+
.count(),
79+
prefix.components().count(),
80+
"BUG: prefixes must not have relative path components, or calculations here will be wrong so pattern won't match"
81+
);
82+
num_prefix_components = prefix
83+
.components()
84+
.count()
85+
.saturating_sub(prefix_components_to_subtract(path.as_ref()));
86+
path = prefix.join(path).into();
87+
}
88+
89+
let assure_path_cannot_break_out_upwards = Path::new("");
90+
let path = match gix_path::normalize(path.as_ref(), assure_path_cannot_break_out_upwards) {
91+
Some(path) => {
92+
if was_absolute {
93+
num_prefix_components = path.components().count().saturating_sub(
94+
if self.signature.contains(MagicSignature::MUST_BE_DIR) {
95+
0
96+
} else {
97+
1
98+
},
99+
);
100+
}
101+
path
102+
}
103+
None => {
104+
return Err(normalize::Error::OutsideOfWorktree {
105+
path: path.into_owned(),
106+
})
107+
}
108+
};
109+
110+
self.path = if path == Path::new(".") {
111+
BString::from(".")
112+
} else {
113+
let cleaned = PathBuf::from_iter(path.components().filter(|c| !matches!(c, Component::CurDir)));
114+
let mut out = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(cleaned)).into_owned();
115+
self.prefix_len = {
116+
if self.signature.contains(MagicSignature::MUST_BE_DIR) {
117+
out.push(b'/');
118+
}
119+
let len = out
120+
.find_iter(b"/")
121+
.take(num_prefix_components)
122+
.last()
123+
.unwrap_or_default();
124+
if self.signature.contains(MagicSignature::MUST_BE_DIR) {
125+
out.pop();
126+
}
127+
len
128+
};
129+
out
130+
};
131+
132+
Ok(self)
133+
}
134+
}
135+
136+
/// Access
137+
impl Pattern {
138+
/// Return `true` if this pathspec is negated, which means it will exclude an item from the result set instead of including it.
139+
pub fn is_excluded(&self) -> bool {
140+
self.signature.contains(MagicSignature::EXCLUDE)
141+
}
142+
143+
/// Translate ourselves to a long display format, that when parsed back will yield the same pattern.
144+
///
145+
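/// Note that the output always uses the long form `:(…)path`, even if no magic keyword applies (which yields `:()path`).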
146+
pub fn to_bstring(&self) -> BString {
147+
if self.is_nil() {
148+
":".into()
149+
} else {
150+
let mut buf: BString = ":(".into();
151+
if self.signature.contains(MagicSignature::TOP) {
152+
buf.push_str("top,");
153+
}
154+
if self.signature.contains(MagicSignature::EXCLUDE) {
155+
buf.push_str("exclude,");
156+
}
157+
if self.signature.contains(MagicSignature::ICASE) {
158+
buf.push_str("icase,");
159+
}
160+
match self.search_mode {
161+
MatchMode::ShellGlob => {}
162+
MatchMode::Literal => buf.push_str("literal,"),
163+
MatchMode::PathAwareGlob => buf.push_str("glob,"),
164+
}
165+
if self.attributes.is_empty() {
166+
if buf.last() == Some(&b',') {
167+
buf.pop();
168+
}
169+
} else {
170+
buf.push_str("attr:");
171+
for attr in &self.attributes {
172+
let attr = attr.as_ref().to_string().replace(',', "\\,");
173+
buf.push_str(&attr);
174+
buf.push(b' ');
175+
}
176+
buf.pop(); // trailing ' '
177+
}
178+
buf.push(b')');
179+
buf.extend_from_slice(&self.path);
180+
if self.signature.contains(MagicSignature::MUST_BE_DIR) {
181+
buf.push(b'/');
182+
}
183+
buf
184+
}
185+
}
186+
}
187+
188+
impl std::fmt::Display for Pattern {
189+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190+
self.to_bstring().fmt(f)
191+
}
192+
}

0 commit comments

Comments
 (0)