Skip to content

Commit 322e7f3

Browse files
committed
feat(ops): Commit filtering support
This is a building block for faster operations by operating on less data.
1 parent c7fa8c5 commit 322e7f3

File tree

7 files changed

+1020
-17
lines changed

7 files changed

+1020
-17
lines changed

Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,12 @@ which = "4"
3939
[dev-dependencies]
4040
git-fixture = { version = "0.3", features = ["yaml"] }
4141
assert_fs = "1"
42+
eyre = "0.6"
43+
snapbox = "0.2"
44+
regex = "1.5.5"
45+
criterion = "0.3.5"
46+
47+
[[bench]]
48+
harness = false
49+
name = "ops"
50+
path = "benches/ops.rs"

benches/ops.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use std::collections::HashSet;
2+
3+
fn get_repo() -> git2::Repository {
4+
let repo_dir =
5+
std::env::var("PATH_TO_REPO").expect("`PATH_TO_REPO` environment variable not set");
6+
git2::Repository::discover(&std::path::PathBuf::from(repo_dir)).unwrap()
7+
}
8+
9+
fn bench_get_changed_paths_between_trees(c: &mut criterion::Criterion) {
10+
c.bench_function("get_changed_paths_between_trees", |b| {
11+
let repo = get_repo();
12+
let oid = repo.head().unwrap().target().unwrap();
13+
let commit = repo.find_commit(oid).unwrap();
14+
let parent = commit.parent(0).unwrap();
15+
let parent_tree = parent.tree().unwrap();
16+
let commit_tree = commit.tree().unwrap();
17+
18+
b.iter(|| -> HashSet<std::path::PathBuf> {
19+
git2_ext::tree::get_changed_paths_between_trees(
20+
&repo,
21+
Some(&parent_tree),
22+
Some(&commit_tree),
23+
)
24+
.unwrap()
25+
});
26+
});
27+
}
28+
29+
criterion::criterion_group!(benches, bench_get_changed_paths_between_trees,);
30+
criterion::criterion_main!(benches);

src/bytes.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// From git2 crate
2+
#[cfg(unix)]
3+
pub(crate) fn bytes2path(b: &[u8]) -> &std::path::Path {
4+
use std::os::unix::prelude::*;
5+
std::path::Path::new(std::ffi::OsStr::from_bytes(b))
6+
}
7+
8+
// From git2 crate
9+
#[cfg(windows)]
10+
pub(crate) fn bytes2path(b: &[u8]) -> &std::path::Path {
11+
use std::str;
12+
std::path::Path::new(str::from_utf8(b).unwrap())
13+
}

src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@
66
77
pub mod hooks;
88
pub mod ops;
9+
pub mod tree;
910
pub mod utils;
11+
12+
pub(crate) mod bytes;
13+
14+
#[cfg(test)]
15+
mod testing;

src/ops.rs

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,19 @@ pub fn cherry_pick(
9191
let our_path = conflict
9292
.our
9393
.as_ref()
94-
.map(|c| bytes2path(&c.path))
95-
.or_else(|| conflict.their.as_ref().map(|c| bytes2path(&c.path)))
96-
.or_else(|| conflict.ancestor.as_ref().map(|c| bytes2path(&c.path)))
94+
.map(|c| crate::bytes::bytes2path(&c.path))
95+
.or_else(|| {
96+
conflict
97+
.their
98+
.as_ref()
99+
.map(|c| crate::bytes::bytes2path(&c.path))
100+
})
101+
.or_else(|| {
102+
conflict
103+
.ancestor
104+
.as_ref()
105+
.map(|c| crate::bytes::bytes2path(&c.path))
106+
})
97107
.unwrap_or_else(|| std::path::Path::new("<unknown>"));
98108
format!("{}", our_path.display())
99109
})
@@ -171,9 +181,19 @@ pub fn squash(
171181
let our_path = conflict
172182
.our
173183
.as_ref()
174-
.map(|c| bytes2path(&c.path))
175-
.or_else(|| conflict.their.as_ref().map(|c| bytes2path(&c.path)))
176-
.or_else(|| conflict.ancestor.as_ref().map(|c| bytes2path(&c.path)))
184+
.map(|c| crate::bytes::bytes2path(&c.path))
185+
.or_else(|| {
186+
conflict
187+
.their
188+
.as_ref()
189+
.map(|c| crate::bytes::bytes2path(&c.path))
190+
})
191+
.or_else(|| {
192+
conflict
193+
.ancestor
194+
.as_ref()
195+
.map(|c| crate::bytes::bytes2path(&c.path))
196+
})
177197
.unwrap_or_else(|| std::path::Path::new("<unknown>"));
178198
format!("{}", our_path.display())
179199
})
@@ -218,16 +238,47 @@ pub fn reword(
218238
Ok(new_id)
219239
}
220240

221-
// From git2 crate
222-
#[cfg(unix)]
223-
fn bytes2path(b: &[u8]) -> &std::path::Path {
224-
use std::os::unix::prelude::*;
225-
std::path::Path::new(std::ffi::OsStr::from_bytes(b))
226-
}
241+
/// Filter the directory tree of `commit_id` to just the specified file paths
242+
///
243+
/// Performance of some git2 operations scales with the number of files within the repo. Operating on just the
244+
/// subset of files should dramatically speed up these operations.
245+
pub fn filter_commit(
246+
repo: &git2::Repository,
247+
commit_id: git2::Oid,
248+
parent_id: Option<git2::Oid>,
249+
filtered_paths: &[&std::path::Path],
250+
) -> Result<git2::Oid, git2::Error> {
251+
let commit = repo.find_commit(commit_id)?;
252+
let tree = commit.tree()?;
253+
let dehydrated_tree_oid = crate::tree::filter_tree(repo, &tree, filtered_paths)?;
254+
let dehydrated_tree = repo.find_tree(dehydrated_tree_oid)?;
255+
256+
let signature = git2::Signature::new(
257+
"git-branchless",
258+
259+
&git2::Time::new(0, 0),
260+
)?;
261+
let message = format!(
262+
"generated by git-branchless: temporary dehydrated commit \
263+
\
264+
This commit was originally: {}",
265+
commit_id
266+
);
227267

228-
// From git2 crate
229-
#[cfg(windows)]
230-
fn bytes2path(b: &[u8]) -> &std::path::Path {
231-
use std::str;
232-
std::path::Path::new(str::from_utf8(b).unwrap())
268+
let dehydrated_parent: git2::Commit<'_>;
269+
let parents = if let Some(parent_id) = parent_id {
270+
dehydrated_parent = repo.find_commit(parent_id)?;
271+
vec![&dehydrated_parent]
272+
} else {
273+
vec![]
274+
};
275+
let dehydrated_commit_oid = repo.commit(
276+
None,
277+
&signature,
278+
&signature,
279+
&message,
280+
&dehydrated_tree,
281+
&parents,
282+
)?;
283+
Ok(dehydrated_commit_oid)
233284
}

0 commit comments

Comments
 (0)