Skip to content

Commit ebd4428

Browse files
committed
feat: support workspace filters when fetching them from the object database
1 parent 940bdcd commit ebd4428

File tree

17 files changed

+1987
-70
lines changed

17 files changed

+1987
-70
lines changed

Cargo.lock

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -293,16 +293,27 @@ The top-level crate that acts as hub to all functionality provided by the `gix-*
293293
Check out the [performance discussion][gix-diff-performance] as well.
294294

295295
* **tree**
296-
* [x] changes needed to obtain _other tree_
296+
* [x] changes needed to obtain _other tree_
297297
* **patches**
298-
* There are various ways to generate a patch from two blobs.
299-
* [ ] any
298+
* There are various ways to generate a patch from two blobs.
299+
* [ ] text
300+
* [ ] binary
300301
* **lines**
301-
* [x] Simple line-by-line diffs powered by the `imara-diff` crate.
302-
* diffing, merging, working with hunks of data
303-
* find differences between various states, i.e. index, working tree, commit-tree
302+
* [x] Simple line-by-line diffs powered by the `imara-diff` crate.
303+
* **generic rename tracker to find renames and copies**
304+
* [x] find by exact match
305+
* [x] find by similarity check
306+
* [ ] heuristics to find best candidate
307+
* [ ] find by basename to help detecting simple moves
308+
* **blob**
309+
* [ ] worktree conversions
310+
* [ ] `textconv` filters
311+
* [ ] caching of diff-able data
312+
* [ ] special handling of files beyond the big-file threshold.
313+
* [ ] detection of binary files by looking at header (first 8k bytes)
314+
* [ ] working with hunks of data
304315
* [x] API documentation
305-
* [ ] Examples
316+
* [ ] Examples
306317

307318
[gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74
308319

gix-diff/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ autotests = false
1313
[features]
1414
default = ["blob"]
1515
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
16-
blob = ["dep:imara-diff"]
16+
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command"]
1717
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
1818
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
1919
## Make it possible to compile to the `wasm32-unknown-unknown` target.
@@ -25,6 +25,11 @@ doctest = false
2525
[dependencies]
2626
gix-hash = { version = "^0.13.1", path = "../gix-hash" }
2727
gix-object = { version = "^0.38.0", path = "../gix-object" }
28+
gix-filter = { version = "^0.6.0", path = "../gix-filter", optional = true }
29+
gix-worktree = { version = "^0.27.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true }
30+
gix-command = { version = "^0.2.10", path = "../gix-command", optional = true }
31+
gix-path = { version = "^0.10.0", path = "../gix-path", optional = true }
32+
gix-fs = { version = "^0.8.0", path = "../gix-fs", optional = true }
2833

2934
thiserror = "1.0.32"
3035
imara-diff = { version = "0.1.3", optional = true }

gix-diff/src/blob.rs

Lines changed: 0 additions & 18 deletions
This file was deleted.

gix-diff/src/blob/mod.rs

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff),
2+
//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe).
3+
pub use imara_diff::*;
4+
use std::collections::HashMap;
5+
use std::path::PathBuf;
6+
7+
use bstr::BString;
8+
9+
/// Information about the diff performed to detect similarity.
10+
#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)]
11+
pub struct DiffLineStats {
12+
/// The number of lines to remove from the source to get to the destination.
13+
pub removals: u32,
14+
/// The number of lines to add to the source to get to the destination.
15+
pub insertions: u32,
16+
/// The number of lines of the previous state, in the source.
17+
pub before: u32,
18+
/// The number of lines of the new state, in the destination.
19+
pub after: u32,
20+
/// A range from 0 to 1.0, where 1.0 is a perfect match and 0.5 is a similarity of 50%.
21+
/// Similarity is the ratio between all lines in the previous blob and the current blob,
22+
/// calculated as `(old_lines_count - new_lines_count) as f32 / old_lines_count.max(new_lines_count) as f32`.
23+
pub similarity: f32,
24+
}
25+
26+
/// A way to classify a resource suitable for diffing.
27+
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
28+
pub enum ResourceKind {
29+
/// The source of a rewrite, rename or copy operation, or generally the old version of a resource.
30+
OldOrSource,
31+
/// The destination of a rewrite, rename or copy operation, or generally the new version of a resource.
32+
NewOrDestination,
33+
}
34+
35+
/// A set of values to define how to diff something that is associated with it using `git-attributes`, relevant for regular files.
36+
///
37+
/// Some values are related to diffing, some are related to conversions.
38+
#[derive(Default, Debug, Clone)]
39+
pub struct Driver {
40+
/// The name of the driver, as referred to by `[diff "name"]` in the git configuration.
41+
pub name: BString,
42+
/// The command to execute to perform the diff entirely like `<command> old-file old-hex old-mode new-file new-hex new-mode`.
43+
///
44+
/// Please note that we don't make this call ourselves, but use it to determine that we should not run our standard
45+
/// built-in algorithm but bail instead as the output of such a program isn't standardized.
46+
pub command: Option<BString>,
47+
/// The per-driver algorithm to use.
48+
pub algorithm: Option<Algorithm>,
49+
/// The external filter program to call like `<binary_to_text_command> /path/to/blob` which outputs a textual version of the provided
50+
/// binary file.
51+
/// Note that it's invoked with a shell if arguments are given.
52+
pub binary_to_text_command: Option<BString>,
53+
/// `true` if this driver deals with binary files, which means that a `binary_to_text_command` should be used to convert binary
54+
/// into a textual representation.
55+
pub is_binary: bool,
56+
}
57+
58+
/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree.
59+
#[derive(Clone, Debug, Default)]
60+
pub struct WorktreeRoots {
61+
/// A place where the source of a rewrite, rename or copy, or generally the previous version of resources, are located.
62+
pub old_root: Option<PathBuf>,
63+
/// A place where the destination of a rewrite, rename or copy, or generally the new version of resources, are located.
64+
pub new_root: Option<PathBuf>,
65+
}
66+
67+
/// A conversion pipeline to take an object or path from what's stored in `git` to what can be diffed, while
68+
/// following the guidance of git-attributes at the respective path to learn if diffing should happen or if
69+
/// the content is considered binary.
70+
///
71+
/// There are two different conversion flows, where the target of the flow is a buffer with diffable content:
72+
///
73+
/// * `worktree on disk` -> `text conversion`
74+
/// * `object` -> `worktree-filters` -> `text conversion`
75+
///
76+
/// Based on whether or not [`WorktreeRoots`] has the file in question, we either read directly from disk
77+
/// or transform from the object database.
78+
pub struct Pipeline {
79+
/// A way to read data directly from the worktree.
80+
pub roots: WorktreeRoots,
81+
/// A pipeline to convert objects from what's stored in `git` to its worktree version.
82+
pub worktree_filter: gix_filter::Pipeline,
83+
/// Options affecting the way we read files.
84+
pub options: pipeline::Options,
85+
/// Drivers to help customize the conversion behaviour depending on the location of items.
86+
drivers: Vec<Driver>,
87+
/// Pre-configured attributes to obtain additional diff-related information.
88+
attrs: gix_filter::attributes::search::Outcome,
89+
/// A buffer to manipulate paths.
90+
path: PathBuf,
91+
}
92+
93+
/// A utility for performing a diff of two blobs, including flexible conversions, conversion-caching
94+
/// and acquisition of diff information.
95+
/// Note that this instance will not call external filters as their output can't be known programmatically,
96+
/// but it allows to prepare their input if the caller wishes to perform this task.
97+
///
98+
/// Optimized for NxM lookups with built-in caching.
99+
pub struct Platform {
100+
/// The old version of a diff-able blob, if set.
101+
old: Option<platform::Diffable>,
102+
/// The new version of a diff-able blob, if set.
103+
new: Option<platform::Diffable>,
104+
105+
/// Options to alter how diffs should be performed.
106+
pub options: platform::Options,
107+
/// A way to convert objects into a diff-able format.
108+
pub filter: Pipeline,
109+
/// A way to access `.gitattributes`.
110+
pub attr_stack: gix_worktree::Stack,
111+
/// A continuously growing cache keeping ready-for-diff blobs by their path in the worktree,
112+
/// as that is what affects their final diff-able state.
113+
///
114+
/// That way, expensive rewrite-checks with NxM matrix checks would be as fast as possible,
115+
/// avoiding duplicate work.
116+
diff_cache: HashMap<platform::CacheKey, platform::CacheValue>,
117+
}
118+
119+
mod impls {
120+
use crate::blob::{ResourceKind, WorktreeRoots};
121+
use std::path::Path;
122+
123+
impl std::fmt::Display for ResourceKind {
124+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125+
f.write_str(match self {
126+
ResourceKind::OldOrSource => "old",
127+
ResourceKind::NewOrDestination => "new",
128+
})
129+
}
130+
}
131+
132+
impl WorktreeRoots {
133+
/// Return the root path for the given `kind`.
134+
pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
135+
match kind {
136+
ResourceKind::OldOrSource => self.old_root.as_deref(),
137+
ResourceKind::NewOrDestination => self.new_root.as_deref(),
138+
}
139+
}
140+
}
141+
}
142+
143+
///
144+
pub mod pipeline;
145+
146+
///
147+
pub mod platform;

0 commit comments

Comments
 (0)