|
| 1 | +//! For using text diffs, please have a look at the [`imara-diff` documentation](https://docs.rs/imara-diff), |
| 2 | +//! maintained by [Pascal Kuthe](https://github.com/pascalkuthe). |
| 3 | +pub use imara_diff::*; |
| 4 | +use std::collections::HashMap; |
| 5 | +use std::path::PathBuf; |
| 6 | + |
| 7 | +use bstr::BString; |
| 8 | + |
| 9 | +/// Information about the diff performed to detect similarity. |
| 10 | +#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] |
| 11 | +pub struct DiffLineStats { |
| 12 | + /// The amount of lines to remove from the source to get to the destination. |
| 13 | + pub removals: u32, |
| 14 | + /// The amount of lines to add to the source to get to the destination. |
| 15 | + pub insertions: u32, |
| 16 | + /// The amount of lines of the previous state, in the source. |
| 17 | + pub before: u32, |
| 18 | + /// The amount of lines of the new state, in the destination. |
| 19 | + pub after: u32, |
| 20 | + /// A range from 0 to 1.0, where 1.0 is a perfect match and 0.5 is a similarity of 50%. |
| 21 | + /// Similarity is the ratio between all lines in the previous blob and the current blob, |
| 22 | + /// calculated as `(old_lines_count - removals) as f32 / old_lines_count.max(new_lines_count) as f32`. |
| 23 | + pub similarity: f32, |
| 24 | +} |
| 25 | + |
| 26 | +/// A way to classify a resource suitable for diffing. |
| 27 | +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] |
| 28 | +pub enum ResourceKind { |
| 29 | + /// The source of a rewrite, rename or copy operation, or generally the old version of a resource. |
| 30 | + OldOrSource, |
| 31 | + /// The destination of a rewrite, rename or copy operation, or generally the new version of a resource. |
| 32 | + NewOrDestination, |
| 33 | +} |
| 34 | + |
| 35 | +/// A set of values to define how to diff something that is associated with it using `git-attributes`, relevant for regular files. |
| 36 | +/// |
| 37 | +/// Some values are related to diffing, some are related to conversions. |
| 38 | +#[derive(Default, Debug, Clone)] |
| 39 | +pub struct Driver { |
| 40 | + /// The name of the driver, as referred to by `[diff "name"]` in the git configuration. |
| 41 | + pub name: BString, |
| 42 | + /// The command to execute to perform the diff entirely like `<command> old-file old-hex old-mode new-file new-hex new-mode`. |
| 43 | + /// |
| 44 | + /// Please note that we don't make this call ourselves, but use it to determine that we should not run our standard |
| 45 | + /// built-in algorithm but bail instead as the output of such a program isn't standardized. |
| 46 | + pub command: Option<BString>, |
| 47 | + /// The per-driver algorithm to use. |
| 48 | + pub algorithm: Option<Algorithm>, |
| 49 | + /// The external filter program to call like `<binary_to_text_command> /path/to/blob` which outputs a textual version of the provided |
| 50 | + /// binary file. |
| 51 | + /// Note that it's invoked with a shell if arguments are given. |
| 52 | + pub binary_to_text_command: Option<BString>, |
| 53 | + /// `true` if this driver deals with binary files, which means that a `binary_to_text_command` should be used to convert binary |
| 54 | + /// into a textual representation. |
| 55 | + pub is_binary: bool, |
| 56 | +} |
| 57 | + |
| 58 | +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. |
| 59 | +#[derive(Clone, Debug, Default)] |
| 60 | +pub struct WorktreeRoots { |
| 61 | + /// A place where the source of a rewrite, rename or copy, or generally the previous version of resources, are located. |
| 62 | + pub old_root: Option<PathBuf>, |
| 63 | + /// A place where the destination of a rewrite, rename or copy, or generally the new version of resources, are located. |
| 64 | + pub new_root: Option<PathBuf>, |
| 65 | +} |
| 66 | + |
| 67 | +/// A conversion pipeline to take an object or path from what's stored in `git` to what can be diffed, while |
| 68 | +/// following the guidance of git-attributes at the respective path to learn if diffing should happen or if |
| 69 | +/// the content is considered binary. |
| 70 | +/// |
| 71 | +/// There are two different conversion flows, where the target of the flow is a buffer with diffable content: |
| 72 | +/// |
| 73 | +/// * `worktree on disk` -> `text conversion` |
| 74 | +/// * `object` -> `worktree-filters` -> `text conversion` |
| 75 | +/// |
| 76 | +/// Based on whether or not [`WorktreeRoots`] has the file in question, we either read directly from disk |
| 77 | +/// or transform from the object database. |
| 78 | +pub struct Pipeline { |
| 79 | + /// A way to read data directly from the worktree. |
| 80 | + pub roots: WorktreeRoots, |
| 81 | + /// A pipeline to convert objects from what's stored in `git` to its worktree version. |
| 82 | + pub worktree_filter: gix_filter::Pipeline, |
| 83 | + /// Options affecting the way we read files. |
| 84 | + pub options: pipeline::Options, |
| 85 | + /// Drivers to help customize the conversion behaviour depending on the location of items. |
| 86 | + drivers: Vec<Driver>, |
| 87 | + /// Pre-configured attributes to obtain additional diff-related information. |
| 88 | + attrs: gix_filter::attributes::search::Outcome, |
| 89 | + /// A buffer to manipulate paths. |
| 90 | + path: PathBuf, |
| 91 | +} |
| 92 | + |
| 93 | +/// A utility for performing a diff of two blobs, including flexible conversions, conversion-caching and |
| 94 | +/// acquisition of diff information. |
| 95 | +/// Note that this instance will not call external filters as their output can't be known programmatically, |
| 96 | +/// but it allows to prepare their input if the caller wishes to perform this task. |
| 97 | +/// |
| 98 | +/// Optimized for NxM lookups with built-in caching. |
| 99 | +pub struct Platform { |
| 100 | + /// The old version of a diff-able blob, if set. |
| 101 | + old: Option<platform::Diffable>, |
| 102 | + /// The new version of a diff-able blob, if set. |
| 103 | + new: Option<platform::Diffable>, |
| 104 | + |
| 105 | + /// Options to alter how diffs should be performed. |
| 106 | + pub options: platform::Options, |
| 107 | + /// A way to convert objects into a diff-able format. |
| 108 | + pub filter: Pipeline, |
| 109 | + /// A way to access `.gitattributes`. |
| 110 | + pub attr_stack: gix_worktree::Stack, |
| 111 | + /// A continuously growing cache keeping ready-for-diff blobs by their path in the worktree, |
| 112 | + /// as that is what affects their final diff-able state. |
| 113 | + /// |
| 114 | + /// That way, expensive rewrite-checks with NxM matrix checks would be as fast as possible, |
| 115 | + /// avoiding duplicate work. |
| 116 | + diff_cache: HashMap<platform::CacheKey, platform::CacheValue>, |
| 117 | +} |
| 118 | + |
| 119 | +mod impls { |
| 120 | + use crate::blob::{ResourceKind, WorktreeRoots}; |
| 121 | + use std::path::Path; |
| 122 | + |
| 123 | + impl std::fmt::Display for ResourceKind { |
| 124 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 125 | + f.write_str(match self { |
| 126 | + ResourceKind::OldOrSource => "old", |
| 127 | + ResourceKind::NewOrDestination => "new", |
| 128 | + }) |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + impl WorktreeRoots { |
| 133 | + /// Return the root path for the given `kind`, or `None` if no root is set for it. |
| 134 | + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { |
| 135 | + match kind { |
| 136 | + ResourceKind::OldOrSource => self.old_root.as_deref(), |
| 137 | + ResourceKind::NewOrDestination => self.new_root.as_deref(), |
| 138 | + } |
| 139 | + } |
| 140 | + } |
| 141 | +} |
| 142 | + |
| 143 | +/// |
| 144 | +pub mod pipeline; |
| 145 | + |
| 146 | +/// |
| 147 | +pub mod platform; |
0 commit comments