Skip to content

Commit 6307f57

Browse files
authored
Merge pull request #1917 from pierrechevalier83/issue_1887
[fix] Make Tree roundtrip by storing additional bit of information
2 parents 2ef2006 + 103819d commit 6307f57

File tree

9 files changed

+158
-105
lines changed

9 files changed

+158
-105
lines changed

examples/ls-tree.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ fn run(args: Args) -> anyhow::Result<()> {
5454
for entry in entries {
5555
writeln!(
5656
out,
57-
"{:06o} {:4} {} {}",
58-
*entry.mode,
57+
"{:>6o} {:4} {} {}",
58+
entry.mode,
5959
entry.mode.as_str(),
6060
entry.hash,
6161
entry.path

gitoxide-core/src/repository/diff.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ fn write_changes(
4949
} => {
5050
writeln!(out, "A: {}", typed_location(location, entry_mode))?;
5151
writeln!(out, " {}", id.attach(repo).shorten_or_id())?;
52-
writeln!(out, " -> {:o}", entry_mode.0)?;
52+
writeln!(out, " -> {entry_mode:o}")?;
5353
}
5454
gix::diff::tree_with_rewrites::Change::Deletion {
5555
location,
@@ -59,7 +59,7 @@ fn write_changes(
5959
} => {
6060
writeln!(out, "D: {}", typed_location(location, entry_mode))?;
6161
writeln!(out, " {}", id.attach(repo).shorten_or_id())?;
62-
writeln!(out, " {:o} ->", entry_mode.0)?;
62+
writeln!(out, " {entry_mode:o} ->")?;
6363
}
6464
gix::diff::tree_with_rewrites::Change::Modification {
6565
location,
@@ -76,7 +76,7 @@ fn write_changes(
7676
id = id.attach(repo).shorten_or_id()
7777
)?;
7878
if previous_entry_mode != entry_mode {
79-
writeln!(out, " {:o} -> {:o}", previous_entry_mode.0, entry_mode.0)?;
79+
writeln!(out, " {previous_entry_mode:o} -> {entry_mode:o}")?;
8080
}
8181
}
8282
gix::diff::tree_with_rewrites::Change::Rewrite {
@@ -101,7 +101,7 @@ fn write_changes(
101101
id = id.attach(repo).shorten_or_id()
102102
)?;
103103
if source_entry_mode != entry_mode {
104-
writeln!(out, " {:o} -> {:o}", source_entry_mode.0, entry_mode.0)?;
104+
writeln!(out, " {source_entry_mode:o} -> {entry_mode:o}")?;
105105
}
106106
}
107107
}

gix-index/src/entry/mode.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ impl Mode {
7171

7272
impl From<gix_object::tree::EntryMode> for Mode {
7373
fn from(value: gix_object::tree::EntryMode) -> Self {
74-
Self::from_bits_truncate(u32::from(value.0))
74+
let value: u16 = value.value();
75+
Self::from_bits_truncate(u32::from(value))
7576
}
7677
}
7778

gix-merge/tests/merge/tree/baseline.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ fn parse_conflict_file_info(line: &str) -> Option<(Entry, Side)> {
262262
Entry {
263263
location: path.to_owned(),
264264
id: gix_hash::ObjectId::from_hex(hex_id.as_bytes()).unwrap(),
265-
mode: EntryMode(gix_utils::btoi::to_signed_with_radix::<usize>(oct_mode.as_bytes(), 8).unwrap() as u16),
265+
mode: EntryMode::try_from(oct_mode.as_bytes()).unwrap(),
266266
},
267267
match stage {
268268
"1" => Side::Ancestor,
@@ -339,7 +339,7 @@ pub fn visualize_tree(
339339
mode = if mode.is_tree() {
340340
"".into()
341341
} else {
342-
format!("{:o}:", mode.0)
342+
format!("{mode:o}:")
343343
}
344344
)
345345
}

gix-object/src/tree/mod.rs

Lines changed: 115 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub mod write;
2424
pub struct Editor<'a> {
2525
/// A way to lookup trees.
2626
find: &'a dyn crate::FindExt,
27-
/// The kind of hashes to produce
27+
/// The kind of hashes to produce>
2828
object_hash: gix_hash::Kind,
2929
/// All trees we currently hold in memory. Each of these may change while adding and removing entries.
3030
/// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be
@@ -44,18 +44,121 @@ pub struct Editor<'a> {
4444
/// create it by converting [`EntryKind`] into `EntryMode`.
4545
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
4646
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47-
pub struct EntryMode(pub u16);
47+
pub struct EntryMode {
48+
// Represents the value read from Git, except that "040000" is represented with 0o140000 but
49+
// "40000" is represented with 0o40000.
50+
internal: u16,
51+
}
52+
53+
impl TryFrom<u32> for tree::EntryMode {
54+
type Error = u32;
55+
fn try_from(mode: u32) -> Result<Self, Self::Error> {
56+
Ok(match mode {
57+
0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
58+
blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
59+
_ => return Err(mode),
60+
})
61+
}
62+
}
63+
64+
impl EntryMode {
65+
/// Expose the value as u16 (lossy, unlike the internal representation that is hidden).
66+
pub const fn value(self) -> u16 {
67+
// Demangle the hack: In the case where the second leftmost octet is 4 (Tree), the leftmost bit is
68+
// there to represent whether the bytes representation should have 5 or 6 octets.
69+
if self.internal & IFMT == 0o140000 {
70+
0o040000
71+
} else {
72+
self.internal
73+
}
74+
}
75+
76+
/// Return the representation as used in the git internal format, which is octal and written
77+
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
78+
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
79+
if self.internal == 0 {
80+
std::slice::from_ref(&b'0')
81+
} else {
82+
for (idx, backing_octet) in backing.iter_mut().enumerate() {
83+
let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
84+
let oct_mask = 0b111 << bit_pos;
85+
let digit = (self.internal & oct_mask) >> bit_pos;
86+
*backing_octet = b'0' + digit as u8;
87+
}
88+
// Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
89+
if backing[1] == b'4' {
90+
if backing[0] == b'1' {
91+
backing[0] = b'0';
92+
&backing[0..6]
93+
} else {
94+
&backing[1..6]
95+
}
96+
} else {
97+
&backing[0..6]
98+
}
99+
}
100+
.into()
101+
}
102+
103+
/// Construct an EntryMode from bytes represented as in the git internal format
104+
/// Return the mode and the remainder of the bytes.
105+
pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
106+
let mut mode = 0;
107+
let mut idx = 0;
108+
let mut space_pos = 0;
109+
if i.is_empty() {
110+
return None;
111+
}
112+
// const fn, this is why we can't have nice things (like `.iter().any()`).
113+
while idx < i.len() {
114+
let b = i[idx];
115+
// Delimiter, return what we got
116+
if b == b' ' {
117+
space_pos = idx;
118+
break;
119+
}
120+
// Not a pure octal input.
121+
// Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do.
122+
#[allow(clippy::manual_range_contains)]
123+
if b < b'0' || b > b'7' {
124+
return None;
125+
}
126+
// More than 6 octal digits we must have hit the delimiter or the input was malformed.
127+
if idx > 6 {
128+
return None;
129+
}
130+
mode = (mode << 3) + (b - b'0') as u16;
131+
idx += 1;
132+
}
133+
// Hack: `0o140000` represents `"040000"`, `0o40000` represents `"40000"`.
134+
if mode == 0o040000 && i[0] == b'0' {
135+
mode += 0o100000;
136+
}
137+
Some((Self { internal: mode }, &i[(space_pos + 1)..]))
138+
}
139+
140+
/// Construct an EntryMode from bytes represented as in the git internal format.
141+
pub fn from_bytes(i: &[u8]) -> Option<Self> {
142+
Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
143+
}
144+
}
48145

49146
impl std::fmt::Debug for EntryMode {
50147
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51-
write!(f, "EntryMode({:#o})", self.0)
148+
write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
149+
}
150+
}
151+
152+
impl std::fmt::Octal for EntryMode {
153+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154+
write!(f, "{}", self.as_bytes(&mut Default::default()))
52155
}
53156
}
54157

55158
/// A discretized version of ideal and valid values for entry modes.
56159
///
57160
/// Note that even though it can represent every valid [mode](EntryMode), it might
58-
/// loose information due to that as well.
161+
/// lose information due to that as well.
59162
#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
60163
#[repr(u16)]
61164
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -74,7 +177,7 @@ pub enum EntryKind {
74177

75178
impl From<EntryKind> for EntryMode {
76179
fn from(value: EntryKind) -> Self {
77-
EntryMode(value as u16)
180+
EntryMode { internal: value as u16 }
78181
}
79182
}
80183

@@ -100,22 +203,14 @@ impl EntryKind {
100203
}
101204
}
102205

103-
impl std::ops::Deref for EntryMode {
104-
type Target = u16;
105-
106-
fn deref(&self) -> &Self::Target {
107-
&self.0
108-
}
109-
}
110-
111206
const IFMT: u16 = 0o170000;
112207

113208
impl EntryMode {
114209
/// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
115210
pub const fn kind(&self) -> EntryKind {
116-
let etype = self.0 & IFMT;
211+
let etype = self.value() & IFMT;
117212
if etype == 0o100000 {
118-
if self.0 & 0o000100 == 0o000100 {
213+
if self.value() & 0o000100 == 0o000100 {
119214
EntryKind::BlobExecutable
120215
} else {
121216
EntryKind::Blob
@@ -131,27 +226,27 @@ impl EntryMode {
131226

132227
/// Return true if this entry mode represents a Tree/directory
133228
pub const fn is_tree(&self) -> bool {
134-
self.0 & IFMT == EntryKind::Tree as u16
229+
self.value() & IFMT == EntryKind::Tree as u16
135230
}
136231

137232
/// Return true if this entry mode represents the commit of a submodule.
138233
pub const fn is_commit(&self) -> bool {
139-
self.0 & IFMT == EntryKind::Commit as u16
234+
self.value() & IFMT == EntryKind::Commit as u16
140235
}
141236

142237
/// Return true if this entry mode represents a symbolic link
143238
pub const fn is_link(&self) -> bool {
144-
self.0 & IFMT == EntryKind::Link as u16
239+
self.value() & IFMT == EntryKind::Link as u16
145240
}
146241

147242
/// Return true if this entry mode represents anything BUT Tree/directory
148243
pub const fn is_no_tree(&self) -> bool {
149-
self.0 & IFMT != EntryKind::Tree as u16
244+
self.value() & IFMT != EntryKind::Tree as u16
150245
}
151246

152247
/// Return true if the entry is any kind of blob.
153248
pub const fn is_blob(&self) -> bool {
154-
self.0 & IFMT == 0o100000
249+
self.value() & IFMT == 0o100000
155250
}
156251

157252
/// Return true if the entry is an executable blob.
@@ -178,27 +273,6 @@ impl EntryMode {
178273
Commit => "commit",
179274
}
180275
}
181-
182-
/// Return the representation as used in the git internal format, which is octal and written
183-
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
184-
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
185-
if self.0 == 0 {
186-
std::slice::from_ref(&b'0')
187-
} else {
188-
let mut nb = 0;
189-
let mut n = self.0;
190-
while n > 0 {
191-
let remainder = (n % 8) as u8;
192-
backing[nb] = b'0' + remainder;
193-
n /= 8;
194-
nb += 1;
195-
}
196-
let res = &mut backing[..nb];
197-
res.reverse();
198-
res
199-
}
200-
.into()
201-
}
202276
}
203277

204278
impl TreeRef<'_> {

gix-object/src/tree/ref_iter.rs

Lines changed: 3 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -171,54 +171,18 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
171171
type Error = &'a [u8];
172172

173173
fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
174-
mode_from_decimal(mode)
175-
.map(|(mode, _rest)| tree::EntryMode(mode as u16))
176-
.ok_or(mode)
177-
}
178-
}
179-
180-
fn mode_from_decimal(i: &[u8]) -> Option<(u32, &[u8])> {
181-
let mut mode = 0u32;
182-
let mut spacer_pos = 1;
183-
for b in i.iter().take_while(|b| **b != b' ') {
184-
if *b < b'0' || *b > b'7' {
185-
return None;
186-
}
187-
mode = (mode << 3) + u32::from(b - b'0');
188-
spacer_pos += 1;
189-
}
190-
if i.len() < spacer_pos {
191-
return None;
192-
}
193-
let (_, i) = i.split_at(spacer_pos);
194-
Some((mode, i))
195-
}
196-
197-
impl TryFrom<u32> for tree::EntryMode {
198-
type Error = u32;
199-
200-
fn try_from(mode: u32) -> Result<Self, Self::Error> {
201-
Ok(match mode {
202-
0o40000 | 0o120000 | 0o160000 => tree::EntryMode(mode as u16),
203-
blob_mode if blob_mode & 0o100000 == 0o100000 => tree::EntryMode(mode as u16),
204-
_ => return Err(mode),
205-
})
174+
tree::EntryMode::from_bytes(mode).ok_or(mode)
206175
}
207176
}
208177

209178
mod decode {
210179
use bstr::ByteSlice;
211180
use winnow::{error::ParserError, prelude::*};
212181

213-
use crate::{
214-
tree,
215-
tree::{ref_iter::mode_from_decimal, EntryRef},
216-
TreeRef,
217-
};
182+
use crate::{tree, tree::EntryRef, TreeRef};
218183

219184
pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
220-
let (mode, i) = mode_from_decimal(i)?;
221-
let mode = tree::EntryMode::try_from(mode).ok()?;
185+
let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
222186
let (filename, i) = i.split_at(i.find_byte(0)?);
223187
let i = &i[1..];
224188
const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot

0 commit comments

Comments
 (0)