Skip to content

Commit 001aca7

Browse files
committed
feat: Add directory lookup, as well as ignore-case capabilities.
This is needed to be able to refer from an entry on disk to the index, and figure out if the index already has such an entry. New methods are: * File::entry_by_path_icase * File::prefixed_entries_range_icase * File::entry_by_path_and_stage_icase * File::directory_kind_by_path_icase
1 parent c1e4c62 commit 001aca7

File tree

8 files changed

+526
-3
lines changed

8 files changed

+526
-3
lines changed

crate-status.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ The git staging area.
629629
* [x] 'link' base indices to take information from, split index
630630
* [x] 'sdir' [sparse directory entries](https://github.blog/2021-08-16-highlights-from-git-2-33/) - marker
631631
* [x] verification of entries and extensions as well as checksum
632+
* [ ] expand sparse directory entries using information of the tree itself
632633
* write
633634
* [x] V2
634635
* [x] V3 - extension bits
@@ -655,7 +656,7 @@ The git staging area.
655656
* [ ] IEOT index entry offset table
656657
* [ ] 'link' base indices to take information from, split index
657658
* [ ] 'sdir' sparse directory entries
658-
* add and remove entries
659+
* [ ] add and remove entries
659660
* [x] API documentation
660661
* [ ] Some examples
661662

gix-index/src/access/mod.rs

Lines changed: 239 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::{cmp::Ordering, ops::Range};
33
use bstr::{BStr, ByteSlice, ByteVec};
44
use filetime::FileTime;
55

6-
use crate::{entry, extension, Entry, PathStorage, PathStorageRef, State, Version};
6+
use crate::{entry, extension, DirectoryKind, Entry, PathStorage, PathStorageRef, State, Version};
77

88
// TODO: integrate this somehow, somewhere, depending on later usage.
99
#[allow(dead_code)]
@@ -84,6 +84,38 @@ impl State {
8484
self.entry_index_by_idx_and_stage(path, idx, stage, stage_cmp)
8585
}
8686

87+
/// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative
88+
/// `path` and `stage`, or `None`.
89+
/// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
90+
///
91+
/// Note that if there are ambiguities, like `x` and `X` being present in the index, any of these will be returned,
92+
/// deterministically.
93+
///
94+
/// Use the index for accessing multiple stages if they exist, but at least the single matching entry.
95+
pub fn entry_index_by_path_and_stage_icase(
96+
&self,
97+
path: &BStr,
98+
stage: entry::Stage,
99+
ignore_case: bool,
100+
) -> Option<usize> {
101+
if ignore_case {
102+
let mut stage_cmp = Ordering::Equal;
103+
let idx = self
104+
.entries
105+
.binary_search_by(|e| {
106+
let res = icase_cmp(e.path(self), path);
107+
if res.is_eq() {
108+
stage_cmp = e.stage().cmp(&stage);
109+
}
110+
res
111+
})
112+
.ok()?;
113+
self.entry_index_by_idx_and_stage_icase(path, idx, stage, stage_cmp)
114+
} else {
115+
self.entry_index_by_path_and_stage(path, stage)
116+
}
117+
}
118+
87119
/// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages, and [`Ordering::Less`]
88120
/// towards lower stages, and return the lowest or highest seen stage.
89121
/// Return `None` if there is no greater or smaller stage.
@@ -108,6 +140,30 @@ impl State {
108140
}
109141
}
110142

143+
/// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages, and [`Ordering::Less`]
144+
/// towards lower stages, and return the lowest or highest seen stage.
145+
/// Return `None` if there is no greater or smaller stage.
146+
fn walk_entry_stages_icase(&self, path: &BStr, base: usize, direction: Ordering) -> Option<usize> {
147+
match direction {
148+
Ordering::Greater => self
149+
.entries
150+
.get(base + 1..)?
151+
.iter()
152+
.enumerate()
153+
.take_while(|(_, e)| e.path(self).eq_ignore_ascii_case(path))
154+
.last()
155+
.map(|(idx, _)| base + 1 + idx),
156+
Ordering::Equal => Some(base),
157+
Ordering::Less => self.entries[..base]
158+
.iter()
159+
.enumerate()
160+
.rev()
161+
.take_while(|(_, e)| e.path(self).eq_ignore_ascii_case(path))
162+
.last()
163+
.map(|(idx, _)| idx),
164+
}
165+
}
166+
111167
fn entry_index_by_idx_and_stage(
112168
&self,
113169
path: &BStr,
@@ -133,6 +189,31 @@ impl State {
133189
}
134190
}
135191

192+
/// Starting at the entry index `idx`, find the index of the entry whose path equals `path` when ignoring
/// ASCII case and whose stage is `wanted_stage`, or `None` if there is no such entry.
///
/// `stage_cmp` is the result of comparing the stage of the entry at `idx` with `wanted_stage`, and it decides
/// where to look:
///
/// * [`Ordering::Greater`] - search backwards through the entries before `idx`.
/// * [`Ordering::Equal`] - the entry at `idx` is the one, and `idx` is returned as is.
/// * [`Ordering::Less`] - search forwards through the entries after `idx`.
///
/// In both directions the search stops at the first entry whose path does not match `path` case-insensitively.
fn entry_index_by_idx_and_stage_icase(
193+
&self,
194+
path: &BStr,
195+
idx: usize,
196+
wanted_stage: entry::Stage,
197+
stage_cmp: Ordering,
198+
) -> Option<usize> {
199+
match stage_cmp {
200+
Ordering::Greater => self.entries[..idx]
201+
.iter()
202+
.enumerate()
203+
.rev()
204+
.take_while(|(_, e)| e.path(self).eq_ignore_ascii_case(path))
205+
.find_map(|(idx, e)| (e.stage() == wanted_stage).then_some(idx)),
206+
Ordering::Equal => Some(idx),
207+
Ordering::Less => self
208+
.entries
209+
.get(idx + 1..)?
210+
.iter()
211+
.enumerate()
212+
.take_while(|(_, e)| e.path(self).eq_ignore_ascii_case(path))
213+
.find_map(|(ofs, e)| (e.stage() == wanted_stage).then_some(idx + ofs + 1)),
214+
}
215+
}
216+
136217
/// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative
137218
/// `path` and `stage`, or `None`.
138219
///
@@ -152,13 +233,89 @@ impl State {
152233
.ok()
153234
}
154235

155-
/// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()],
236+
/// Like [`entry_index_by_path_and_stage()`](State::entry_index_by_path_and_stage()),
156237
/// but returns the entry instead of the index.
157238
pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> {
158239
self.entry_index_by_path_and_stage(path, stage)
159240
.map(|idx| &self.entries[idx])
160241
}
161242

243+
/// Like [`entry_index_by_path_and_stage_icase()`](State::entry_index_by_path_and_stage_icase()),
244+
/// but returns the entry instead of the index.
245+
pub fn entry_by_path_and_stage_icase(&self, path: &BStr, stage: entry::Stage, ignore_case: bool) -> Option<&Entry> {
246+
self.entry_index_by_path_and_stage_icase(path, stage, ignore_case)
247+
.map(|idx| &self.entries[idx])
248+
}
249+
250+
/// Return the kind of directory that `path` represents, or `None` if the path is not a directory, or not
251+
/// tracked in this index in any other way.
252+
///
253+
/// Note that we will not match `path`, like `a/b`, to a submodule or sparse directory at `a`, which means
254+
/// that `path` should be grown one component at a time in order to find the relevant entries.
255+
///
256+
/// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
257+
///
258+
/// ### Deviation
259+
///
260+
/// We allow conflicting entries to serve as indicator for an inferred directory, whereas `git` only looks
261+
/// at stage 0.
262+
pub fn directory_kind_by_path_icase(&self, path: &BStr, ignore_case: bool) -> Option<DirectoryKind> {
263+
if ignore_case {
264+
for entry in self
265+
.prefixed_entries_range_icase(path, ignore_case)
266+
.map(|range| &self.entries[range])?
267+
{
268+
let entry_path = entry.path(self);
269+
if !entry_path.get(..path.len())?.eq_ignore_ascii_case(path) {
270+
// This can happen if the range starts with matches, then moves on to non-matches,
271+
// to finally and in matches again.
272+
// TODO(perf): start range from start to first mismatch, then continue from the end.
273+
continue;
274+
}
275+
match entry_path.get(path.len()) {
276+
Some(b'/') => {
277+
return Some(if entry.mode.is_sparse() {
278+
DirectoryKind::SparseDir
279+
} else {
280+
DirectoryKind::Inferred
281+
})
282+
}
283+
Some(_) => break,
284+
None => {
285+
if entry.mode.is_submodule() {
286+
return Some(DirectoryKind::Submodule);
287+
}
288+
}
289+
}
290+
}
291+
} else {
292+
let (Ok(idx) | Err(idx)) = self.entries.binary_search_by(|e| e.path(self).cmp(path));
293+
294+
for entry in self.entries.get(idx..)? {
295+
let entry_path = entry.path(self);
296+
if entry_path.get(..path.len())? != path {
297+
break;
298+
}
299+
match entry_path.get(path.len()) {
300+
Some(b'/') => {
301+
return Some(if entry.mode.is_sparse() {
302+
DirectoryKind::SparseDir
303+
} else {
304+
DirectoryKind::Inferred
305+
})
306+
}
307+
Some(_) => break,
308+
None => {
309+
if entry.mode.is_submodule() {
310+
return Some(DirectoryKind::Submodule);
311+
}
312+
}
313+
}
314+
}
315+
}
316+
None
317+
}
318+
162319
/// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict.
163320
///
164321
/// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2.
@@ -182,6 +339,37 @@ impl State {
182339
Some(&self.entries[idx])
183340
}
184341

342+
/// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict.
343+
/// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
344+
///
345+
/// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2.
346+
///
347+
/// Note that if there are ambiguities, like `x` and `X` being present in the index, any of these will be returned,
348+
/// deterministically.
349+
pub fn entry_by_path_icase(&self, path: &BStr, ignore_case: bool) -> Option<&Entry> {
350+
if ignore_case {
351+
let mut stage_at_index = 0;
352+
let idx = self
353+
.entries
354+
.binary_search_by(|e| {
355+
let res = icase_cmp(e.path(self), path);
356+
if res.is_eq() {
357+
stage_at_index = e.stage();
358+
}
359+
res
360+
})
361+
.ok()?;
362+
let idx = if stage_at_index == 0 || stage_at_index == 2 {
363+
idx
364+
} else {
365+
self.entry_index_by_idx_and_stage_icase(path, idx, 2, stage_at_index.cmp(&2))?
366+
};
367+
Some(&self.entries[idx])
368+
} else {
369+
self.entry_by_path(path)
370+
}
371+
}
372+
185373
/// Return the slice of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
186374
///
187375
/// If `prefix` is empty, all entries are returned.
@@ -221,6 +409,49 @@ impl State {
221409
(low != high).then_some(low..high)
222410
}
223411

412+
/// Return the range of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
413+
/// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed. Otherwise
414+
/// the search is case-sensitive.
415+
///
416+
/// If `prefix` is empty, the range will include all entries.
417+
pub fn prefixed_entries_range_icase(&self, prefix: &BStr, ignore_case: bool) -> Option<Range<usize>> {
418+
if ignore_case {
419+
if prefix.is_empty() {
420+
return Some(0..self.entries.len());
421+
}
422+
let prefix_len = prefix.len();
423+
let mut low = self.entries.partition_point(|e| {
424+
e.path(self).get(..prefix_len).map_or_else(
425+
|| icase_cmp(e.path(self), &prefix[..e.path.len()]).is_le(),
426+
|p| icase_cmp(p, prefix).is_lt(),
427+
)
428+
});
429+
let mut high = low
430+
+ self.entries[low..].partition_point(|e| {
431+
e.path(self)
432+
.get(..prefix_len)
433+
.map_or(false, |p| icase_cmp(p, prefix).is_le())
434+
});
435+
436+
let low_entry = &self.entries.get(low)?;
437+
if low_entry.stage() != 0 {
438+
low = self
439+
.walk_entry_stages_icase(low_entry.path(self), low, Ordering::Less)
440+
.unwrap_or(low);
441+
}
442+
if let Some(high_entry) = self.entries.get(high) {
443+
if high_entry.stage() != 0 {
444+
high = self
445+
.walk_entry_stages_icase(high_entry.path(self), high, Ordering::Less)
446+
.unwrap_or(high);
447+
}
448+
}
449+
(low != high).then_some(low..high)
450+
} else {
451+
self.prefixed_entries_range(prefix)
452+
}
453+
}
454+
224455
/// Return the entry at `idx` or _panic_ if the index is out of bounds.
225456
///
226457
/// The `idx` is typically returned by [`entry_by_path_and_stage()`][State::entry_by_path_and_stage()].
@@ -260,6 +491,12 @@ impl State {
260491
}
261492
}
262493

494+
/// Compare `a` with `b` lexicographically, byte by byte, after converting each byte to ASCII lowercase.
///
/// Only ASCII letters are folded, all other bytes are compared as they are.
fn icase_cmp(a: &[u8], b: &[u8]) -> Ordering {
495+
a.iter()
496+
.map(u8::to_ascii_lowercase)
497+
.cmp(b.iter().map(u8::to_ascii_lowercase))
498+
}
499+
263500
/// Mutation
264501
impl State {
265502
/// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here.

gix-index/src/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,22 @@ pub enum Version {
4747
V4 = 4,
4848
}
4949

50+
/// A representation of a directory in the index.
51+
///
52+
/// These are most of the time inferred, but may also be explicit entries.
53+
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
54+
pub enum DirectoryKind {
55+
/// The directory is implied as there is at least one tracked entry that lives within it.
56+
Inferred,
57+
/// The directory is present directly in the form of a sparse directory.
58+
///
59+
/// These are available when cone-mode is active.
60+
SparseDir,
61+
/// The directory is present directly in the form of the commit of a repository that is
62+
/// a submodule of the superproject (which this is the index of).
63+
Submodule,
64+
}
65+
5066
/// An entry in the index, identifying a non-tree item on disk.
5167
#[derive(Debug, Clone, Eq, PartialEq)]
5268
pub struct Entry {
Binary file not shown.
Binary file not shown.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
2+
set -eu -o pipefail
3+
4+
export GIT_INDEX_VERSION=2;
5+
6+
mkdir sub
7+
(cd sub
8+
mkdir a b c
9+
mkdir c/d
10+
touch a/1 b/2 c/3 c/d/3
11+
)
12+
13+
git init -q
14+
git config index.threads 1
15+
16+
touch a b
17+
chmod +x b
18+
ln -s a c
19+
mkdir d
20+
(cd d && touch a b c
21+
mkdir nested
22+
(cd nested
23+
touch 1
24+
)
25+
)
26+
27+
git add .
28+
git commit -m "init"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash
2+
set -eu -o pipefail
3+
4+
git init -q
5+
6+
empty_oid=$(git hash-object -w --stdin </dev/null)
7+
content_oid=$(echo "content" | git hash-object -w --stdin)
8+
symlink_target=$(echo -n 'X' | git hash-object -w --stdin)
9+
10+
echo "FILE_? filter=arrow" > .gitattributes
11+
git add -A
12+
13+
git update-index --index-info <<-EOF
14+
100644 $content_oid FILE_X
15+
100644 $content_oid FILE_x
16+
100644 $content_oid file_X
17+
100644 $content_oid file_x
18+
100644 $empty_oid D/B
19+
100644 $empty_oid D/C
20+
100644 $empty_oid d
21+
100644 $empty_oid X
22+
120000 $symlink_target x
23+
120000 $symlink_target link-to-X
24+
EOF
25+
26+
git commit -m "init"

0 commit comments

Comments
 (0)