@@ -4,15 +4,17 @@ use std::{io, marker::PhantomData, path::Path};
4
4
use bstr:: BStr ;
5
5
use filetime:: FileTime ;
6
6
use gix_features:: parallel:: { in_parallel_if, Reduce } ;
7
+ use gix_filter:: pipeline:: convert:: ToGitOutcome ;
7
8
9
+ use crate :: index_as_worktree:: traits:: read_data:: Stream ;
8
10
use crate :: {
9
11
index_as_worktree:: {
10
12
traits,
11
13
traits:: { CompareBlobs , SubmoduleStatus } ,
12
14
types:: { Error , Options } ,
13
15
Change , Outcome , VisitEntry ,
14
16
} ,
15
- read , Pathspec ,
17
+ Pathspec , SymlinkCheck ,
16
18
} ;
17
19
18
20
/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them
@@ -24,6 +26,8 @@ use crate::{
24
26
/// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`.
25
27
///
26
28
/// `should_interrupt` can be used to stop all processing.
29
+ /// `filter` is used to convert worktree files back to their internal git representation. For this to be correct,
30
+ /// [`Options::attributes`] must be configured as well.
27
31
///
28
32
/// ### Note
29
33
///
@@ -45,8 +49,9 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>(
45
49
find : Find ,
46
50
progress : & mut dyn gix_features:: progress:: Progress ,
47
51
pathspec : impl Pathspec + Send + Clone ,
52
+ filter : gix_filter:: Pipeline ,
48
53
should_interrupt : & AtomicBool ,
49
- options : Options ,
54
+ mut options : Options ,
50
55
) -> Result < Outcome , Error >
51
56
where
52
57
T : Send ,
70
75
let range = index
71
76
. prefixed_entries_range ( pathspec. common_prefix ( ) )
72
77
. unwrap_or ( 0 ..index. entries ( ) . len ( ) ) ;
78
+
79
+ let stack = gix_worktree:: Stack :: from_state_and_ignore_case (
80
+ worktree,
81
+ options. fs . ignore_case ,
82
+ gix_worktree:: stack:: State :: AttributesStack ( std:: mem:: take ( & mut options. attributes ) ) ,
83
+ index,
84
+ index. path_backing ( ) ,
85
+ ) ;
73
86
let ( entries, path_backing) = index. entries_mut_and_pathbacking ( ) ;
74
87
let mut num_entries = entries. len ( ) ;
75
88
let entries = & mut entries[ range] ;
@@ -87,48 +100,51 @@ where
87
100
progress. init ( entries. len ( ) . into ( ) , gix_features:: progress:: count ( "files" ) ) ;
88
101
let count = progress. counter ( ) ;
89
102
103
+ let new_state = {
104
+ let options = & options;
105
+ let ( skipped_by_pathspec, skipped_by_entry_flags) = ( & skipped_by_pathspec, & skipped_by_entry_flags) ;
106
+ let ( symlink_metadata_calls, entries_updated) = ( & symlink_metadata_calls, & entries_updated) ;
107
+ let ( racy_clean, worktree_bytes) = ( & racy_clean, & worktree_bytes) ;
108
+ let ( worktree_reads, odb_bytes, odb_reads) = ( & worktree_reads, & odb_bytes, & odb_reads) ;
109
+ move |_| {
110
+ (
111
+ State {
112
+ buf : Vec :: new ( ) ,
113
+ buf2 : Vec :: new ( ) ,
114
+ attr_stack : stack,
115
+ path_stack : SymlinkCheck :: new ( worktree. into ( ) ) ,
116
+ timestamp,
117
+ path_backing,
118
+ filter,
119
+ options,
120
+
121
+ skipped_by_pathspec,
122
+ skipped_by_entry_flags,
123
+ symlink_metadata_calls,
124
+ entries_updated,
125
+ racy_clean,
126
+ worktree_reads,
127
+ worktree_bytes,
128
+ odb_reads,
129
+ odb_bytes,
130
+ } ,
131
+ compare,
132
+ submodule,
133
+ find,
134
+ pathspec,
135
+ )
136
+ }
137
+ } ;
90
138
in_parallel_if (
91
139
|| true , // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though.
92
140
gix_features:: interrupt:: Iter :: new ( entries. chunks_mut ( chunk_size) , should_interrupt) ,
93
141
thread_limit,
94
- {
95
- let options = & options;
96
- let ( skipped_by_pathspec, skipped_by_entry_flags) = ( & skipped_by_pathspec, & skipped_by_entry_flags) ;
97
- let ( symlink_metadata_calls, entries_updated) = ( & symlink_metadata_calls, & entries_updated) ;
98
- let ( racy_clean, worktree_bytes) = ( & racy_clean, & worktree_bytes) ;
99
- let ( worktree_reads, odb_bytes, odb_reads) = ( & worktree_reads, & odb_bytes, & odb_reads) ;
100
- move |_| {
101
- (
102
- State {
103
- buf : Vec :: new ( ) ,
104
- odb_buf : Vec :: new ( ) ,
105
- path_stack : crate :: SymlinkCheck :: new ( worktree. to_owned ( ) ) ,
106
- timestamp,
107
- path_backing,
108
- options,
109
-
110
- skipped_by_pathspec,
111
- skipped_by_entry_flags,
112
- symlink_metadata_calls,
113
- entries_updated,
114
- racy_clean,
115
- worktree_reads,
116
- worktree_bytes,
117
- odb_reads,
118
- odb_bytes,
119
- } ,
120
- compare,
121
- submodule,
122
- find,
123
- pathspec,
124
- )
125
- }
126
- } ,
142
+ new_state,
127
143
|entries, ( state, blobdiff, submdule, find, pathspec) | {
128
144
entries
129
145
. iter_mut ( )
130
146
. filter_map ( |entry| {
131
- let res = state. process ( entry, blobdiff, submdule, find, pathspec ) ;
147
+ let res = state. process ( entry, pathspec , blobdiff, submdule, find) ;
132
148
count. fetch_add ( 1 , Ordering :: Relaxed ) ;
133
149
res
134
150
} )
@@ -158,9 +174,16 @@ where
158
174
159
175
struct State < ' a , ' b > {
160
176
buf : Vec < u8 > ,
161
- odb_buf : Vec < u8 > ,
177
+ buf2 : Vec < u8 > ,
162
178
timestamp : FileTime ,
163
- path_stack : crate :: SymlinkCheck ,
179
+ /// This is the cheap stack that only assures that we don't go through symlinks.
180
+ /// It's always used to get the path to perform an lstat on.
181
+ path_stack : SymlinkCheck ,
182
+ /// This is the expensive stack that will need to check for `.gitattributes` files each time
183
+ /// it changes directory. It's only used when we know we have to read a worktree file, which in turn
184
+ /// requires attributes to drive the filter configuration.
185
+ attr_stack : gix_worktree:: Stack ,
186
+ filter : gix_filter:: Pipeline ,
164
187
path_backing : & ' b [ u8 ] ,
165
188
options : & ' a Options ,
166
189
@@ -181,10 +204,10 @@ impl<'index> State<'_, 'index> {
181
204
fn process < T , U , Find , E1 , E2 > (
182
205
& mut self ,
183
206
entry : & ' index mut gix_index:: Entry ,
207
+ pathspec : & mut impl Pathspec ,
184
208
diff : & mut impl CompareBlobs < Output = T > ,
185
209
submodule : & mut impl SubmoduleStatus < Output = U , Error = E2 > ,
186
210
find : & mut Find ,
187
- pathspec : & mut impl Pathspec ,
188
211
) -> Option < StatusResult < ' index , T , U > >
189
212
where
190
213
E1 : std:: error:: Error + Send + Sync + ' static ,
@@ -266,10 +289,9 @@ impl<'index> State<'_, 'index> {
266
289
E2 : std:: error:: Error + Send + Sync + ' static ,
267
290
Find : for < ' a > FnMut ( & gix_hash:: oid , & ' a mut Vec < u8 > ) -> Result < gix_object:: BlobRef < ' a > , E1 > ,
268
291
{
269
- let worktree_path = gix_path:: try_from_bstr ( rela_path) . map_err ( |_| Error :: IllformedUtf8 ) ?;
270
- let worktree_path = match self . path_stack . verified_path ( worktree_path. as_ref ( ) ) {
292
+ let worktree_path = match self . path_stack . verified_path ( gix_path:: from_bstr ( rela_path) . as_ref ( ) ) {
271
293
Ok ( path) => path,
272
- Err ( err) if err. kind ( ) == std :: io:: ErrorKind :: NotFound => return Ok ( Some ( Change :: Removed ) ) ,
294
+ Err ( err) if err. kind ( ) == io:: ErrorKind :: NotFound => return Ok ( Some ( Change :: Removed ) ) ,
273
295
Err ( err) => return Err ( Error :: Io ( err) ) ,
274
296
} ;
275
297
self . symlink_metadata_calls . fetch_add ( 1 , Ordering :: Relaxed ) ;
@@ -336,27 +358,24 @@ impl<'index> State<'_, 'index> {
336
358
}
337
359
338
360
self . buf . clear ( ) ;
339
- let read_file = WorktreeBlob {
361
+ self . buf2 . clear ( ) ;
362
+ let fetch_data = ReadDataImpl {
340
363
buf : & mut self . buf ,
341
364
path : worktree_path,
365
+ rela_path,
342
366
entry,
367
+ file_len : metadata. len ( ) ,
368
+ filter : & mut self . filter ,
369
+ attr_stack : & mut self . attr_stack ,
343
370
options : self . options ,
344
- } ;
345
- self . odb_buf . clear ( ) ;
346
- let read_blob = OdbBlob {
347
- buf : & mut self . odb_buf ,
348
371
id : & entry. id ,
349
372
find,
373
+ worktree_reads : self . worktree_reads ,
374
+ worktree_bytes : self . worktree_bytes ,
375
+ odb_reads : self . odb_reads ,
376
+ odb_bytes : self . odb_bytes ,
350
377
} ;
351
- let content_change = diff. compare_blobs ( entry, metadata. len ( ) as usize , read_file, read_blob) ?;
352
- if !self . buf . is_empty ( ) {
353
- self . worktree_reads . fetch_add ( 1 , Ordering :: Relaxed ) ;
354
- self . worktree_bytes . fetch_add ( self . buf . len ( ) as u64 , Ordering :: Relaxed ) ;
355
- }
356
- if !self . odb_buf . is_empty ( ) {
357
- self . odb_reads . fetch_add ( 1 , Ordering :: Relaxed ) ;
358
- self . odb_bytes . fetch_add ( self . odb_buf . len ( ) as u64 , Ordering :: Relaxed ) ;
359
- }
378
+ let content_change = diff. compare_blobs ( entry, metadata. len ( ) , fetch_data, & mut self . buf2 ) ?;
360
379
// This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated.
361
380
if content_change. is_some ( ) && racy_clean {
362
381
entry. stat . size = 0 ;
@@ -404,43 +423,91 @@ impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>
404
423
}
405
424
}
406
425
407
- struct WorktreeBlob < ' a > {
408
- buf : & ' a mut Vec < u8 > ,
409
- path : & ' a Path ,
410
- entry : & ' a gix_index:: Entry ,
411
- options : & ' a Options ,
412
- }
413
-
414
- struct OdbBlob < ' a , Find , E >
426
+ struct ReadDataImpl < ' a , Find , E >
415
427
where
416
428
E : std:: error:: Error + Send + Sync + ' static ,
417
- Find : FnMut ( & gix_hash:: oid , & ' a mut Vec < u8 > ) -> Result < gix_object:: BlobRef < ' a > , E > ,
429
+ Find : for < ' b > FnMut ( & gix_hash:: oid , & ' b mut Vec < u8 > ) -> Result < gix_object:: BlobRef < ' b > , E > ,
418
430
{
419
431
buf : & ' a mut Vec < u8 > ,
432
+ path : & ' a Path ,
433
+ rela_path : & ' a BStr ,
434
+ file_len : u64 ,
435
+ entry : & ' a gix_index:: Entry ,
436
+ filter : & ' a mut gix_filter:: Pipeline ,
437
+ attr_stack : & ' a mut gix_worktree:: Stack ,
438
+ options : & ' a Options ,
420
439
id : & ' a gix_hash:: oid ,
421
440
find : Find ,
441
+ worktree_bytes : & ' a AtomicU64 ,
442
+ worktree_reads : & ' a AtomicUsize ,
443
+ odb_bytes : & ' a AtomicU64 ,
444
+ odb_reads : & ' a AtomicUsize ,
422
445
}
423
446
424
- impl < ' a > traits:: ReadDataOnce < ' a > for WorktreeBlob < ' a > {
425
- fn read_data ( self ) -> Result < & ' a [ u8 ] , Error > {
426
- let res = read:: data_to_buf_with_meta (
427
- self . path ,
428
- self . buf ,
429
- self . entry . mode == gix_index:: entry:: Mode :: SYMLINK ,
430
- & self . options . fs ,
431
- ) ?;
432
- Ok ( res)
433
- }
434
- }
435
-
436
- impl < ' a , Find , E > traits:: ReadDataOnce < ' a > for OdbBlob < ' a , Find , E >
447
+ impl < ' a , Find , E > traits:: ReadData < ' a > for ReadDataImpl < ' a , Find , E >
437
448
where
438
449
E : std:: error:: Error + Send + Sync + ' static ,
439
- Find : FnMut ( & gix_hash:: oid , & ' a mut Vec < u8 > ) -> Result < gix_object:: BlobRef < ' a > , E > ,
450
+ Find : for < ' b > FnMut ( & gix_hash:: oid , & ' b mut Vec < u8 > ) -> Result < gix_object:: BlobRef < ' b > , E > ,
440
451
{
441
- fn read_data ( mut self ) -> Result < & ' a [ u8 ] , Error > {
452
+ fn read_blob ( mut self ) -> Result < & ' a [ u8 ] , Error > {
442
453
( self . find ) ( self . id , self . buf )
443
- . map ( |b| b. data )
454
+ . map ( |b| {
455
+ self . odb_reads . fetch_add ( 1 , Ordering :: Relaxed ) ;
456
+ self . odb_bytes . fetch_add ( b. data . len ( ) as u64 , Ordering :: Relaxed ) ;
457
+ b. data
458
+ } )
444
459
. map_err ( move |err| Error :: Find ( Box :: new ( err) ) )
445
460
}
461
+
462
+ fn stream_worktree_file ( mut self ) -> Result < Stream < ' a > , Error > {
463
+ self . buf . clear ( ) ;
464
+ // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just
465
+ // normal files with their content equal to the linked path (so can be read normally)
466
+ //
467
+ let is_symlink = self . entry . mode == gix_index:: entry:: Mode :: SYMLINK ;
468
+ // TODO: what to do about precompose unicode and ignore_case for symlinks
469
+ let out = if is_symlink && self . options . fs . symlink {
470
+ // conversion to bstr can never fail because symlinks are only used
471
+ // on unix (by git) so no reason to use the try version here
472
+ let symlink_path = gix_path:: into_bstr ( std:: fs:: read_link ( self . path ) ?) ;
473
+ self . buf . extend_from_slice ( & symlink_path) ;
474
+ self . worktree_bytes . fetch_add ( self . buf . len ( ) as u64 , Ordering :: Relaxed ) ;
475
+ Stream {
476
+ inner : ToGitOutcome :: Buffer ( self . buf ) ,
477
+ bytes : None ,
478
+ len : None ,
479
+ }
480
+ } else {
481
+ self . buf . clear ( ) ;
482
+ let platform = self . attr_stack . at_entry ( self . rela_path , Some ( false ) , & mut self . find ) ?;
483
+ let file = std:: fs:: File :: open ( self . path ) ?;
484
+ let out = self
485
+ . filter
486
+ . convert_to_git (
487
+ file,
488
+ self . path ,
489
+ & mut |_path, attrs| {
490
+ platform. matching_attributes ( attrs) ;
491
+ } ,
492
+ & mut |buf| {
493
+ ( self . find ) ( self . id , buf)
494
+ . map ( |_| Some ( ( ) ) )
495
+ . map_err ( |err| Box :: new ( err) as Box < dyn std:: error:: Error + Send + Sync + ' static > )
496
+ } ,
497
+ )
498
+ . map_err ( |err| io:: Error :: new ( io:: ErrorKind :: Other , err) ) ?;
499
+ let len = match out {
500
+ ToGitOutcome :: Unchanged ( _) => Some ( self . file_len ) ,
501
+ ToGitOutcome :: Process ( _) | ToGitOutcome :: Buffer ( _) => None ,
502
+ } ;
503
+ Stream {
504
+ inner : out,
505
+ bytes : Some ( self . worktree_bytes ) ,
506
+ len,
507
+ }
508
+ } ;
509
+
510
+ self . worktree_reads . fetch_add ( 1 , Ordering :: Relaxed ) ;
511
+ Ok ( out)
512
+ }
446
513
}
0 commit comments