@@ -439,96 +439,208 @@ void ensure_correct_sparsity(struct index_state *istate)
439
439
ensure_full_index (istate );
440
440
}
441
441
442
- static int path_found (const char * path , const char * * dirname , size_t * dir_len ,
443
- int * dir_found )
442
+ struct path_found_data {
443
+ /**
444
+ * The path stored in 'dir', if non-empty, corresponds to the most-
445
+ * recent path that we checked where:
446
+ *
447
+ * 1. The path should be a directory, according to the index.
448
+ * 2. The path does not exist.
449
+ * 3. The parent path _does_ exist. (This may be the root of the
450
+ * working directory.)
451
+ */
452
+ struct strbuf dir ;
453
+ size_t lstat_count ;
454
+ };
455
+
456
+ #define PATH_FOUND_DATA_INIT { \
457
+ .dir = STRBUF_INIT \
458
+ }
459
+
460
+ static void clear_path_found_data (struct path_found_data * data )
461
+ {
462
+ strbuf_release (& data -> dir );
463
+ }
464
+
465
/**
 * Compute the length of the longest prefix shared by 'path1' and
 * 'path2' that ends in a slash ('/'), i.e. their deepest common parent
 * directory with its trailing slash included. The result is zero when
 * the two paths have no parent directory in common.
 */
static size_t max_common_dir_prefix(const char *path1, const char *path2)
{
	size_t dir_end = 0;
	size_t pos = 0;

	/*
	 * Walk both strings while they agree. A NUL in 'path1' ends the
	 * walk directly; a NUL in 'path2' ends it through the equality
	 * test, since 'path1' is known to be non-NUL at that position.
	 */
	while (path1[pos] && path1[pos] == path2[pos]) {
		/*
		 * Agreement on a directory separator: remember the
		 * position just past it so the slash itself is part of
		 * the reported prefix.
		 */
		if (path1[pos] == '/')
			dir_end = pos + 1;
		pos++;
	}

	return dir_end;
}
487
+
488
+ static int path_found (const char * path , struct path_found_data * data )
444
489
{
445
490
struct stat st ;
446
- char * newdir ;
447
- char * tmp ;
491
+ size_t common_prefix ;
448
492
449
493
/*
450
- * If dirname corresponds to a directory that doesn't exist, and this
451
- * path starts with dirname, then path can't exist.
494
+ * If data->dir is non-empty, then it contains a path that doesn't
495
+ * exist, including an ending slash ('/'). If it is a prefix of 'path',
496
+ * then we can return 0.
452
497
*/
453
- if (! * dir_found && !memcmp (path , * dirname , * dir_len ))
498
+ if (data -> dir . len && !memcmp (path , data -> dir . buf , data -> dir . len ))
454
499
return 0 ;
455
500
456
501
/*
457
- * If path itself exists, return 1.
502
+ * Otherwise, we must check if the current path exists. If it does, then
503
+ * return 1. The cached directory will be skipped until we come across
504
+ * a missing path again.
458
505
*/
506
+ data -> lstat_count ++ ;
459
507
if (!lstat (path , & st ))
460
508
return 1 ;
461
509
462
510
/*
463
- * Otherwise, path does not exist so we'll return 0...but we'll first
464
- * determine some info about its parent directory so we can avoid
465
- * lstat calls for future cache entries.
511
+ * At this point, we know that 'path' doesn't exist, and we know that
512
+ * the parent directory of 'data->dir' does exist. Let's set 'data->dir'
513
+ * to be the top-most non-existing directory of 'path'. If the first
514
+ * parent of 'path' exists, then we will act as though 'path'
515
+ * corresponds to a directory (by adding a slash).
466
516
*/
467
- newdir = strrchr (path , '/' );
468
- if (!newdir )
469
- return 0 ; /* Didn't find a parent dir; just return 0 now. */
517
+ common_prefix = max_common_dir_prefix (path , data -> dir .buf );
470
518
471
519
/*
472
- * If path starts with directory (which we already lstat'ed and found),
473
- * then no need to lstat parent directory again.
520
+ * At this point, 'path' and 'data->dir' have a common existing parent
521
+ * directory given by path[0..common_prefix] (which could have length 0).
522
+ * We "grow" the data->dir buffer by checking for existing directories
523
+ * along 'path'.
474
524
*/
475
- if (* dir_found && * dirname && memcmp (path , * dirname , * dir_len ))
476
- return 0 ;
477
525
478
- /* Free previous dirname, and cache path's dirname */
479
- * dirname = path ;
480
- * dir_len = newdir - path + 1 ;
526
+ strbuf_setlen (& data -> dir , common_prefix );
527
+ while (1 ) {
528
+ /* Find the next directory in 'path'. */
529
+ const char * rest = path + data -> dir .len ;
530
+ const char * next_slash = strchr (rest , '/' );
531
+
532
+ /*
533
+ * If there are no more slashes, then 'path' doesn't contain a
534
+ * non-existent _parent_ directory. Set 'data->dir' to be equal
535
+ * to 'path' plus an additional slash, so it can be used for
536
+ * caching in the future. The filename of 'path' is considered
537
+ * a non-existent directory.
538
+ *
539
+ * Note: if "{path}/" exists as a directory, then it will never
540
+ * appear as a prefix of other callers to this method, assuming
541
+ * the context from the clear_skip_worktree... methods. If this
542
+ * method is reused, then this must be reconsidered.
543
+ */
544
+ if (!next_slash ) {
545
+ strbuf_addstr (& data -> dir , rest );
546
+ strbuf_addch (& data -> dir , '/' );
547
+ break ;
548
+ }
549
+
550
+ /*
551
+ * Now that we have a slash, let's grow 'data->dir' to include
552
+ * this slash, then test if we should stop.
553
+ */
554
+ strbuf_add (& data -> dir , rest , next_slash - rest + 1 );
481
555
482
- tmp = xstrndup (path , * dir_len );
483
- * dir_found = !lstat (tmp , & st );
484
- free (tmp );
556
+ /* If the parent dir doesn't exist, then stop here. */
557
+ data -> lstat_count ++ ;
558
+ if (lstat (data -> dir .buf , & st ))
559
+ return 0 ;
560
+ }
485
561
562
+ /*
563
+ * At this point, 'data->dir' is equal to 'path' plus a slash character,
564
+ * and the parent directory of 'path' definitely exists. Moreover, we
565
+ * know that 'path' doesn't exist, or we would have returned 1 earlier.
566
+ */
486
567
return 0 ;
487
568
}
488
569
489
- void clear_skip_worktree_from_present_files (struct index_state * istate )
570
+ static int clear_skip_worktree_from_present_files_sparse (struct index_state * istate )
490
571
{
491
- const char * last_dirname = NULL ;
492
- size_t dir_len = 0 ;
493
- int dir_found = 1 ;
494
-
495
- int i ;
496
- int path_count [2 ] = {0 , 0 };
497
- int restarted = 0 ;
572
+ struct path_found_data data = PATH_FOUND_DATA_INIT ;
498
573
499
- if (!core_apply_sparse_checkout ||
500
- sparse_expect_files_outside_of_patterns )
501
- return ;
574
+ int path_count = 0 ;
575
+ int to_restart = 0 ;
502
576
503
- trace2_region_enter ("index" , "clear_skip_worktree_from_present_files " ,
577
+ trace2_region_enter ("index" , "clear_skip_worktree_from_present_files_sparse " ,
504
578
istate -> repo );
505
- restart :
506
- for (i = 0 ; i < istate -> cache_nr ; i ++ ) {
579
+ for (int i = 0 ; i < istate -> cache_nr ; i ++ ) {
507
580
struct cache_entry * ce = istate -> cache [i ];
508
581
509
582
if (ce_skip_worktree (ce )) {
510
- path_count [ restarted ] ++ ;
511
- if (path_found (ce -> name , & last_dirname , & dir_len , & dir_found )) {
583
+ path_count ++ ;
584
+ if (path_found (ce -> name , & data )) {
512
585
if (S_ISSPARSEDIR (ce -> ce_mode )) {
513
- if (restarted )
514
- BUG ("ensure-full-index did not fully flatten?" );
515
- ensure_full_index (istate );
516
- restarted = 1 ;
517
- goto restart ;
586
+ to_restart = 1 ;
587
+ break ;
518
588
}
519
589
ce -> ce_flags &= ~CE_SKIP_WORKTREE ;
520
590
}
521
591
}
522
592
}
523
593
524
- if (path_count [0 ])
525
- trace2_data_intmax ("index" , istate -> repo ,
526
- "sparse_path_count" , path_count [0 ]);
527
- if (restarted )
528
- trace2_data_intmax ("index" , istate -> repo ,
529
- "sparse_path_count_full" , path_count [1 ]);
530
- trace2_region_leave ("index" , "clear_skip_worktree_from_present_files" ,
594
+ trace2_data_intmax ("index" , istate -> repo ,
595
+ "sparse_path_count" , path_count );
596
+ trace2_data_intmax ("index" , istate -> repo ,
597
+ "sparse_lstat_count" , data .lstat_count );
598
+ trace2_region_leave ("index" , "clear_skip_worktree_from_present_files_sparse" ,
599
+ istate -> repo );
600
+ clear_path_found_data (& data );
601
+ return to_restart ;
602
+ }
603
+
604
+ static void clear_skip_worktree_from_present_files_full (struct index_state * istate )
605
+ {
606
+ struct path_found_data data = PATH_FOUND_DATA_INIT ;
607
+
608
+ int path_count = 0 ;
609
+
610
+ trace2_region_enter ("index" , "clear_skip_worktree_from_present_files_full" ,
611
+ istate -> repo );
612
+ for (int i = 0 ; i < istate -> cache_nr ; i ++ ) {
613
+ struct cache_entry * ce = istate -> cache [i ];
614
+
615
+ if (S_ISSPARSEDIR (ce -> ce_mode ))
616
+ BUG ("ensure-full-index did not fully flatten?" );
617
+
618
+ if (ce_skip_worktree (ce )) {
619
+ path_count ++ ;
620
+ if (path_found (ce -> name , & data ))
621
+ ce -> ce_flags &= ~CE_SKIP_WORKTREE ;
622
+ }
623
+ }
624
+
625
+ trace2_data_intmax ("index" , istate -> repo ,
626
+ "full_path_count" , path_count );
627
+ trace2_data_intmax ("index" , istate -> repo ,
628
+ "full_lstat_count" , data .lstat_count );
629
+ trace2_region_leave ("index" , "clear_skip_worktree_from_present_files_full" ,
531
630
istate -> repo );
631
+ clear_path_found_data (& data );
632
+ }
633
+
634
+ void clear_skip_worktree_from_present_files (struct index_state * istate )
635
+ {
636
+ if (!core_apply_sparse_checkout ||
637
+ sparse_expect_files_outside_of_patterns )
638
+ return ;
639
+
640
+ if (clear_skip_worktree_from_present_files_sparse (istate )) {
641
+ ensure_full_index (istate );
642
+ clear_skip_worktree_from_present_files_full (istate );
643
+ }
532
644
}
533
645
534
646
/*
0 commit comments