Skip to content

Commit 47aab63

Browse files
jeffhostetler authored and dscho committed
read-cache: speed up add_index_entry during checkout
Teach add_index_entry_with_check() and has_dir_name() to see if the path of the new item is greater than the last path in the index array before attempting to search for it. This is a performance optimization. During checkout, merge_working_tree() populates the new index in sorted order, so this change saves at least 2 lookups per file. Signed-off-by: Jeff Hostetler <[email protected]>
1 parent 766bb9c commit 47aab63

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ extern int write_locked_index(struct index_state *, struct lock_file *lock, unsi
571571
extern int discard_index(struct index_state *);
572572
extern int unmerged_index(const struct index_state *);
573573
extern int verify_path(const char *path);
574+
extern int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change);
574575
extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
575576
extern void adjust_dirname_case(struct index_state *istate, char *name);
576577
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);

read-cache.c

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,34 @@ static int has_file_name(struct index_state *istate,
888888
return retval;
889889
}
890890

891+
/*
 * Like strcmp(), but also report the offset of the first change.
 *
 * The two NUL-terminated strings are compared byte by byte as unsigned
 * chars.  The return value is normalized to -1, 0 or 1 when s1_in sorts
 * before, equal to, or after s2_in.
 *
 * If first_change is not NULL, *first_change receives the index of the
 * first byte at which the strings differ, i.e. the length of their
 * common prefix.  For equal strings that is the length of the strings.
 */
int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change)
{
	const unsigned char *s1 = (const unsigned char *)s1_in;
	const unsigned char *s2 = (const unsigned char *)s2_in;
	int k = 0;

	/*
	 * Advance over the common prefix.  When s2 is the shorter string
	 * its NUL differs from s1[k], and when s1 ends the loop condition
	 * fails, so neither string is read past its terminator.
	 */
	while (s1[k] && s1[k] == s2[k])
		k++;

	if (first_change)
		*first_change = k;

	if (s1[k] > s2[k])
		return 1;
	if (s1[k] < s2[k])
		return -1;
	return 0;
}
918+
891919
/*
892920
* Do we have another file with a pathname that is a proper
893921
* subset of the name we're trying to add?
@@ -899,6 +927,21 @@ static int has_dir_name(struct index_state *istate,
899927
int stage = ce_stage(ce);
900928
const char *name = ce->name;
901929
const char *slash = name + ce_namelen(ce);
930+
int len_eq_last;
931+
int cmp_last = 0;
932+
933+
if (istate->cache_nr > 0) {
934+
/*
935+
* Compare the entry's full path with the last path in the index.
936+
* If it sorts AFTER the last entry in the index and they have no
937+
* common prefix, then there cannot be any F/D name conflicts.
938+
*/
939+
cmp_last = strcmp_offset(name,
940+
istate->cache[istate->cache_nr-1]->name,
941+
&len_eq_last);
942+
if (cmp_last > 0 && len_eq_last == 0)
943+
return retval;
944+
}
902945

903946
for (;;) {
904947
int len;
@@ -911,6 +954,24 @@ static int has_dir_name(struct index_state *istate,
911954
}
912955
len = slash - name;
913956

957+
if (cmp_last > 0) {
958+
/*
959+
* If this part of the directory prefix (including the trailing
960+
* slash) already appears in the path of the last entry in the
961+
* index, then we cannot also have a file with this prefix (or
962+
* any parent directory prefix).
963+
*/
964+
if (len+1 <= len_eq_last)
965+
return retval;
966+
/*
967+
* If this part of the directory prefix (excluding the trailing
968+
* slash) is longer than the known equal portions, then this part
969+
* of the prefix cannot collide with a file. Go on to the parent.
970+
*/
971+
if (len > len_eq_last)
972+
continue;
973+
}
974+
914975
pos = index_name_stage_pos(istate, name, len, stage);
915976
if (pos >= 0) {
916977
/*
@@ -1002,7 +1063,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
10021063

10031064
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
10041065
cache_tree_invalidate_path(istate, ce->name);
1005-
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
1066+
1067+
/*
1068+
* If this entry's path sorts after the last entry in the index,
1069+
* we can avoid searching for it.
1070+
*/
1071+
if (istate->cache_nr > 0 &&
1072+
strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
1073+
pos = -istate->cache_nr - 1;
1074+
else
1075+
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
10061076

10071077
/* existing match? Just replace it. */
10081078
if (pos >= 0) {

0 commit comments

Comments
 (0)