Skip to content

Commit b9a4d51

Browse files
jeffhostetler authored and dscho committed
read-cache: speed up add_index_entry during checkout
Teach add_index_entry_with_check() and has_dir_name() to see if the path of the new item is greater than the last path in the index array before attempting to search for it. This is a performance optimization. During checkout, merge_working_tree() populates the new index in sorted order, so this change saves at least 2 lookups per file. Signed-off-by: Jeff Hostetler <[email protected]>
1 parent 151f343 commit b9a4d51

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -572,6 +572,7 @@ extern int write_locked_index(struct index_state *, struct lock_file *lock, unsi
572572
extern int discard_index(struct index_state *);
573573
extern int unmerged_index(const struct index_state *);
574574
extern int verify_path(const char *path);
575+
extern int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change);
575576
extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
576577
extern void adjust_dirname_case(struct index_state *istate, char *name);
577578
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);

read-cache.c

Lines changed: 71 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -895,6 +895,34 @@ static int has_file_name(struct index_state *istate,
895895
return retval;
896896
}
897897

898+
/*
 * Like strcmp(), but also report where the two strings first differ.
 *
 * The strings are compared byte-by-byte as unsigned char.  Returns -1, 0
 * or 1 when s1_in sorts before, equal to, or after s2_in.
 *
 * If first_change is not NULL, *first_change receives the length of the
 * common prefix, i.e. the offset of the first byte that differs.  When the
 * strings are equal this is the offset of the terminating NUL, so the value
 * always tells how many leading bytes of the two strings match.
 */
int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change)
{
	const unsigned char *s1 = (const unsigned char *)s1_in;
	const unsigned char *s2 = (const unsigned char *)s2_in;
	int k;

	/*
	 * Advance while the bytes agree, stopping at the shared NUL.  A
	 * shorter string mismatches at its own NUL, so neither string is
	 * read past its terminator.
	 */
	for (k = 0; s1[k] == s2[k]; k++)
		if (!s1[k])
			break;

	if (first_change)
		*first_change = k;

	/* Normalize the result to exactly -1, 0 or 1. */
	return (s1[k] > s2[k]) - (s1[k] < s2[k]);
}
925+
898926
/*
899927
* Do we have another file with a pathname that is a proper
900928
* subset of the name we're trying to add?
@@ -906,6 +934,21 @@ static int has_dir_name(struct index_state *istate,
906934
int stage = ce_stage(ce);
907935
const char *name = ce->name;
908936
const char *slash = name + ce_namelen(ce);
937+
int len_eq_last;
938+
int cmp_last = 0;
939+
940+
if (istate->cache_nr > 0) {
941+
/*
942+
* Compare the entry's full path with the last path in the index.
943+
* If it sorts AFTER the last entry in the index and they have no
944+
* common prefix, then there cannot be any F/D name conflicts.
945+
*/
946+
cmp_last = strcmp_offset(name,
947+
istate->cache[istate->cache_nr-1]->name,
948+
&len_eq_last);
949+
if (cmp_last > 0 && len_eq_last == 0)
950+
return retval;
951+
}
909952

910953
for (;;) {
911954
int len;
@@ -918,6 +961,24 @@ static int has_dir_name(struct index_state *istate,
918961
}
919962
len = slash - name;
920963

964+
if (cmp_last > 0) {
965+
/*
966+
* If this part of the directory prefix (including the trailing
967+
* slash) already appears in the path of the last entry in the
968+
* index, then we cannot also have a file with this prefix (or
969+
* any parent directory prefix).
970+
*/
971+
if (len+1 <= len_eq_last)
972+
return retval;
973+
/*
974+
* If this part of the directory prefix (excluding the trailing
975+
* slash) is longer than the known equal portions, then this part
976+
* of the prefix cannot collide with a file. Go on to the parent.
977+
*/
978+
if (len > len_eq_last)
979+
continue;
980+
}
981+
921982
pos = index_name_stage_pos(istate, name, len, stage);
922983
if (pos >= 0) {
923984
/*
@@ -1009,7 +1070,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
10091070

10101071
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
10111072
cache_tree_invalidate_path(istate, ce->name);
1012-
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
1073+
1074+
/*
1075+
* If this entry's path sorts after the last entry in the index,
1076+
* we can avoid searching for it.
1077+
*/
1078+
if (istate->cache_nr > 0 &&
1079+
strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
1080+
pos = -istate->cache_nr - 1;
1081+
else
1082+
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
10131083

10141084
/* existing match? Just replace it. */
10151085
if (pos >= 0) {

0 commit comments

Comments
 (0)