Skip to content

Commit 203c872

Browse files
newrengitster
authored andcommitted
merge-ort: fix a directory rename detection bug
As noted in commit 902c521 ("t6423: more involved directory rename test", 2020-10-15), when we have a case where * dir/subdir/ has several files * almost all files in dir/subdir/ are renamed to folder/subdir/ * one of the files in dir/subdir/ is renamed to folder/subdir/newsubdir/ * the other side of history (that doesn't do the renames) adds a new file to dir/subdir/ Then for the majority of the file renames, the directory rename of dir/subdir/ -> folder/subdir/ is actually not represented that way but as dir/ -> folder/ We also had one rename that was represented as dir/subdir/ -> folder/subdir/newsubdir/ Now, since there's a new file in dir/subdir/, where does it go? Well, there's only one rule for dir/subdir/, so the code previously noted that this rule had the "majority" of the one "relevant" rename and thus erroneously used it to place the file in folder/subdir/newsubdir/. We really want the heavy weight associated with dir/ -> folder/ to also be treated as dir/subdir/ -> folder/subdir/, so that we correctly place the file in folder/subdir/. Add a bunch of logic to make sure that we use all relevant renamings in directory rename detection. Note that testcase 12f of t6423 still fails after this, but it gets further than merge-recursive does. There are some performance related bits in that testcase (the region_enter messages) that do not yet succeed, but the rest of the testcase works after this patch. Subsequent patch series will fix up the performance side. Signed-off-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1b6b902 commit 203c872

File tree

1 file changed

+81
-117
lines changed

1 file changed

+81
-117
lines changed

merge-ort.c

Lines changed: 81 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -764,109 +764,6 @@ static char *apply_dir_rename(struct strmap_entry *rename_info,
764764
return strbuf_detach(&new_path, NULL);
765765
}
766766

767-
static void get_renamed_dir_portion(const char *old_path, const char *new_path,
768-
char **old_dir, char **new_dir)
769-
{
770-
char *end_of_old, *end_of_new;
771-
772-
/* Default return values: NULL, meaning no rename */
773-
*old_dir = NULL;
774-
*new_dir = NULL;
775-
776-
/*
777-
* For
778-
* "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
779-
* the "e/foo.c" part is the same, we just want to know that
780-
* "a/b/c/d" was renamed to "a/b/some/thing/else"
781-
* so, for this example, this function returns "a/b/c/d" in
782-
* *old_dir and "a/b/some/thing/else" in *new_dir.
783-
*/
784-
785-
/*
786-
* If the basename of the file changed, we don't care. We want
787-
* to know which portion of the directory, if any, changed.
788-
*/
789-
end_of_old = strrchr(old_path, '/');
790-
end_of_new = strrchr(new_path, '/');
791-
792-
/*
793-
* If end_of_old is NULL, old_path wasn't in a directory, so there
794-
* could not be a directory rename (our rule elsewhere that a
795-
* directory which still exists is not considered to have been
796-
* renamed means the root directory can never be renamed -- because
797-
* the root directory always exists).
798-
*/
799-
if (end_of_old == NULL)
800-
return; /* Note: *old_dir and *new_dir are still NULL */
801-
802-
/*
803-
* If new_path contains no directory (end_of_new is NULL), then we
804-
* have a rename of old_path's directory to the root directory.
805-
*/
806-
if (end_of_new == NULL) {
807-
*old_dir = xstrndup(old_path, end_of_old - old_path);
808-
*new_dir = xstrdup("");
809-
return;
810-
}
811-
812-
/* Find the first non-matching character traversing backwards */
813-
while (*--end_of_new == *--end_of_old &&
814-
end_of_old != old_path &&
815-
end_of_new != new_path)
816-
; /* Do nothing; all in the while loop */
817-
818-
/*
819-
* If both got back to the beginning of their strings, then the
820-
* directory didn't change at all, only the basename did.
821-
*/
822-
if (end_of_old == old_path && end_of_new == new_path &&
823-
*end_of_old == *end_of_new)
824-
return; /* Note: *old_dir and *new_dir are still NULL */
825-
826-
/*
827-
* If end_of_new got back to the beginning of its string, and
828-
* end_of_old got back to the beginning of some subdirectory, then
829-
* we have a rename/merge of a subdirectory into the root, which
830-
* needs slightly special handling.
831-
*
832-
* Note: There is no need to consider the opposite case, with a
833-
* rename/merge of the root directory into some subdirectory
834-
* because as noted above the root directory always exists so it
835-
* cannot be considered to be renamed.
836-
*/
837-
if (end_of_new == new_path &&
838-
end_of_old != old_path && end_of_old[-1] == '/') {
839-
*old_dir = xstrndup(old_path, --end_of_old - old_path);
840-
*new_dir = xstrdup("");
841-
return;
842-
}
843-
844-
/*
845-
* We've found the first non-matching character in the directory
846-
* paths. That means the current characters we were looking at
847-
* were part of the first non-matching subdir name going back from
848-
* the end of the strings. Get the whole name by advancing both
849-
* end_of_old and end_of_new to the NEXT '/' character. That will
850-
* represent the entire directory rename.
851-
*
852-
* The reason for the increment is cases like
853-
* a/b/star/foo/whatever.c -> a/b/tar/foo/random.c
854-
* After dropping the basename and going back to the first
855-
* non-matching character, we're now comparing:
856-
* a/b/s and a/b/
857-
* and we want to be comparing:
858-
* a/b/star/ and a/b/tar/
859-
* but without the pre-increment, the one on the right would stay
860-
* a/b/.
861-
*/
862-
end_of_old = strchr(++end_of_old, '/');
863-
end_of_new = strchr(++end_of_new, '/');
864-
865-
/* Copy the old and new directories into *old_dir and *new_dir. */
866-
*old_dir = xstrndup(old_path, end_of_old - old_path);
867-
*new_dir = xstrndup(new_path, end_of_new - new_path);
868-
}
869-
870767
static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask)
871768
{
872769
struct merged_info *mi = strmap_get(paths, path);
@@ -952,6 +849,14 @@ static char *handle_path_level_conflicts(struct merge_options *opt,
952849
return new_path;
953850
}
954851

852+
static void dirname_munge(char *filename)
853+
{
854+
char *slash = strrchr(filename, '/');
855+
if (!slash)
856+
slash = filename;
857+
*slash = '\0';
858+
}
859+
955860
static void increment_count(struct strmap *dir_rename_count,
956861
char *old_dir,
957862
char *new_dir)
@@ -973,40 +878,99 @@ static void increment_count(struct strmap *dir_rename_count,
973878
strintmap_incr(counts, new_dir, 1);
974879
}
975880

881+
static void update_dir_rename_counts(struct strmap *dir_rename_count,
882+
struct strset *dirs_removed,
883+
const char *oldname,
884+
const char *newname)
885+
{
886+
char *old_dir = xstrdup(oldname);
887+
char *new_dir = xstrdup(newname);
888+
char new_dir_first_char = new_dir[0];
889+
int first_time_in_loop = 1;
890+
891+
while (1) {
892+
dirname_munge(old_dir);
893+
dirname_munge(new_dir);
894+
895+
/*
896+
* When renaming
897+
* "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
898+
* then this suggests that both
899+
* a/b/c/d/e/ => a/b/some/thing/else/e/
900+
* a/b/c/d/ => a/b/some/thing/else/
901+
* so we want to increment counters for both. We do NOT,
902+
* however, also want to suggest that there was the following
903+
* rename:
904+
* a/b/c/ => a/b/some/thing/
905+
* so we need to quit at that point.
906+
*
907+
* Note the when first_time_in_loop, we only strip off the
908+
* basename, and we don't care if that's different.
909+
*/
910+
if (!first_time_in_loop) {
911+
char *old_sub_dir = strchr(old_dir, '\0')+1;
912+
char *new_sub_dir = strchr(new_dir, '\0')+1;
913+
if (!*new_dir) {
914+
/*
915+
* Special case when renaming to root directory,
916+
* i.e. when new_dir == "". In this case, we had
917+
* something like
918+
* a/b/subdir => subdir
919+
* and so dirname_munge() sets things up so that
920+
* old_dir = "a/b\0subdir\0"
921+
* new_dir = "\0ubdir\0"
922+
* We didn't have a '/' to overwrite a '\0' onto
923+
* in new_dir, so we have to compare differently.
924+
*/
925+
if (new_dir_first_char != old_sub_dir[0] ||
926+
strcmp(old_sub_dir+1, new_sub_dir))
927+
break;
928+
} else {
929+
if (strcmp(old_sub_dir, new_sub_dir))
930+
break;
931+
}
932+
}
933+
934+
if (strset_contains(dirs_removed, old_dir))
935+
increment_count(dir_rename_count, old_dir, new_dir);
936+
else
937+
break;
938+
939+
/* If we hit toplevel directory ("") for old or new dir, quit */
940+
if (!*old_dir || !*new_dir)
941+
break;
942+
943+
first_time_in_loop = 0;
944+
}
945+
946+
/* Free resources we don't need anymore */
947+
free(old_dir);
948+
free(new_dir);
949+
}
950+
976951
static void compute_rename_counts(struct diff_queue_struct *pairs,
977952
struct strmap *dir_rename_count,
978953
struct strset *dirs_removed)
979954
{
980955
int i;
981956

982957
for (i = 0; i < pairs->nr; ++i) {
983-
char *old_dir, *new_dir;
984958
struct diff_filepair *pair = pairs->queue[i];
985959

986960
/* File not part of directory rename if it wasn't renamed */
987961
if (pair->status != 'R')
988962
continue;
989963

990-
/* Get the old and new directory names */
991-
get_renamed_dir_portion(pair->one->path, pair->two->path,
992-
&old_dir, &new_dir);
993-
if (!old_dir)
994-
/* Directory didn't change at all; ignore this one. */
995-
continue;
996-
997964
/*
998965
* Make dir_rename_count contain a map of a map:
999966
* old_directory -> {new_directory -> count}
1000967
* In other words, for every pair look at the directories for
1001968
* the old filename and the new filename and count how many
1002969
* times that pairing occurs.
1003970
*/
1004-
if (strset_contains(dirs_removed, old_dir))
1005-
increment_count(dir_rename_count, old_dir, new_dir);
1006-
1007-
/* Free resources we don't need anymore */
1008-
free(old_dir);
1009-
free(new_dir);
971+
update_dir_rename_counts(dir_rename_count, dirs_removed,
972+
pair->one->path,
973+
pair->two->path);
1010974
}
1011975
}
1012976

0 commit comments

Comments
 (0)