Skip to content

Commit dda9bff

Browse files
LukeShu authored and gitster committed
fast-export: do not modify memory from get_commit_buffer
fast-export's helper function find_encoding() takes a `const char *`, but modifies that memory despite the `const`. Ultimately, this memory came from get_commit_buffer(), and you're not supposed to modify the memory that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(), which gives back a string length, rather than mutating the memory to insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the headers and the commit message, move the call to be above the `message = strstr(..., "\n\n")` call. This helps readability, and allows for the value of `encoding` to be used for a better value of "..." so that the same memory doesn't need to be checked twice. Introduce a `commit_buffer_cursor` variable to avoid writing an awkward `encoding ? encoding + encoding_len : committer_end` expression.

Signed-off-by: Luke Shumaker <[email protected]>
Signed-off-by: Christian Couder <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 87f2a91 commit dda9bff

File tree

1 file changed

+33
-28
lines changed

1 file changed

+33
-28
lines changed

builtin/fast-export.c

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -510,21 +510,6 @@ static void show_filemodify(struct diff_queue_struct *q,
510510
}
511511
}
512512

513-
static const char *find_encoding(const char *begin, const char *end)
514-
{
515-
const char *needle = "\nencoding ";
516-
char *bol, *eol;
517-
518-
bol = memmem(begin, end ? end - begin : strlen(begin),
519-
needle, strlen(needle));
520-
if (!bol)
521-
return NULL;
522-
bol += strlen(needle);
523-
eol = strchrnul(bol, '\n');
524-
*eol = '\0';
525-
return bol;
526-
}
527-
528513
static char *anonymize_ref_component(void)
529514
{
530515
static int counter;
@@ -630,9 +615,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
630615
struct string_list *paths_of_changed_objects)
631616
{
632617
int saved_output_format = rev->diffopt.output_format;
633-
const char *commit_buffer;
618+
const char *commit_buffer, *commit_buffer_cursor;
634619
const char *author, *author_end, *committer, *committer_end;
635-
const char *encoding, *message;
620+
const char *encoding = NULL;
621+
size_t encoding_len;
622+
const char *message;
636623
char *reencoded = NULL;
637624
struct commit_list *p;
638625
const char *refname;
@@ -641,21 +628,35 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
641628
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
642629

643630
parse_commit_or_die(commit);
644-
commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
645-
author = strstr(commit_buffer, "\nauthor ");
631+
commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
632+
633+
author = strstr(commit_buffer_cursor, "\nauthor ");
646634
if (!author)
647635
die("could not find author in commit %s",
648636
oid_to_hex(&commit->object.oid));
649637
author++;
650-
author_end = strchrnul(author, '\n');
651-
committer = strstr(author_end, "\ncommitter ");
638+
commit_buffer_cursor = author_end = strchrnul(author, '\n');
639+
640+
committer = strstr(commit_buffer_cursor, "\ncommitter ");
652641
if (!committer)
653642
die("could not find committer in commit %s",
654643
oid_to_hex(&commit->object.oid));
655644
committer++;
656-
committer_end = strchrnul(committer, '\n');
657-
message = strstr(committer_end, "\n\n");
658-
encoding = find_encoding(committer_end, message);
645+
commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
646+
647+
/*
648+
* find_commit_header() gets a `+ 1` because
649+
* commit_buffer_cursor points at the trailing "\n" at the end
650+
* of the previous line, but find_commit_header() wants a
651+
* pointer to the beginning of the next line.
652+
*/
653+
if (*commit_buffer_cursor == '\n') {
654+
encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
655+
if (encoding)
656+
commit_buffer_cursor = encoding + encoding_len;
657+
}
658+
659+
message = strstr(commit_buffer_cursor, "\n\n");
659660
if (message)
660661
message += 2;
661662

@@ -694,16 +695,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
694695
if (anonymize) {
695696
reencoded = anonymize_commit_message();
696697
} else if (encoding) {
698+
char *buf;
697699
switch (reencode_mode) {
698700
case REENCODE_YES:
699-
reencoded = reencode_string(message, "UTF-8", encoding);
701+
buf = xstrfmt("%.*s", (int)encoding_len, encoding);
702+
reencoded = reencode_string(message, "UTF-8", buf);
703+
free(buf);
700704
break;
701705
case REENCODE_NO:
702706
break;
703707
case REENCODE_ABORT:
704-
die("Encountered commit-specific encoding %s in commit "
708+
die("Encountered commit-specific encoding %.*s in commit "
705709
"%s; use --reencode=[yes|no] to handle it",
706-
encoding, oid_to_hex(&commit->object.oid));
710+
(int)encoding_len, encoding,
711+
oid_to_hex(&commit->object.oid));
707712
}
708713
}
709714
if (!commit->parents)
@@ -715,7 +720,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
715720
(int)(author_end - author), author,
716721
(int)(committer_end - committer), committer);
717722
if (!reencoded && encoding)
718-
printf("encoding %s\n", encoding);
723+
printf("encoding %.*s\n", (int)encoding_len, encoding);
719724
printf("data %u\n%s",
720725
(unsigned)(reencoded
721726
? strlen(reencoded) : message

0 commit comments

Comments
 (0)