Skip to content

Commit 7e77df3

Browse files
pclouds authored and gitster committed
pretty: two phase conversion for non utf-8 commits
Always assume format_commit_item() takes a UTF-8 string, for simplicity of string handling (we can handle UTF-8 strings, but not strings in other encodings). If the commit message is not in UTF-8, or the output encoding is not UTF-8, then the commit is first converted to UTF-8, processed, and then the output is converted to the output encoding. This of course only works with encodings that are compatible with Unicode. This also fixes the iso8859-1 test in t6006. It's supposed to create an iso8859-1 commit, but the commit content in t6006 is in UTF-8. t6006 is now converted back in UTF-8 (the downside is that we can't put UTF-8 strings there anymore). Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b782bba commit 7e77df3

File tree

2 files changed

+28
-8
lines changed

2 files changed

+28
-8
lines changed

pretty.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,8 @@ static int format_reflog_person(struct strbuf *sb,
954954
return format_person_part(sb, part, ident, strlen(ident), dmode);
955955
}
956956

957-
static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
957+
static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
958+
const char *placeholder,
958959
void *context)
959960
{
960961
struct format_commit_context *c = context;
@@ -1193,7 +1194,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
11931194
return 0; /* unknown placeholder */
11941195
}
11951196

1196-
static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
1197+
static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
1198+
const char *placeholder,
11971199
void *context)
11981200
{
11991201
int consumed;
@@ -1273,6 +1275,7 @@ void format_commit_message(const struct commit *commit,
12731275
{
12741276
struct format_commit_context context;
12751277
const char *output_enc = pretty_ctx->output_encoding;
1278+
const char *utf8 = "UTF-8";
12761279

12771280
memset(&context, 0, sizeof(context));
12781281
context.commit = commit;
@@ -1285,6 +1288,23 @@ void format_commit_message(const struct commit *commit,
12851288
strbuf_expand(sb, format, format_commit_item, &context);
12861289
rewrap_message_tail(sb, &context, 0, 0, 0);
12871290

1291+
if (output_enc) {
1292+
if (same_encoding(utf8, output_enc))
1293+
output_enc = NULL;
1294+
} else {
1295+
if (context.commit_encoding &&
1296+
!same_encoding(context.commit_encoding, utf8))
1297+
output_enc = context.commit_encoding;
1298+
}
1299+
1300+
if (output_enc) {
1301+
int outsz;
1302+
char *out = reencode_string_len(sb->buf, sb->len,
1303+
output_enc, utf8, &outsz);
1304+
if (out)
1305+
strbuf_attach(sb, out, outsz, outsz + 1);
1306+
}
1307+
12881308
free(context.commit_encoding);
12891309
logmsg_free(context.message, commit);
12901310
free(context.signature_check.gpg_output);

t/t6006-rev-list-format.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,22 +184,22 @@ Test printing of complex bodies
184184
185185
This commit message is much longer than the others,
186186
and it will be encoded in iso8859-1. We should therefore
187-
include an iso8859 character: ¡bueno!
187+
include an iso8859 character: ¡bueno!
188188
EOF
189189
test_expect_success 'setup complex body' '
190190
git config i18n.commitencoding iso8859-1 &&
191191
echo change2 >foo && git commit -a -F commit-msg
192192
'
193193

194194
test_format complex-encoding %e <<'EOF'
195-
commit f58db70b055c5718631e5c61528b28b12090cdea
195+
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
196196
iso8859-1
197197
commit 131a310eb913d107dd3c09a65d1651175898735d
198198
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
199199
EOF
200200

201201
test_format complex-subject %s <<'EOF'
202-
commit f58db70b055c5718631e5c61528b28b12090cdea
202+
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
203203
Test printing of complex bodies
204204
commit 131a310eb913d107dd3c09a65d1651175898735d
205205
changed foo
@@ -208,17 +208,17 @@ added foo
208208
EOF
209209

210210
test_format complex-body %b <<'EOF'
211-
commit f58db70b055c5718631e5c61528b28b12090cdea
211+
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
212212
This commit message is much longer than the others,
213213
and it will be encoded in iso8859-1. We should therefore
214-
include an iso8859 character: ¡bueno!
214+
include an iso8859 character: ¡bueno!
215215
216216
commit 131a310eb913d107dd3c09a65d1651175898735d
217217
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
218218
EOF
219219

220220
test_expect_success '%x00 shows NUL' '
221-
echo >expect commit f58db70b055c5718631e5c61528b28b12090cdea &&
221+
echo >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
222222
echo >>expect fooQbar &&
223223
git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
224224
nul_to_q <actual.nul >actual &&

0 commit comments

Comments
 (0)