@@ -45,6 +45,7 @@ static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
45
45
static struct string_list tag_refs = STRING_LIST_INIT_NODUP ;
46
46
static struct refspec refspecs = REFSPEC_INIT_FETCH ;
47
47
/* Non-zero when --anonymize was given; --anonymize-map is rejected without it. */
static int anonymize ;
48
/*
 * User-configured token mappings, filled in by the --anonymize-map option
 * callback. anonymize_str() consults this map first, before the per-context
 * map it was handed. The map counts as "in use" once its cmpfn is set.
 */
+ static struct hashmap anonymized_seeds ;
48
49
static struct revision_sources revision_sources ;
49
50
50
51
static int parse_opt_signed_tag_mode (const struct option * opt ,
@@ -119,57 +120,74 @@ static int has_unshown_parent(struct commit *commit)
119
120
}
120
121
121
122
struct anonymized_entry {
123
+ struct hashmap_entry hash ;
124
+ const char * anon ;
125
+ const char orig [FLEX_ARRAY ];
126
+ };
127
+
128
+ struct anonymized_entry_key {
122
129
struct hashmap_entry hash ;
123
130
const char * orig ;
124
131
size_t orig_len ;
125
- const char * anon ;
126
- size_t anon_len ;
127
132
};
128
133
129
134
static int anonymized_entry_cmp (const void * unused_cmp_data ,
130
135
const struct hashmap_entry * eptr ,
131
136
const struct hashmap_entry * entry_or_key ,
132
- const void * unused_keydata )
137
+ const void * keydata )
133
138
{
134
139
const struct anonymized_entry * a , * b ;
135
140
136
141
a = container_of (eptr , const struct anonymized_entry , hash );
137
- b = container_of (entry_or_key , const struct anonymized_entry , hash );
142
+ if (keydata ) {
143
+ const struct anonymized_entry_key * key = keydata ;
144
+ int equal = !strncmp (a -> orig , key -> orig , key -> orig_len ) &&
145
+ !a -> orig [key -> orig_len ];
146
+ return !equal ;
147
+ }
138
148
139
- return a -> orig_len != b -> orig_len ||
140
- memcmp (a -> orig , b -> orig , a -> orig_len );
149
+ b = container_of ( entry_or_key , const struct anonymized_entry , hash );
150
+ return strcmp (a -> orig , b -> orig );
141
151
}
142
152
143
153
/*
144
154
* Basically keep a cache of X->Y so that we can repeatedly replace
145
155
* the same anonymized string with another. The actual generation
146
156
* is farmed out to the generate function.
147
157
*/
148
- static const void * anonymize_mem (struct hashmap * map ,
149
- void * (* generate )(const void * , size_t * ),
150
- const void * orig , size_t * len )
158
+ static const char * anonymize_str (struct hashmap * map ,
159
+ char * (* generate )(void * ),
160
+ const char * orig , size_t len ,
161
+ void * data )
151
162
{
152
- struct anonymized_entry key , * ret ;
163
+ struct anonymized_entry_key key ;
164
+ struct anonymized_entry * ret ;
153
165
154
166
if (!map -> cmpfn )
155
167
hashmap_init (map , anonymized_entry_cmp , NULL , 0 );
156
168
157
- hashmap_entry_init (& key .hash , memhash (orig , * len ));
169
+ hashmap_entry_init (& key .hash , memhash (orig , len ));
158
170
key .orig = orig ;
159
- key .orig_len = * len ;
160
- ret = hashmap_get_entry (map , & key , hash , NULL );
171
+ key .orig_len = len ;
172
+
173
+ /* First check if it's a token the user configured manually... */
174
+ if (anonymized_seeds .cmpfn )
175
+ ret = hashmap_get_entry (& anonymized_seeds , & key , hash , & key );
176
+ else
177
+ ret = NULL ;
178
+
179
+ /* ...otherwise check if we've already seen it in this context... */
180
+ if (!ret )
181
+ ret = hashmap_get_entry (map , & key , hash , & key );
161
182
183
+ /* ...and finally generate a new mapping if necessary */
162
184
if (!ret ) {
163
- ret = xmalloc ( sizeof ( * ret ) );
185
+ FLEX_ALLOC_MEM ( ret , orig , orig , len );
164
186
hashmap_entry_init (& ret -> hash , key .hash .hash );
165
- ret -> orig = xstrdup (orig );
166
- ret -> orig_len = * len ;
167
- ret -> anon = generate (orig , len );
168
- ret -> anon_len = * len ;
187
+ ret -> anon = generate (data );
169
188
hashmap_put (map , & ret -> hash );
170
189
}
171
190
172
- * len = ret -> anon_len ;
173
191
return ret -> anon ;
174
192
}
175
193
@@ -181,13 +199,13 @@ static const void *anonymize_mem(struct hashmap *map,
181
199
*/
182
200
static void anonymize_path (struct strbuf * out , const char * path ,
183
201
struct hashmap * map ,
184
- void * (* generate )(const void * , size_t * ))
202
+ char * (* generate )(void * ))
185
203
{
186
204
while (* path ) {
187
205
const char * end_of_component = strchrnul (path , '/' );
188
206
size_t len = end_of_component - path ;
189
- const char * c = anonymize_mem (map , generate , path , & len );
190
- strbuf_add (out , c , len );
207
+ const char * c = anonymize_str (map , generate , path , len , NULL );
208
+ strbuf_addstr (out , c );
191
209
path = end_of_component ;
192
210
if (* path )
193
211
strbuf_addch (out , * path ++ );
@@ -361,12 +379,12 @@ static void print_path_1(const char *path)
361
379
printf ("%s" , path );
362
380
}
363
381
364
- static void * anonymize_path_component (const void * path , size_t * len )
382
+ static char * anonymize_path_component (void * data )
365
383
{
366
384
static int counter ;
367
385
struct strbuf out = STRBUF_INIT ;
368
386
strbuf_addf (& out , "path%d" , counter ++ );
369
- return strbuf_detach (& out , len );
387
+ return strbuf_detach (& out , NULL );
370
388
}
371
389
372
390
static void print_path (const char * path )
@@ -383,20 +401,23 @@ static void print_path(const char *path)
383
401
}
384
402
}
385
403
386
- static void * generate_fake_oid (const void * old , size_t * len )
404
+ static char * generate_fake_oid (void * data )
387
405
{
388
406
static uint32_t counter = 1 ; /* avoid null oid */
389
407
const unsigned hashsz = the_hash_algo -> rawsz ;
390
- unsigned char * out = xcalloc (hashsz , 1 );
408
+ unsigned char out [GIT_MAX_RAWSZ ];
409
+ char * hex = xmallocz (GIT_MAX_HEXSZ );
410
+
411
+ hashclr (out );
391
412
put_be32 (out + hashsz - 4 , counter ++ );
392
- return out ;
413
+ return hash_to_hex_algop_r ( hex , out , the_hash_algo ) ;
393
414
}
394
415
395
- static const struct object_id * anonymize_oid (const struct object_id * oid )
416
+ static const char * anonymize_oid (const char * oid_hex )
396
417
{
397
418
static struct hashmap objs ;
398
- size_t len = the_hash_algo -> rawsz ;
399
- return anonymize_mem (& objs , generate_fake_oid , oid , & len );
419
+ size_t len = strlen ( oid_hex ) ;
420
+ return anonymize_str (& objs , generate_fake_oid , oid_hex , len , NULL );
400
421
}
401
422
402
423
static void show_filemodify (struct diff_queue_struct * q ,
@@ -455,9 +476,9 @@ static void show_filemodify(struct diff_queue_struct *q,
455
476
*/
456
477
if (no_data || S_ISGITLINK (spec -> mode ))
457
478
printf ("M %06o %s " , spec -> mode ,
458
- oid_to_hex ( anonymize ?
459
- anonymize_oid (& spec -> oid ) :
460
- & spec -> oid ));
479
+ anonymize ?
480
+ anonymize_oid (oid_to_hex ( & spec -> oid ) ) :
481
+ oid_to_hex ( & spec -> oid ));
461
482
else {
462
483
struct object * object = lookup_object (the_repository ,
463
484
& spec -> oid );
@@ -493,12 +514,12 @@ static const char *find_encoding(const char *begin, const char *end)
493
514
return bol ;
494
515
}
495
516
496
- static void * anonymize_ref_component (const void * old , size_t * len )
517
+ static char * anonymize_ref_component (void * data )
497
518
{
498
519
static int counter ;
499
520
struct strbuf out = STRBUF_INIT ;
500
521
strbuf_addf (& out , "ref%d" , counter ++ );
501
- return strbuf_detach (& out , len );
522
+ return strbuf_detach (& out , NULL );
502
523
}
503
524
504
525
static const char * anonymize_refname (const char * refname )
@@ -517,13 +538,6 @@ static const char *anonymize_refname(const char *refname)
517
538
static struct strbuf anon = STRBUF_INIT ;
518
539
int i ;
519
540
520
- /*
521
- * We also leave "master" as a special case, since it does not reveal
522
- * anything interesting.
523
- */
524
- if (!strcmp (refname , "refs/heads/master" ))
525
- return refname ;
526
-
527
541
strbuf_reset (& anon );
528
542
for (i = 0 ; i < ARRAY_SIZE (prefixes ); i ++ ) {
529
543
if (skip_prefix (refname , prefixes [i ], & refname )) {
@@ -546,14 +560,13 @@ static char *anonymize_commit_message(const char *old)
546
560
return xstrfmt ("subject %d\n\nbody\n" , counter ++ );
547
561
}
548
562
549
- static struct hashmap idents ;
550
- static void * anonymize_ident (const void * old , size_t * len )
563
+ static char * anonymize_ident (void * data )
551
564
{
552
565
static int counter ;
553
566
struct strbuf out = STRBUF_INIT ;
554
567
strbuf_addf (
& out ,
"User %d <user%[email protected] >" ,
counter ,
counter );
555
568
counter ++ ;
556
- return strbuf_detach (& out , len );
569
+ return strbuf_detach (& out , NULL );
557
570
}
558
571
559
572
/*
@@ -563,6 +576,7 @@ static void *anonymize_ident(const void *old, size_t *len)
563
576
*/
564
577
static void anonymize_ident_line (const char * * beg , const char * * end )
565
578
{
579
+ static struct hashmap idents ;
566
580
static struct strbuf buffers [] = { STRBUF_INIT , STRBUF_INIT };
567
581
static unsigned which_buffer ;
568
582
@@ -588,9 +602,9 @@ static void anonymize_ident_line(const char **beg, const char **end)
588
602
size_t len ;
589
603
590
604
len = split .mail_end - split .name_begin ;
591
- ident = anonymize_mem (& idents , anonymize_ident ,
592
- split .name_begin , & len );
593
- strbuf_add (out , ident , len );
605
+ ident = anonymize_str (& idents , anonymize_ident ,
606
+ split .name_begin , len , NULL );
607
+ strbuf_addstr (out , ident );
594
608
strbuf_addch (out , ' ' );
595
609
strbuf_add (out , split .date_begin , split .tz_end - split .date_begin );
596
610
} else {
@@ -712,9 +726,10 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
712
726
if (mark )
713
727
printf (":%d\n" , mark );
714
728
else
715
- printf ("%s\n" , oid_to_hex (anonymize ?
716
- anonymize_oid (& obj -> oid ) :
717
- & obj -> oid ));
729
+ printf ("%s\n" ,
730
+ anonymize ?
731
+ anonymize_oid (oid_to_hex (& obj -> oid )) :
732
+ oid_to_hex (& obj -> oid ));
718
733
i ++ ;
719
734
}
720
735
@@ -729,12 +744,12 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
729
744
show_progress ();
730
745
}
731
746
732
- static void * anonymize_tag (const void * old , size_t * len )
747
+ static char * anonymize_tag (void * data )
733
748
{
734
749
static int counter ;
735
750
struct strbuf out = STRBUF_INIT ;
736
751
strbuf_addf (& out , "tag message %d" , counter ++ );
737
- return strbuf_detach (& out , len );
752
+ return strbuf_detach (& out , NULL );
738
753
}
739
754
740
755
static void handle_tail (struct object_array * commits , struct rev_info * revs ,
@@ -804,8 +819,8 @@ static void handle_tag(const char *name, struct tag *tag)
804
819
name = anonymize_refname (name );
805
820
if (message ) {
806
821
static struct hashmap tags ;
807
- message = anonymize_mem (& tags , anonymize_tag ,
808
- message , & message_size );
822
+ message = anonymize_str (& tags , anonymize_tag ,
823
+ message , message_size , NULL );
809
824
}
810
825
}
811
826
@@ -1136,6 +1151,37 @@ static void handle_deletes(void)
1136
1151
}
1137
1152
}
1138
1153
1154
/*
 * Generator callback used when seeding the map from --anonymize-map:
 * "data" is the user-supplied replacement string, and a duplicate of it
 * is returned.
 */
static char *anonymize_seed(void *data)
{
	const char *replacement = data;

	return xstrdup(replacement);
}
1158
+
1159
+ static int parse_opt_anonymize_map (const struct option * opt ,
1160
+ const char * arg , int unset )
1161
+ {
1162
+ struct hashmap * map = opt -> value ;
1163
+ const char * delim , * value ;
1164
+ size_t keylen ;
1165
+
1166
+ BUG_ON_OPT_NEG (unset );
1167
+
1168
+ delim = strchr (arg , ':' );
1169
+ if (delim ) {
1170
+ keylen = delim - arg ;
1171
+ value = delim + 1 ;
1172
+ } else {
1173
+ keylen = strlen (arg );
1174
+ value = arg ;
1175
+ }
1176
+
1177
+ if (!keylen || !* value )
1178
+ return error (_ ("--anonymize-map token cannot be empty" ));
1179
+
1180
+ anonymize_str (map , anonymize_seed , arg , keylen , (void * )value );
1181
+
1182
+ return 0 ;
1183
+ }
1184
+
1139
1185
int cmd_fast_export (int argc , const char * * argv , const char * prefix )
1140
1186
{
1141
1187
struct rev_info revs ;
@@ -1177,6 +1223,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
1177
1223
OPT_STRING_LIST (0 , "refspec" , & refspecs_list , N_ ("refspec" ),
1178
1224
N_ ("Apply refspec to exported refs" )),
1179
1225
OPT_BOOL (0 , "anonymize" , & anonymize , N_ ("anonymize output" )),
1226
+ OPT_CALLBACK_F (0 , "anonymize-map" , & anonymized_seeds , N_ ("from:to" ),
1227
+ N_ ("convert <from> to <to> in anonymized output" ),
1228
+ PARSE_OPT_NONEG , parse_opt_anonymize_map ),
1180
1229
OPT_BOOL (0 , "reference-excluded-parents" ,
1181
1230
& reference_excluded_commits , N_ ("Reference parents which are not in fast-export stream by object id" )),
1182
1231
OPT_BOOL (0 , "show-original-ids" , & show_original_ids ,
@@ -1204,6 +1253,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
1204
1253
if (argc > 1 )
1205
1254
usage_with_options (fast_export_usage , options );
1206
1255
1256
+ if (anonymized_seeds .cmpfn && !anonymize )
1257
+ die (_ ("--anonymize-map without --anonymize does not make sense" ));
1258
+
1207
1259
if (refspecs_list .nr ) {
1208
1260
int i ;
1209
1261
0 commit comments