9
9
#include "hashmap.h"
10
10
#include "progress.h"
11
11
#include "promisor-remote.h"
12
+ #include "strmap.h"
12
13
13
14
/* Table of rename/copy destinations */
14
15
15
16
static struct diff_rename_dst {
16
- struct diff_filespec * two ;
17
- struct diff_filepair * pair ;
17
+ struct diff_filepair * p ;
18
+ struct diff_filespec * filespec_to_free ;
19
+ int is_rename ; /* false -> just a create; true -> rename or copy */
18
20
} * rename_dst ;
19
21
static int rename_dst_nr , rename_dst_alloc ;
22
+ /* Mapping from break source pathname to break destination index */
23
+ static struct strintmap * break_idx = NULL ;
20
24
21
- static int find_rename_dst (struct diff_filespec * two )
22
- {
23
- int first , last ;
24
-
25
- first = 0 ;
26
- last = rename_dst_nr ;
27
- while (last > first ) {
28
- int next = first + ((last - first ) >> 1 );
29
- struct diff_rename_dst * dst = & (rename_dst [next ]);
30
- int cmp = strcmp (two -> path , dst -> two -> path );
31
- if (!cmp )
32
- return next ;
33
- if (cmp < 0 ) {
34
- last = next ;
35
- continue ;
36
- }
37
- first = next + 1 ;
38
- }
39
- return - first - 1 ;
40
- }
41
-
42
- static struct diff_rename_dst * locate_rename_dst (struct diff_filespec * two )
25
+ static struct diff_rename_dst * locate_rename_dst (struct diff_filepair * p )
43
26
{
44
- int ofs = find_rename_dst (two );
45
- return ofs < 0 ? NULL : & rename_dst [ofs ];
27
+ /* Lookup by p->ONE->path */
28
+ int idx = break_idx ? strintmap_get (break_idx , p -> one -> path ) : -1 ;
29
+ return (idx == -1 ) ? NULL : & rename_dst [idx ];
46
30
}
47
31
48
32
/*
49
33
* Returns 0 on success, -1 if we found a duplicate.
50
34
*/
51
- static int add_rename_dst (struct diff_filespec * two )
35
+ static int add_rename_dst (struct diff_filepair * p )
52
36
{
53
- int first = find_rename_dst (two );
54
-
55
- if (first >= 0 )
56
- return -1 ;
57
- first = - first - 1 ;
58
-
59
- /* insert to make it at "first" */
60
37
ALLOC_GROW (rename_dst , rename_dst_nr + 1 , rename_dst_alloc );
38
+ rename_dst [rename_dst_nr ].p = p ;
39
+ rename_dst [rename_dst_nr ].filespec_to_free = NULL ;
40
+ rename_dst [rename_dst_nr ].is_rename = 0 ;
61
41
rename_dst_nr ++ ;
62
- if (first < rename_dst_nr )
63
- MOVE_ARRAY (rename_dst + first + 1 , rename_dst + first ,
64
- rename_dst_nr - first - 1 );
65
- rename_dst [first ].two = alloc_filespec (two -> path );
66
- fill_filespec (rename_dst [first ].two , & two -> oid , two -> oid_valid ,
67
- two -> mode );
68
- rename_dst [first ].pair = NULL ;
69
42
return 0 ;
70
43
}
71
44
@@ -76,36 +49,20 @@ static struct diff_rename_src {
76
49
} * rename_src ;
77
50
static int rename_src_nr , rename_src_alloc ;
78
51
79
- static struct diff_rename_src * register_rename_src (struct diff_filepair * p )
52
+ static void register_rename_src (struct diff_filepair * p )
80
53
{
81
- int first , last ;
82
- struct diff_filespec * one = p -> one ;
83
- unsigned short score = p -> score ;
84
-
85
- first = 0 ;
86
- last = rename_src_nr ;
87
- while (last > first ) {
88
- int next = first + ((last - first ) >> 1 );
89
- struct diff_rename_src * src = & (rename_src [next ]);
90
- int cmp = strcmp (one -> path , src -> p -> one -> path );
91
- if (!cmp )
92
- return src ;
93
- if (cmp < 0 ) {
94
- last = next ;
95
- continue ;
54
+ if (p -> broken_pair ) {
55
+ if (!break_idx ) {
56
+ break_idx = xmalloc (sizeof (* break_idx ));
57
+ strintmap_init (break_idx , -1 );
96
58
}
97
- first = next + 1 ;
59
+ strintmap_set ( break_idx , p -> one -> path , rename_dst_nr ) ;
98
60
}
99
61
100
- /* insert to make it at "first" */
101
62
ALLOC_GROW (rename_src , rename_src_nr + 1 , rename_src_alloc );
63
+ rename_src [rename_src_nr ].p = p ;
64
+ rename_src [rename_src_nr ].score = p -> score ;
102
65
rename_src_nr ++ ;
103
- if (first < rename_src_nr )
104
- MOVE_ARRAY (rename_src + first + 1 , rename_src + first ,
105
- rename_src_nr - first - 1 );
106
- rename_src [first ].p = p ;
107
- rename_src [first ].score = score ;
108
- return & (rename_src [first ]);
109
66
}
110
67
111
68
static int basename_same (struct diff_filespec * src , struct diff_filespec * dst )
@@ -141,14 +98,14 @@ static void prefetch(void *prefetch_options)
141
98
struct oid_array to_fetch = OID_ARRAY_INIT ;
142
99
143
100
for (i = 0 ; i < rename_dst_nr ; i ++ ) {
144
- if (rename_dst [i ].pair )
101
+ if (rename_dst [i ].p -> renamed_pair )
145
102
/*
146
103
* The loop in diffcore_rename() will not need these
147
104
* blobs, so skip prefetching.
148
105
*/
149
106
continue ; /* already found exact match */
150
107
diff_add_if_missing (options -> repo , & to_fetch ,
151
- rename_dst [i ].two );
108
+ rename_dst [i ].p -> two );
152
109
}
153
110
for (i = 0 ; i < rename_src_nr ; i ++ ) {
154
111
if (options -> skip_unmodified &&
@@ -258,26 +215,24 @@ static int estimate_similarity(struct repository *r,
258
215
259
216
static void record_rename_pair (int dst_index , int src_index , int score )
260
217
{
261
- struct diff_filespec * src , * dst ;
262
- struct diff_filepair * dp ;
218
+ struct diff_filepair * src = rename_src [ src_index ]. p ;
219
+ struct diff_filepair * dst = rename_dst [ dst_index ]. p ;
263
220
264
- if (rename_dst [ dst_index ]. pair )
221
+ if (dst -> renamed_pair )
265
222
die ("internal error: dst already matched." );
266
223
267
- src = rename_src [src_index ].p -> one ;
268
- src -> rename_used ++ ;
269
- src -> count ++ ;
224
+ src -> one -> rename_used ++ ;
225
+ src -> one -> count ++ ;
270
226
271
- dst = rename_dst [dst_index ].two ;
272
- dst -> count ++ ;
227
+ rename_dst [dst_index ].filespec_to_free = dst -> one ;
228
+ rename_dst [ dst_index ]. is_rename = 1 ;
273
229
274
- dp = diff_queue ( NULL , src , dst ) ;
275
- dp -> renamed_pair = 1 ;
276
- if (!strcmp (src -> path , dst -> path ))
277
- dp -> score = rename_src [src_index ].score ;
230
+ dst -> one = src -> one ;
231
+ dst -> renamed_pair = 1 ;
232
+ if (!strcmp (dst -> one -> path , dst -> two -> path ))
233
+ dst -> score = rename_src [src_index ].score ;
278
234
else
279
- dp -> score = score ;
280
- rename_dst [dst_index ].pair = dp ;
235
+ dst -> score = score ;
281
236
}
282
237
283
238
/*
@@ -323,7 +278,7 @@ static int find_identical_files(struct hashmap *srcs,
323
278
struct diff_options * options )
324
279
{
325
280
int renames = 0 ;
326
- struct diff_filespec * target = rename_dst [dst_index ].two ;
281
+ struct diff_filespec * target = rename_dst [dst_index ].p -> two ;
327
282
struct file_similarity * p , * best = NULL ;
328
283
int i = 100 , best_score = -1 ;
329
284
unsigned int hash = hash_filespec (options -> repo , target );
@@ -434,44 +389,46 @@ static void record_if_better(struct diff_score m[], struct diff_score *o)
434
389
* 1 if we need to disable inexact rename detection;
435
390
* 2 if we would be under the limit if we were given -C instead of -C -C.
436
391
*/
437
- static int too_many_rename_candidates (int num_create ,
392
+ static int too_many_rename_candidates (int num_destinations , int num_sources ,
438
393
struct diff_options * options )
439
394
{
440
395
int rename_limit = options -> rename_limit ;
441
- int num_src = rename_src_nr ;
442
- int i ;
396
+ int i , limited_sources ;
443
397
444
398
options -> needed_rename_limit = 0 ;
445
399
446
400
/*
447
401
* This basically does a test for the rename matrix not
448
402
* growing larger than a "rename_limit" square matrix, ie:
449
403
*
450
- * num_create * num_src > rename_limit * rename_limit
404
+ * num_destinations * num_sources > rename_limit * rename_limit
405
+ *
406
+ * We use st_mult() to check overflow conditions; in the
407
+ * exceptional circumstance that size_t isn't large enough to hold
408
+ * the multiplication, the system won't be able to allocate enough
409
+ * memory for the matrix anyway.
451
410
*/
452
411
if (rename_limit <= 0 )
453
412
rename_limit = 32767 ;
454
- if ((num_create <= rename_limit || num_src <= rename_limit ) &&
455
- ((uint64_t )num_create * (uint64_t )num_src
456
- <= (uint64_t )rename_limit * (uint64_t )rename_limit ))
413
+ if (st_mult (num_destinations , num_sources )
414
+ <= st_mult (rename_limit , rename_limit ))
457
415
return 0 ;
458
416
459
417
options -> needed_rename_limit =
460
- num_src > num_create ? num_src : num_create ;
418
+ num_sources > num_destinations ? num_sources : num_destinations ;
461
419
462
420
/* Are we running under -C -C? */
463
421
if (!options -> flags .find_copies_harder )
464
422
return 1 ;
465
423
466
424
/* Would we bust the limit if we were running under -C? */
467
- for (num_src = i = 0 ; i < rename_src_nr ; i ++ ) {
425
+ for (limited_sources = i = 0 ; i < num_sources ; i ++ ) {
468
426
if (diff_unmodified_pair (rename_src [i ].p ))
469
427
continue ;
470
- num_src ++ ;
428
+ limited_sources ++ ;
471
429
}
472
- if ((num_create <= rename_limit || num_src <= rename_limit ) &&
473
- ((uint64_t )num_create * (uint64_t )num_src
474
- <= (uint64_t )rename_limit * (uint64_t )rename_limit ))
430
+ if (st_mult (num_destinations , limited_sources )
431
+ <= st_mult (rename_limit , rename_limit ))
475
432
return 2 ;
476
433
return 1 ;
477
434
}
@@ -487,7 +444,7 @@ static int find_renames(struct diff_score *mx, int dst_cnt, int minimum_score, i
487
444
(mx [i ].score < minimum_score ))
488
445
break ; /* there is no more usable pair. */
489
446
dst = & rename_dst [mx [i ].dst ];
490
- if (dst -> pair )
447
+ if (dst -> is_rename )
491
448
continue ; /* already done, either exact or fuzzy. */
492
449
if (!copies && rename_src [mx [i ].src ].p -> one -> rename_used )
493
450
continue ;
@@ -505,7 +462,7 @@ void diffcore_rename(struct diff_options *options)
505
462
struct diff_queue_struct outq ;
506
463
struct diff_score * mx ;
507
464
int i , j , rename_count , skip_unmodified = 0 ;
508
- int num_create , dst_cnt ;
465
+ int num_destinations , dst_cnt ;
509
466
struct progress * progress = NULL ;
510
467
511
468
if (!minimum_score )
@@ -522,7 +479,7 @@ void diffcore_rename(struct diff_options *options)
522
479
else if (!options -> flags .rename_empty &&
523
480
is_empty_blob_oid (& p -> two -> oid ))
524
481
continue ;
525
- else if (add_rename_dst (p -> two ) < 0 ) {
482
+ else if (add_rename_dst (p ) < 0 ) {
526
483
warning ("skipping rename detection, detected"
527
484
" duplicate destination '%s'" ,
528
485
p -> two -> path );
@@ -570,13 +527,14 @@ void diffcore_rename(struct diff_options *options)
570
527
* Calculate how many renames are left (but all the source
571
528
* files still remain as options for rename/copies!)
572
529
*/
573
- num_create = (rename_dst_nr - rename_count );
530
+ num_destinations = (rename_dst_nr - rename_count );
574
531
575
532
/* All done? */
576
- if (!num_create )
533
+ if (!num_destinations )
577
534
goto cleanup ;
578
535
579
- switch (too_many_rename_candidates (num_create , options )) {
536
+ switch (too_many_rename_candidates (num_destinations , rename_src_nr ,
537
+ options )) {
580
538
case 1 :
581
539
goto cleanup ;
582
540
case 2 :
@@ -590,15 +548,16 @@ void diffcore_rename(struct diff_options *options)
590
548
if (options -> show_rename_progress ) {
591
549
progress = start_delayed_progress (
592
550
_ ("Performing inexact rename detection" ),
593
- (uint64_t )rename_dst_nr * (uint64_t )rename_src_nr );
551
+ (uint64_t )num_destinations * (uint64_t )rename_src_nr );
594
552
}
595
553
596
- mx = xcalloc (st_mult (NUM_CANDIDATE_PER_DST , num_create ), sizeof (* mx ));
554
+ mx = xcalloc (st_mult (NUM_CANDIDATE_PER_DST , num_destinations ),
555
+ sizeof (* mx ));
597
556
for (dst_cnt = i = 0 ; i < rename_dst_nr ; i ++ ) {
598
- struct diff_filespec * two = rename_dst [i ].two ;
557
+ struct diff_filespec * two = rename_dst [i ].p -> two ;
599
558
struct diff_score * m ;
600
559
601
- if (rename_dst [i ].pair )
560
+ if (rename_dst [i ].is_rename )
602
561
continue ; /* dealt with exact match already. */
603
562
604
563
m = & mx [dst_cnt * NUM_CANDIDATE_PER_DST ];
@@ -629,7 +588,8 @@ void diffcore_rename(struct diff_options *options)
629
588
diff_free_filespec_blob (two );
630
589
}
631
590
dst_cnt ++ ;
632
- display_progress (progress , (uint64_t )(i + 1 )* (uint64_t )rename_src_nr );
591
+ display_progress (progress ,
592
+ (uint64_t )dst_cnt * (uint64_t )rename_src_nr );
633
593
}
634
594
stop_progress (& progress );
635
595
@@ -654,22 +614,8 @@ void diffcore_rename(struct diff_options *options)
654
614
diff_q (& outq , p );
655
615
}
656
616
else if (!DIFF_FILE_VALID (p -> one ) && DIFF_FILE_VALID (p -> two )) {
657
- /*
658
- * Creation
659
- *
660
- * We would output this create record if it has
661
- * not been turned into a rename/copy already.
662
- */
663
- struct diff_rename_dst * dst = locate_rename_dst (p -> two );
664
- if (dst && dst -> pair ) {
665
- diff_q (& outq , dst -> pair );
666
- pair_to_free = p ;
667
- }
668
- else
669
- /* no matching rename/copy source, so
670
- * record this as a creation.
671
- */
672
- diff_q (& outq , p );
617
+ /* Creation */
618
+ diff_q (& outq , p );
673
619
}
674
620
else if (DIFF_FILE_VALID (p -> one ) && !DIFF_FILE_VALID (p -> two )) {
675
621
/*
@@ -690,8 +636,10 @@ void diffcore_rename(struct diff_options *options)
690
636
*/
691
637
if (DIFF_PAIR_BROKEN (p )) {
692
638
/* broken delete */
693
- struct diff_rename_dst * dst = locate_rename_dst (p -> one );
694
- if (dst && dst -> pair )
639
+ struct diff_rename_dst * dst = locate_rename_dst (p );
640
+ if (!dst )
641
+ BUG ("tracking failed somehow; failed to find associated dst for broken pair" );
642
+ if (dst -> is_rename )
695
643
/* counterpart is now rename/copy */
696
644
pair_to_free = p ;
697
645
}
@@ -701,16 +649,14 @@ void diffcore_rename(struct diff_options *options)
701
649
pair_to_free = p ;
702
650
}
703
651
704
- if (pair_to_free )
705
- ;
706
- else
652
+ if (!pair_to_free )
707
653
diff_q (& outq , p );
708
654
}
709
655
else if (!diff_unmodified_pair (p ))
710
656
/* all the usual ones need to be kept */
711
657
diff_q (& outq , p );
712
658
else
713
- /* no need to keep unmodified pairs */
659
+ /* no need to keep unmodified pairs; FIXME: remove earlier? */
714
660
pair_to_free = p ;
715
661
716
662
if (pair_to_free )
@@ -723,11 +669,16 @@ void diffcore_rename(struct diff_options *options)
723
669
diff_debug_queue ("done collapsing" , q );
724
670
725
671
for (i = 0 ; i < rename_dst_nr ; i ++ )
726
- free_filespec (rename_dst [i ].two );
672
+ if (rename_dst [i ].filespec_to_free )
673
+ free_filespec (rename_dst [i ].filespec_to_free );
727
674
728
675
FREE_AND_NULL (rename_dst );
729
676
rename_dst_nr = rename_dst_alloc = 0 ;
730
677
FREE_AND_NULL (rename_src );
731
678
rename_src_nr = rename_src_alloc = 0 ;
679
+ if (break_idx ) {
680
+ strintmap_clear (break_idx );
681
+ FREE_AND_NULL (break_idx );
682
+ }
732
683
return ;
733
684
}
0 commit comments