Skip to content

Commit 0c83680

Browse files
committed
Merge branch 'master' of git://git.bogomips.org/git-svn
* 'master' of git://git.bogomips.org/git-svn: git-svn: improve rebase/mkdirs performance
2 parents dbba85e + 8262574 commit 0c83680

File tree

1 file changed

+76
-8
lines changed

1 file changed

+76
-8
lines changed

perl/Git/SVN.pm

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,20 +1211,87 @@ sub do_fetch {
12111211
sub mkemptydirs {
12121212
my ($self, $r) = @_;
12131213

1214+
# add/remove/collect a paths table
1215+
#
1216+
# Paths are split into a tree of nodes, stored as a hash of hashes.
1217+
#
1218+
# Each node contains a 'path' entry for the path (if any) associated
1219+
# with that node and a 'children' entry for any nodes under that
1220+
# location.
1221+
#
1222+
# Removing a path requires a hash lookup for each component then
1223+
# dropping that node (and anything under it), which is substantially
1224+
# faster than a grep slice into a single hash of paths for large
1225+
# numbers of paths.
1226+
#
1227+
# For a large (200K) number of empty_dir directives this reduces
1228+
# scanning time to 3 seconds vs 10 minutes for grep+delete on a single
1229+
# hash of paths.
1230+
sub add_path {
1231+
my ($paths_table, $path) = @_;
1232+
my $node_ref;
1233+
1234+
foreach my $x (split('/', $path)) {
1235+
if (!exists($paths_table->{$x})) {
1236+
$paths_table->{$x} = { children => {} };
1237+
}
1238+
1239+
$node_ref = $paths_table->{$x};
1240+
$paths_table = $paths_table->{$x}->{children};
1241+
}
1242+
1243+
$node_ref->{path} = $path;
1244+
}
1245+
1246+
sub remove_path {
1247+
my ($paths_table, $path) = @_;
1248+
my $nodes_ref;
1249+
my $node_name;
1250+
1251+
foreach my $x (split('/', $path)) {
1252+
if (!exists($paths_table->{$x})) {
1253+
return;
1254+
}
1255+
1256+
$nodes_ref = $paths_table;
1257+
$node_name = $x;
1258+
1259+
$paths_table = $paths_table->{$x}->{children};
1260+
}
1261+
1262+
delete($nodes_ref->{$node_name});
1263+
}
1264+
1265+
sub collect_paths {
1266+
my ($paths_table, $paths_ref) = @_;
1267+
1268+
foreach my $v (values %$paths_table) {
1269+
my $p = $v->{path};
1270+
my $c = $v->{children};
1271+
1272+
collect_paths($c, $paths_ref);
1273+
1274+
if (defined($p)) {
1275+
push(@$paths_ref, $p);
1276+
}
1277+
}
1278+
}
1279+
12141280
sub scan {
1215-
my ($r, $empty_dirs, $line) = @_;
1281+
my ($r, $paths_table, $line) = @_;
12161282
if (defined $r && $line =~ /^r(\d+)$/) {
12171283
return 0 if $1 > $r;
12181284
} elsif ($line =~ /^ \+empty_dir: (.+)$/) {
1219-
$empty_dirs->{$1} = 1;
1285+
add_path($paths_table, $1);
12201286
} elsif ($line =~ /^ \-empty_dir: (.+)$/) {
1221-
my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs);
1222-
delete @$empty_dirs{@d};
1287+
remove_path($paths_table, $1);
12231288
}
12241289
1; # continue
12251290
};
12261291

1227-
my %empty_dirs = ();
1292+
my @empty_dirs;
1293+
my %paths_table;
1294+
12281295
my $gz_file = "$self->{dir}/unhandled.log.gz";
12291296
if (-f $gz_file) {
12301297
if (!can_compress()) {
@@ -1235,7 +1302,7 @@ sub mkemptydirs {
12351302
die "Unable to open $gz_file: $!\n";
12361303
my $line;
12371304
while ($gz->gzreadline($line) > 0) {
1238-
scan($r, \%empty_dirs, $line) or last;
1305+
scan($r, \%paths_table, $line) or last;
12391306
}
12401307
$gz->gzclose;
12411308
}
@@ -1244,13 +1311,14 @@ sub mkemptydirs {
12441311
if (open my $fh, '<', "$self->{dir}/unhandled.log") {
12451312
binmode $fh or croak "binmode: $!";
12461313
while (<$fh>) {
1247-
scan($r, \%empty_dirs, $_) or last;
1314+
scan($r, \%paths_table, $_) or last;
12481315
}
12491316
close $fh;
12501317
}
12511318

1319+
collect_paths(\%paths_table, \@empty_dirs);
12521320
my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
1253-
foreach my $d (sort keys %empty_dirs) {
1321+
foreach my $d (sort @empty_dirs) {
12541322
$d = uri_decode($d);
12551323
$d =~ s/$strip//;
12561324
next unless length($d);

0 commit comments

Comments
 (0)