Skip to content

Commit 350d870

Browse files
jeffhostetler authored and gitster committed
p0006-read-tree-checkout: perf test to time read-tree
Created t/perf/repos/many-files.sh to generate large, but artificial repositories. Created t/perf/inflate-repo.sh to alter an EXISTING repo to have a set of large commits. This can be used to create a branch with 1M+ files in repositories like git.git or linux.git, but with more realistic content. It does this by making multiple copies of the entire worktree in a series of sub-directories. The branch name and ballast structure created by both scripts match, so either script can be used to generate very large test repositories for the following perf test. Created t/perf/p0006-read-tree-checkout.sh to measure performance on various read-tree, checkout, and update-index operations. This test can run using either normal repos or ones from the above scripts. Signed-off-by: Jeff Hostetler <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent a6db3fb commit 350d870

File tree

4 files changed

+263
-0
lines changed

4 files changed

+263
-0
lines changed

t/perf/p0006-read-tree-checkout.sh

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/bin/sh
2+
#
3+
# This test measures the performance of various read-tree
4+
# and checkout operations. It is primarily interested in
5+
# the algorithmic costs of index operations and recursive
6+
# tree traversal -- and NOT disk I/O on thousands of files.
7+
8+
test_description="Tests performance of read-tree"
9+
10+
. ./perf-lib.sh
11+
12+
test_perf_default_repo
13+
14+
# If the test repo was generated by ./repos/many-files.sh
15+
# then we know something about the data shape and branches,
16+
# so we can isolate testing to the ballast-related commits
17+
# and setup sparse-checkout so we don't have to populate
18+
# the ballast files and directories.
19+
#
20+
# Otherwise, we make some general assumptions about the
21+
# repo and consider the entire history of the current
22+
# branch to be the ballast.
23+
24+
# Create the branches used by the timing tests below and record the
# number of index entries in $nr_files (used in the test titles):
#
#   br_base            master (synthetic repo) or the root commit
#   br_ballast         the parent of the ballast tip (or HEAD^)
#   br_ballast_alias   a second name for the same commit as br_ballast
#   br_ballast_plus_1  the ballast tip itself (or HEAD)
#
# Every step is &&-chained so that a failing "git branch", "git config"
# or sparse-checkout write fails the setup instead of being ignored.
test_expect_success "setup repo" '
	if git rev-parse --verify refs/heads/p0006-ballast^{commit}
	then
		echo Assuming synthetic repo from many-files.sh &&
		git branch br_base master &&
		git branch br_ballast p0006-ballast^ &&
		git branch br_ballast_alias p0006-ballast^ &&
		git branch br_ballast_plus_1 p0006-ballast &&
		git config --local core.sparsecheckout 1 &&
		printf "%s\n" "/*" "!ballast/*" >.git/info/sparse-checkout
	else
		echo Assuming non-synthetic repo... &&
		git branch br_base $(git rev-list HEAD | tail -n 1) &&
		{
			git branch br_ballast HEAD^ ||
			error "no ancestor commit from current head"
		} &&
		git branch br_ballast_alias HEAD^ &&
		git branch br_ballast_plus_1 HEAD
	fi &&
	git checkout -q br_ballast &&
	nr_files=$(git ls-files | wc -l)
'
47+
48+
# Time a dry-run (-n) "read-tree -m" of br_base and br_ballast.
test_perf "read-tree br_base br_ballast ($nr_files)" '
49+
git read-tree -m br_base br_ballast -n
50+
'
51+
52+
# Time a round trip: check out br_base, then return to br_ballast.
test_perf "switch between br_base br_ballast ($nr_files)" '
53+
git checkout -q br_base &&
54+
git checkout -q br_ballast
55+
'
56+
57+
# Time a round trip: check out br_ballast_plus_1, then return to br_ballast.
test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" '
58+
git checkout -q br_ballast_plus_1 &&
59+
git checkout -q br_ballast
60+
'
61+
62+
# Time a round trip between br_ballast_alias and br_ballast, which the
# setup step created from the same commit.
test_perf "switch between aliases ($nr_files)" '
63+
git checkout -q br_ballast_alias &&
64+
git checkout -q br_ballast
65+
'
66+
67+
test_done

t/perf/repos/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
gen-*/

t/perf/repos/inflate-repo.sh

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/bin/sh
2+
# Inflate the size of an EXISTING repo.
3+
#
4+
# This script should be run inside the worktree of a TEST repo.
5+
# It will use the contents of the current HEAD to generate a
6+
# commit containing copies of the current worktree such that the
7+
# total size of the commit has at least <target_size> files.
8+
#
9+
# Usage: [-t target_size] [-b branch_name]
10+
11+
set -e

# Defaults, overridable from the command line (see parse_args).
target_size=10000
branch_name=p0006-ballast
ballast=ballast

# usage: parse_args [-t target_size] [-b branch_name]
#
# Sets $target_size and $branch_name from the given options.  Prints an
# error to stderr and exits with status 1 on an unknown option, on an
# option that lacks its argument, or on a target size that is not a
# non-negative decimal integer (it is compared with "test -le" and
# "test -lt" later in this script, which need an integer).
parse_args () {
	while test "$#" -ne 0
	do
		case "$1" in
		-b)
			test "$#" -ge 2 || {
				echo 'error: -b requires an argument' >&2
				exit 1
			}
			branch_name=$2
			shift 2 ;;
		-t)
			test "$#" -ge 2 || {
				echo 'error: -t requires an argument' >&2
				exit 1
			}
			target_size=$2
			shift 2 ;;
		*)
			echo "error: unknown option '$1'" >&2
			exit 1 ;;
		esac
	done

	case "$target_size" in
	''|*[!0-9]*)
		echo "error: target size must be a non-negative integer, got '$target_size'" >&2
		exit 1 ;;
	esac
}

parse_args "$@"
34+
35+
# Remember the branch we started on so we can return to it at the end.
# Do this before creating the temporary file: it fails on a detached HEAD.
src_branch=$(git symbolic-ref --short HEAD)

# GEN_src_list holds one "ls-tree -r" line per entry in HEAD.  Remove it
# on every exit path, including failures under "set -e".
trap 'rm -f GEN_src_list' EXIT
git ls-tree -r HEAD >GEN_src_list

# The arithmetic expansion strips the padding some "wc" implementations
# put in front of the count.
nr_src_files=$(( $(wc -l <GEN_src_list) ))

echo "Branch $src_branch initially has $nr_src_files files."

if test "$target_size" -le "$nr_src_files"
then
	echo "Repository already exceeds target size $target_size."
	exit 1
fi

# Each copy adds $nr_src_files entries; with nothing to copy the loop
# below could never reach the target.
if test "$nr_src_files" -eq 0
then
	echo "error: HEAD contains no files to copy" >&2
	exit 1
fi

# Create well-known branch and add 1 file change to start
# it off before the ballast.
git checkout -b "$branch_name" HEAD
echo "$target_size" > inflate-repo.params
git add inflate-repo.params
git commit -q -m params

# Create ballast in our branch.  ls-tree separates the object name from
# the path with a TAB, and "update-index --index-info" expects the same,
# so insert the "$ballast/<copy>/" prefix right after the first TAB.
tab=$(printf '\t')
copy=1
nr_files=$nr_src_files
while test "$nr_files" -lt "$target_size"
do
	sed -e "s|$tab|$tab$ballast/$copy/|" <GEN_src_list |
	git update-index --index-info

	nr_files=$(( nr_files + nr_src_files ))
	copy=$(( copy + 1 ))
done
rm -f GEN_src_list
git commit -q -m "ballast"

# Modify 1 file and commit.
echo "$target_size" >> inflate-repo.params
git add inflate-repo.params
git commit -q -m "ballast plus 1"

nr_files=$(git ls-files | wc -l)

# Checkout the original branch to put repo in canonical state (because
# the perf test may need to clone and enable sparse-checkout
# before attempting to checkout a commit with the ballast
# (because it may contain 100K directories and 1M files)).
git checkout "$src_branch"

echo "Repository inflated. Branch $branch_name has $nr_files files."

exit 0

t/perf/repos/many-files.sh

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/bin/sh
2+
# Generate test data repository using the given parameters.
3+
# When -r is omitted, we create "gen-many-files-<depth>.<width>.<files>.git".
4+
#
5+
# Usage: [-r repo] [-d depth] [-w width] [-f files]
6+
#
7+
# -r repo: path to the new repo to be generated
8+
# -d depth: the depth of sub-directories
9+
# -w width: the number of sub-directories at each level
10+
# -f files: the number of files created in each directory
11+
#
12+
# Note that all files will have the same SHA-1 and each
13+
# directory at a level will have the same SHA-1, so we
14+
# will potentially have a large index, but not a large
15+
# ODB.
16+
#
17+
# Ballast will be created under "ballast/".
18+
19+
# Object name used for every generated ballast file.
EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

set -e

# Defaults, overridable from the command line (see parse_args).
# (5, 10, 9) will create 999,999 ballast files.
# (4, 10, 9) will create 99,999 ballast files.
depth=5
width=10
files=9
# Start empty so a "repo" variable inherited from the environment
# cannot override the default name chosen later when -r is not given.
repo=

# usage: parse_args [-r repo] [-d depth] [-w width] [-f files]
#
# Sets $repo, $depth, $width and $files from the given options.  Prints
# an error to stderr and exits with status 1 on an unknown option, on an
# option that lacks its argument, or when depth, width or files is not a
# non-negative decimal integer.
parse_args () {
	while test "$#" -ne 0
	do
		case "$1" in
		-r|-d|-w|-f)
			test "$#" -ge 2 || {
				echo "error: $1 requires an argument" >&2
				exit 1
			}
			case "$1" in
			-r) repo=$2 ;;
			-d) depth=$2 ;;
			-w) width=$2 ;;
			-f) files=$2 ;;
			esac
			shift 2 ;;
		*)
			echo "error: unknown option '$1'" >&2
			exit 1 ;;
		esac
	done

	for value in "$depth" "$width" "$files"
	do
		case "$value" in
		''|*[!0-9]*)
			echo "error: depth, width and files must be non-negative integers, got '$value'" >&2
			exit 1 ;;
		esac
	done
}

parse_args "$@"
56+
57+
# Inflate the index with thousands of empty files.
58+
# usage: dir depth width files
59+
fill_index() {
60+
awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 '
61+
function make_paths(dir, depth, width, files, f, w) {
62+
for (f = 1; f <= files; f++) {
63+
print dir "/file" f
64+
}
65+
if (depth > 0) {
66+
for (w = 1; w <= width; w++) {
67+
make_paths(dir "/dir" w, depth - 1, width, files)
68+
}
69+
}
70+
}
71+
END { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
72+
' </dev/null |
73+
sed "s/^/100644 $EMPTY_BLOB /" |
74+
git update-index --index-info
75+
return 0
76+
}
77+
78+
# Fall back to a repo name derived from the parameters when -r was not given.
test -n "$repo" || repo=gen-many-files-$depth.$width.$files.git

mkdir "$repo"
cd "$repo"
git init .

# The steps below (and p0006-read-tree-checkout.sh) refer to "master" by
# name, so point the still-unborn HEAD at it instead of relying on the
# configured default branch name.
git symbolic-ref HEAD refs/heads/master

# Create an initial commit just to define master.
touch many-files.empty
echo "$depth $width $files" >many-files.params
git add many-files.empty many-files.params
git commit -q -m params

# Create ballast for p0006 based upon the given params and
# inflate the index with thousands of empty files and commit.
git checkout -b p0006-ballast
fill_index "ballast" "$depth" "$width" "$files"
git commit -q -m "ballast"

nr_files=$(git ls-files | wc -l)

# Modify 1 file and commit.
echo "$depth $width $files" >>many-files.params
git add many-files.params
git commit -q -m "ballast plus 1"

# Checkout master to put repo in canonical state (because
# the perf test may need to clone and enable sparse-checkout
# before attempting to checkout a commit with the ballast
# (because it may contain 100K directories and 1M files)).
git checkout master

echo "Repository $repo ($depth, $width, $files) created. Ballast $nr_files."
exit 0

0 commit comments

Comments
 (0)