Skip to content

Commit 32f3c54

Browse files
derrickstoleegitster
authored andcommitted
multi-pack-index: write pack names in chunk
The multi-pack-index needs to track which packfiles it indexes. Store these in our first required chunk. Since filenames are not well structured, add padding to keep good alignment in later chunks. Modify the 'git multi-pack-index read' subcommand to output the existence of the pack-file name chunk. Modify t5319-multi-pack-index.sh to reflect this new output and the new expected number of chunks. Defense in depth: A pattern we are using in the multi-pack-index feature is to verify the data as we write it. We want to ensure we never write invalid data to the multi-pack-index. There are many checks that verify that the values we are writing fit the format definitions. This mainly helps developers while working on the feature, but it can also identify issues that only appear when dealing with very large data sets. These large sets are hard to encode into test cases. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 396f257 commit 32f3c54

File tree

5 files changed

+189
-3
lines changed

5 files changed

+189
-3
lines changed

Documentation/technical/pack-format.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,12 @@ CHUNK LOOKUP:
296296

297297
CHUNK DATA:
298298

299+
Packfile Names (ID: {'P', 'N', 'A', 'M'})
300+
Stores the packfile names as concatenated, null-terminated strings.
301+
Packfiles must be listed in lexicographic order for fast lookups by
302+
name. This is the only chunk not guaranteed to be a multiple of four
303+
bytes in length, so should be the last chunk for alignment reasons.
304+
299305
(This section intentionally left incomplete.)
300306

301307
TRAILER:

midx.c

Lines changed: 172 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
#define MIDX_HASH_LEN 20
1818
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
1919

20+
#define MIDX_MAX_CHUNKS 1
21+
#define MIDX_CHUNK_ALIGNMENT 4
22+
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
23+
#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
24+
2025
static char *get_midx_filename(const char *object_dir)
2126
{
2227
return xstrfmt("%s/pack/multi-pack-index", object_dir);
@@ -31,6 +36,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
3136
void *midx_map = NULL;
3237
uint32_t hash_version;
3338
char *midx_name = get_midx_filename(object_dir);
39+
uint32_t i;
3440

3541
fd = git_open(midx_name);
3642

@@ -82,6 +88,33 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
8288

8389
m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
8490

91+
for (i = 0; i < m->num_chunks; i++) {
92+
uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
93+
MIDX_CHUNKLOOKUP_WIDTH * i);
94+
uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
95+
MIDX_CHUNKLOOKUP_WIDTH * i);
96+
97+
switch (chunk_id) {
98+
case MIDX_CHUNKID_PACKNAMES:
99+
m->chunk_pack_names = m->data + chunk_offset;
100+
break;
101+
102+
case 0:
103+
die(_("terminating multi-pack-index chunk id appears earlier than expected"));
104+
break;
105+
106+
default:
107+
/*
108+
* Do nothing on unrecognized chunks, allowing future
109+
* extensions to add optional chunks.
110+
*/
111+
break;
112+
}
113+
}
114+
115+
if (!m->chunk_pack_names)
116+
die(_("multi-pack-index missing required pack-name chunk"));
117+
85118
return m;
86119

87120
cleanup_fail:
@@ -113,8 +146,11 @@ static size_t write_midx_header(struct hashfile *f,
113146

114147
struct pack_list {
115148
struct packed_git **list;
149+
char **names;
116150
uint32_t nr;
117151
uint32_t alloc_list;
152+
uint32_t alloc_names;
153+
size_t pack_name_concat_len;
118154
};
119155

120156
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -124,6 +160,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
124160

125161
if (ends_with(file_name, ".idx")) {
126162
ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
163+
ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
127164

128165
packs->list[packs->nr] = add_packed_git(full_path,
129166
full_path_len,
@@ -134,18 +171,89 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
134171
return;
135172
}
136173

174+
packs->names[packs->nr] = xstrdup(file_name);
175+
packs->pack_name_concat_len += strlen(file_name) + 1;
137176
packs->nr++;
138177
}
139178
}
140179

180+
struct pack_pair {
181+
uint32_t pack_int_id;
182+
char *pack_name;
183+
};
184+
185+
static int pack_pair_compare(const void *_a, const void *_b)
186+
{
187+
struct pack_pair *a = (struct pack_pair *)_a;
188+
struct pack_pair *b = (struct pack_pair *)_b;
189+
return strcmp(a->pack_name, b->pack_name);
190+
}
191+
192+
static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
193+
{
194+
uint32_t i;
195+
struct pack_pair *pairs;
196+
197+
ALLOC_ARRAY(pairs, nr_packs);
198+
199+
for (i = 0; i < nr_packs; i++) {
200+
pairs[i].pack_int_id = i;
201+
pairs[i].pack_name = pack_names[i];
202+
}
203+
204+
QSORT(pairs, nr_packs, pack_pair_compare);
205+
206+
for (i = 0; i < nr_packs; i++) {
207+
pack_names[i] = pairs[i].pack_name;
208+
perm[pairs[i].pack_int_id] = i;
209+
}
210+
211+
free(pairs);
212+
}
213+
214+
static size_t write_midx_pack_names(struct hashfile *f,
215+
char **pack_names,
216+
uint32_t num_packs)
217+
{
218+
uint32_t i;
219+
unsigned char padding[MIDX_CHUNK_ALIGNMENT];
220+
size_t written = 0;
221+
222+
for (i = 0; i < num_packs; i++) {
223+
size_t writelen = strlen(pack_names[i]) + 1;
224+
225+
if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
226+
BUG("incorrect pack-file order: %s before %s",
227+
pack_names[i - 1],
228+
pack_names[i]);
229+
230+
hashwrite(f, pack_names[i], writelen);
231+
written += writelen;
232+
}
233+
234+
/* add padding to be aligned */
235+
i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
236+
if (i < MIDX_CHUNK_ALIGNMENT) {
237+
memset(padding, 0, sizeof(padding));
238+
hashwrite(f, padding, i);
239+
written += i;
240+
}
241+
242+
return written;
243+
}
244+
141245
int write_midx_file(const char *object_dir)
142246
{
143-
unsigned char num_chunks = 0;
247+
unsigned char cur_chunk, num_chunks = 0;
144248
char *midx_name;
145249
uint32_t i;
146250
struct hashfile *f = NULL;
147251
struct lock_file lk;
148252
struct pack_list packs;
253+
uint32_t *pack_perm = NULL;
254+
uint64_t written = 0;
255+
uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
256+
uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
149257

150258
midx_name = get_midx_filename(object_dir);
151259
if (safe_create_leading_directories(midx_name)) {
@@ -156,16 +264,76 @@ int write_midx_file(const char *object_dir)
156264

157265
packs.nr = 0;
158266
packs.alloc_list = 16;
267+
packs.alloc_names = 16;
159268
packs.list = NULL;
269+
packs.pack_name_concat_len = 0;
160270
ALLOC_ARRAY(packs.list, packs.alloc_list);
271+
ALLOC_ARRAY(packs.names, packs.alloc_names);
161272

162273
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs);
163274

275+
if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
276+
packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
277+
(packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
278+
279+
ALLOC_ARRAY(pack_perm, packs.nr);
280+
sort_packs_by_name(packs.names, packs.nr, pack_perm);
281+
164282
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
165283
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
166284
FREE_AND_NULL(midx_name);
167285

168-
write_midx_header(f, num_chunks, packs.nr);
286+
cur_chunk = 0;
287+
num_chunks = 1;
288+
289+
written = write_midx_header(f, num_chunks, packs.nr);
290+
291+
chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
292+
chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
293+
294+
cur_chunk++;
295+
chunk_ids[cur_chunk] = 0;
296+
chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
297+
298+
for (i = 0; i <= num_chunks; i++) {
299+
if (i && chunk_offsets[i] < chunk_offsets[i - 1])
300+
BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
301+
chunk_offsets[i - 1],
302+
chunk_offsets[i]);
303+
304+
if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
305+
BUG("chunk offset %"PRIu64" is not properly aligned",
306+
chunk_offsets[i]);
307+
308+
hashwrite_be32(f, chunk_ids[i]);
309+
hashwrite_be32(f, chunk_offsets[i] >> 32);
310+
hashwrite_be32(f, chunk_offsets[i]);
311+
312+
written += MIDX_CHUNKLOOKUP_WIDTH;
313+
}
314+
315+
for (i = 0; i < num_chunks; i++) {
316+
if (written != chunk_offsets[i])
317+
BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
318+
chunk_offsets[i],
319+
written,
320+
chunk_ids[i]);
321+
322+
switch (chunk_ids[i]) {
323+
case MIDX_CHUNKID_PACKNAMES:
324+
written += write_midx_pack_names(f, packs.names, packs.nr);
325+
break;
326+
327+
default:
328+
BUG("trying to write unknown chunk id %"PRIx32,
329+
chunk_ids[i]);
330+
}
331+
}
332+
333+
if (written != chunk_offsets[num_chunks])
334+
BUG("incorrect final offset %"PRIu64" != %"PRIu64,
335+
written,
336+
chunk_offsets[num_chunks]);
169337

170338
finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
171339
commit_lock_file(&lk);
@@ -175,8 +343,10 @@ int write_midx_file(const char *object_dir)
175343
close_pack(packs.list[i]);
176344
free(packs.list[i]);
177345
}
346+
free(packs.names[i]);
178347
}
179348

180349
free(packs.list);
350+
free(packs.names);
181351
return 0;
182352
}

midx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ struct multi_pack_index {
1414
uint32_t num_packs;
1515
uint32_t num_objects;
1616

17+
const unsigned char *chunk_pack_names;
18+
1719
char object_dir[FLEX_ARRAY];
1820
};
1921

t/helper/test-read-midx.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ static int read_midx_file(const char *object_dir)
1717
m->num_chunks,
1818
m->num_packs);
1919

20+
printf("chunks:");
21+
22+
if (m->chunk_pack_names)
23+
printf(" pack-names");
24+
25+
printf("\n");
26+
2027
printf("object-dir: %s\n", m->object_dir);
2128

2229
return 0;

t/t5319-multi-pack-index.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ test_description='multi-pack-indexes'
66
midx_read_expect () {
77
NUM_PACKS=$1
88
cat >expect <<-EOF
9-
header: 4d494458 1 0 $NUM_PACKS
9+
header: 4d494458 1 1 $NUM_PACKS
10+
chunks: pack-names
1011
object-dir: .
1112
EOF
1213
test-tool read-midx . >actual &&

0 commit comments

Comments
 (0)