Skip to content

Commit c98d762

Browse files
pks-tgitster
authored andcommitted
global: ensure that object IDs are always padded
The `oidcmp()` and `oideq()` functions only compare the prefix length as specified by the given hash algorithm. This mandates that the object IDs have a valid hash algorithm set, or otherwise we wouldn't be able to figure out that prefix. As we do not have a hash algorithm in many cases, for example when handling null object IDs, this assumption cannot always be fulfilled. We thus have a fallback in place that instead uses `the_repository` to derive the hash function. This implicit dependency is hidden away from callers and can be quite surprising, especially in contexts where there may be no repository. In theory, we can adapt those functions to always memcmp(3P) the whole length of their hash arrays. But there exist a couple of sites where we populate `struct object_id`s such that only the prefix of its hash that is actually used by the hash algorithm is populated. The remaining bytes are left uninitialized. The fact that those bytes are uninitialized also leads to warnings under Valgrind in some places where we copy those bytes. Refactor callsites where we populate object IDs to always initialize all bytes. This also allows us to get rid of `oidcpy_with_padding()`, for one because the input is now fully initialized, and because `oidcpy()` will now always copy the whole hash array. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 9da95bd commit c98d762

File tree

8 files changed

+15
-26
lines changed

8 files changed

+15
-26
lines changed

hash-ll.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ static inline void oidread(struct object_id *oid, const unsigned char *hash,
288288
const struct git_hash_algo *algop)
289289
{
290290
memcpy(oid->hash, hash, algop->rawsz);
291+
if (algop->rawsz < GIT_MAX_RAWSZ)
292+
memset(oid->hash + algop->rawsz, 0, GIT_MAX_RAWSZ - algop->rawsz);
291293
oid->algo = hash_algo_by_ptr(algop);
292294
}
293295

hash.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,6 @@ static inline int is_null_oid(const struct object_id *oid)
3131
return oideq(oid, null_oid());
3232
}
3333

34-
/* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */
35-
static inline void oidcpy_with_padding(struct object_id *dst,
36-
const struct object_id *src)
37-
{
38-
size_t hashsz;
39-
40-
if (!src->algo)
41-
hashsz = the_hash_algo->rawsz;
42-
else
43-
hashsz = hash_algos[src->algo].rawsz;
44-
45-
memcpy(dst->hash, src->hash, hashsz);
46-
memset(dst->hash + hashsz, 0, GIT_MAX_RAWSZ - hashsz);
47-
dst->algo = src->algo;
48-
}
49-
5034
static inline int is_empty_blob_oid(const struct object_id *oid)
5135
{
5236
return oideq(oid, the_hash_algo->empty_blob);

hex.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,12 @@ int get_oid_hex_algop(const char *hex, struct object_id *oid,
2525
const struct git_hash_algo *algop)
2626
{
2727
int ret = get_hash_hex_algop(hex, oid->hash, algop);
28-
if (!ret)
28+
if (!ret) {
2929
oid_set_algo(oid, algop);
30+
if (algop->rawsz != GIT_MAX_RAWSZ)
31+
memset(oid->hash + algop->rawsz, 0,
32+
GIT_MAX_RAWSZ - algop->rawsz);
33+
}
3034
return ret;
3135
}
3236

http-push.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,7 @@ static void remote_ls(const char *path, int flags,
10161016
/* extract hex from sharded "xx/x{38}" filename */
10171017
static int get_oid_hex_from_objpath(const char *path, struct object_id *oid)
10181018
{
1019+
memset(oid->hash, 0, GIT_MAX_RAWSZ);
10191020
oid->algo = hash_algo_by_ptr(the_hash_algo);
10201021

10211022
if (strlen(path) != the_hash_algo->hexsz + 1)

notes.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree,
427427
hashsz - prefix_len))
428428
goto handle_non_note; /* entry.path is not a SHA1 */
429429

430+
memset(object_oid.hash + hashsz, 0, GIT_MAX_RAWSZ - hashsz);
431+
430432
type = PTR_TYPE_NOTE;
431433
} else if (path_len == 2) {
432434
/* This is potentially an internal node */

object-file.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,6 +2743,8 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
27432743
!hex_to_bytes(oid.hash + 1, de->d_name,
27442744
the_hash_algo->rawsz - 1)) {
27452745
oid_set_algo(&oid, the_hash_algo);
2746+
memset(oid.hash + the_hash_algo->rawsz, 0,
2747+
GIT_MAX_RAWSZ - the_hash_algo->rawsz);
27462748
if (obj_cb) {
27472749
r = obj_cb(&oid, path->buf, data);
27482750
if (r)

oidtree.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
4242
* Clear the padding and copy the result in separate steps to
4343
* respect the 4-byte alignment needed by struct object_id.
4444
*/
45-
oidcpy_with_padding(&k, oid);
45+
oidcpy(&k, oid);
4646
memcpy(on->k, &k, sizeof(k));
4747

4848
/*
@@ -60,7 +60,7 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
6060
struct object_id k;
6161
size_t klen = sizeof(k);
6262

63-
oidcpy_with_padding(&k, oid);
63+
oidcpy(&k, oid);
6464

6565
if (oid->algo == GIT_HASH_UNKNOWN)
6666
klen -= sizeof(oid->algo);

parallel-checkout.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,7 @@ static void send_one_item(int fd, struct parallel_checkout_item *pc_item)
429429
fixed_portion->ident = pc_item->ca.ident;
430430
fixed_portion->name_len = name_len;
431431
fixed_portion->working_tree_encoding_len = working_tree_encoding_len;
432-
/*
433-
* We pad the unused bytes in the hash array because, otherwise,
434-
* Valgrind would complain about passing uninitialized bytes to a
435-
* write() syscall. The warning doesn't represent any real risk here,
436-
* but it could hinder the detection of actual errors.
437-
*/
438-
oidcpy_with_padding(&fixed_portion->oid, &pc_item->ce->oid);
432+
oidcpy(&fixed_portion->oid, &pc_item->ce->oid);
439433

440434
variant = data + sizeof(*fixed_portion);
441435
if (working_tree_encoding_len) {

0 commit comments

Comments
 (0)