Skip to content

Commit 072e3aa

Browse files
pks-tgitster
authored andcommitted
reftable/record: handle overflows when decoding varints
The logic to decode varints isn't able to detect integer overflows: as long as the buffer still has more data available, and as long as the current byte has its 0x80 bit set, we'll continue to add up these values to the result. This will eventually cause the `uint64_t` to overflow, at which point we'll return an invalid result. Refactor the function so that it is able to detect such overflows. The implementation is basically copied from Git's own `decode_varint()`, which already knows to handle overflows. The only adjustment is that we also take into account the string view's length in order to not overrun it. The reftable documentation explicitly notes that those two encoding schemas are supposed to be the same: Varint encoding ^^^^^^^^^^^^^^^ Varint encoding is identical to the ofs-delta encoding method used within pack files. Decoder works as follows: .... val = buf[ptr] & 0x7f while (buf[ptr] & 0x80) { ptr++ val = ((val + 1) << 7) | (buf[ptr] & 0x7f) } .... While at it, refactor `put_var_int()` in the same way by copying over the implementation of `encode_varint()`. While `put_var_int()` doesn't have an issue with overflows, it generates warnings with -Wsign-compare. The implementation of `encode_varint()` doesn't, is battle-tested and at the same time way simpler than what we currently have. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent a204f92 commit 072e3aa

File tree

3 files changed

+53
-32
lines changed

3 files changed

+53
-32
lines changed

reftable/record.c

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,47 +21,49 @@ static void *reftable_record_data(struct reftable_record *rec);
2121

2222
int get_var_int(uint64_t *dest, struct string_view *in)
2323
{
24-
int ptr = 0;
24+
const unsigned char *buf = in->buf;
25+
unsigned char c;
2526
uint64_t val;
2627

27-
if (in->len == 0)
28+
if (!in->len)
2829
return -1;
29-
val = in->buf[ptr] & 0x7f;
30-
31-
while (in->buf[ptr] & 0x80) {
32-
ptr++;
33-
if (ptr > in->len) {
30+
c = *buf++;
31+
val = c & 0x7f;
32+
33+
while (c & 0x80) {
34+
/*
35+
* We use a micro-optimization here: whenever we see that the
36+
* 0x80 bit is set, we know that the remainder of the value
37+
* cannot be 0. The zero-values thus doesn't need to be encoded
38+
* at all, which is why we subtract 1 when encoding and add 1
39+
* when decoding.
40+
*
41+
* This allows us to save a byte in some edge cases.
42+
*/
43+
val += 1;
44+
if (!val || (val & (uint64_t)(~0ULL << (64 - 7))))
45+
return -1; /* overflow */
46+
if (buf >= in->buf + in->len)
3447
return -1;
35-
}
36-
val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f);
48+
c = *buf++;
49+
val = (val << 7) + (c & 0x7f);
3750
}
3851

3952
*dest = val;
40-
return ptr + 1;
53+
return buf - in->buf;
4154
}
4255

43-
int put_var_int(struct string_view *dest, uint64_t val)
56+
int put_var_int(struct string_view *dest, uint64_t value)
4457
{
45-
uint8_t buf[10] = { 0 };
46-
int i = 9;
47-
int n = 0;
48-
buf[i] = (uint8_t)(val & 0x7f);
49-
i--;
50-
while (1) {
51-
val >>= 7;
52-
if (!val) {
53-
break;
54-
}
55-
val--;
56-
buf[i] = 0x80 | (uint8_t)(val & 0x7f);
57-
i--;
58-
}
59-
60-
n = sizeof(buf) - i - 1;
61-
if (dest->len < n)
58+
unsigned char varint[10];
59+
unsigned pos = sizeof(varint) - 1;
60+
varint[pos] = value & 0x7f;
61+
while (value >>= 7)
62+
varint[--pos] = 0x80 | (--value & 0x7f);
63+
if (dest->len < sizeof(varint) - pos)
6264
return -1;
63-
memcpy(dest->buf, &buf[i + 1], n);
64-
return n;
65+
memcpy(dest->buf, varint + pos, sizeof(varint) - pos);
66+
return sizeof(varint) - pos;
6567
}
6668

6769
int reftable_is_block_type(uint8_t typ)

reftable/record.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ static inline void string_view_consume(struct string_view *s, int n)
3232
s->len -= n;
3333
}
3434

35-
/* utilities for de/encoding varints */
36-
35+
/*
36+
* Decode and encode a varint. Returns the number of bytes read/written, or a
37+
* negative value in case encoding/decoding the varint has failed.
38+
*/
3739
int get_var_int(uint64_t *dest, struct string_view *in);
3840
int put_var_int(struct string_view *dest, uint64_t val);
3941

t/unit-tests/t-reftable-record.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ static void t_varint_roundtrip(void)
5858
}
5959
}
6060

61+
static void t_varint_overflow(void)
62+
{
63+
unsigned char buf[] = {
64+
0xFF, 0xFF, 0xFF, 0xFF,
65+
0xFF, 0xFF, 0xFF, 0xFF,
66+
0xFF, 0x00,
67+
};
68+
struct string_view view = {
69+
.buf = buf,
70+
.len = sizeof(buf),
71+
};
72+
uint64_t value;
73+
int err = get_var_int(&value, &view);
74+
check_int(err, ==, -1);
75+
}
76+
6177
static void set_hash(uint8_t *h, int j)
6278
{
6379
for (int i = 0; i < hash_size(REFTABLE_HASH_SHA1); i++)
@@ -544,6 +560,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
544560
TEST(t_reftable_log_record_roundtrip(), "record operations work on log record");
545561
TEST(t_reftable_ref_record_roundtrip(), "record operations work on ref record");
546562
TEST(t_varint_roundtrip(), "put_var_int and get_var_int work");
563+
TEST(t_varint_overflow(), "get_var_int notices an integer overflow");
547564
TEST(t_key_roundtrip(), "reftable_encode_key and reftable_decode_key work");
548565
TEST(t_reftable_obj_record_roundtrip(), "record operations work on obj record");
549566
TEST(t_reftable_index_record_roundtrip(), "record operations work on index record");

0 commit comments

Comments
 (0)